FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. flowerpower/__init__.py +2 -6
  2. flowerpower/cfg/__init__.py +7 -14
  3. flowerpower/cfg/base.py +29 -25
  4. flowerpower/cfg/pipeline/__init__.py +8 -6
  5. flowerpower/cfg/pipeline/_schedule.py +32 -0
  6. flowerpower/cfg/pipeline/adapter.py +0 -5
  7. flowerpower/cfg/pipeline/builder.py +377 -0
  8. flowerpower/cfg/pipeline/run.py +36 -0
  9. flowerpower/cfg/project/__init__.py +11 -24
  10. flowerpower/cfg/project/adapter.py +0 -12
  11. flowerpower/cli/__init__.py +2 -21
  12. flowerpower/cli/cfg.py +0 -3
  13. flowerpower/cli/mqtt.py +0 -6
  14. flowerpower/cli/pipeline.py +22 -415
  15. flowerpower/cli/utils.py +0 -1
  16. flowerpower/flowerpower.py +345 -146
  17. flowerpower/pipeline/__init__.py +2 -0
  18. flowerpower/pipeline/base.py +21 -12
  19. flowerpower/pipeline/io.py +58 -54
  20. flowerpower/pipeline/manager.py +165 -726
  21. flowerpower/pipeline/pipeline.py +643 -0
  22. flowerpower/pipeline/registry.py +285 -18
  23. flowerpower/pipeline/visualizer.py +5 -6
  24. flowerpower/plugins/io/__init__.py +8 -0
  25. flowerpower/plugins/mqtt/__init__.py +7 -11
  26. flowerpower/settings/__init__.py +0 -2
  27. flowerpower/settings/{backend.py → _backend.py} +0 -21
  28. flowerpower/settings/logging.py +1 -1
  29. flowerpower/utils/logging.py +24 -12
  30. flowerpower/utils/misc.py +17 -256
  31. flowerpower/utils/monkey.py +1 -83
  32. flowerpower-0.21.0.dist-info/METADATA +463 -0
  33. flowerpower-0.21.0.dist-info/RECORD +44 -0
  34. flowerpower/cfg/pipeline/schedule.py +0 -74
  35. flowerpower/cfg/project/job_queue.py +0 -238
  36. flowerpower/cli/job_queue.py +0 -1061
  37. flowerpower/fs/__init__.py +0 -29
  38. flowerpower/fs/base.py +0 -662
  39. flowerpower/fs/ext.py +0 -2143
  40. flowerpower/fs/storage_options.py +0 -1420
  41. flowerpower/job_queue/__init__.py +0 -294
  42. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  43. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  44. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  45. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  46. flowerpower/job_queue/apscheduler/setup.py +0 -554
  47. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  48. flowerpower/job_queue/apscheduler/utils.py +0 -311
  49. flowerpower/job_queue/base.py +0 -413
  50. flowerpower/job_queue/rq/__init__.py +0 -10
  51. flowerpower/job_queue/rq/_trigger.py +0 -37
  52. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
  53. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
  54. flowerpower/job_queue/rq/manager.py +0 -1582
  55. flowerpower/job_queue/rq/setup.py +0 -154
  56. flowerpower/job_queue/rq/utils.py +0 -69
  57. flowerpower/mqtt.py +0 -12
  58. flowerpower/pipeline/job_queue.py +0 -583
  59. flowerpower/pipeline/runner.py +0 -603
  60. flowerpower/plugins/io/base.py +0 -2520
  61. flowerpower/plugins/io/helpers/datetime.py +0 -298
  62. flowerpower/plugins/io/helpers/polars.py +0 -875
  63. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  64. flowerpower/plugins/io/helpers/sql.py +0 -202
  65. flowerpower/plugins/io/loader/__init__.py +0 -28
  66. flowerpower/plugins/io/loader/csv.py +0 -37
  67. flowerpower/plugins/io/loader/deltatable.py +0 -190
  68. flowerpower/plugins/io/loader/duckdb.py +0 -19
  69. flowerpower/plugins/io/loader/json.py +0 -37
  70. flowerpower/plugins/io/loader/mqtt.py +0 -159
  71. flowerpower/plugins/io/loader/mssql.py +0 -26
  72. flowerpower/plugins/io/loader/mysql.py +0 -26
  73. flowerpower/plugins/io/loader/oracle.py +0 -26
  74. flowerpower/plugins/io/loader/parquet.py +0 -35
  75. flowerpower/plugins/io/loader/postgres.py +0 -26
  76. flowerpower/plugins/io/loader/pydala.py +0 -19
  77. flowerpower/plugins/io/loader/sqlite.py +0 -23
  78. flowerpower/plugins/io/metadata.py +0 -244
  79. flowerpower/plugins/io/saver/__init__.py +0 -28
  80. flowerpower/plugins/io/saver/csv.py +0 -36
  81. flowerpower/plugins/io/saver/deltatable.py +0 -186
  82. flowerpower/plugins/io/saver/duckdb.py +0 -19
  83. flowerpower/plugins/io/saver/json.py +0 -36
  84. flowerpower/plugins/io/saver/mqtt.py +0 -28
  85. flowerpower/plugins/io/saver/mssql.py +0 -26
  86. flowerpower/plugins/io/saver/mysql.py +0 -26
  87. flowerpower/plugins/io/saver/oracle.py +0 -26
  88. flowerpower/plugins/io/saver/parquet.py +0 -36
  89. flowerpower/plugins/io/saver/postgres.py +0 -26
  90. flowerpower/plugins/io/saver/pydala.py +0 -20
  91. flowerpower/plugins/io/saver/sqlite.py +0 -24
  92. flowerpower/plugins/mqtt/cfg.py +0 -17
  93. flowerpower/plugins/mqtt/manager.py +0 -962
  94. flowerpower/settings/job_queue.py +0 -87
  95. flowerpower/utils/scheduler.py +0 -311
  96. flowerpower-0.11.6.20.dist-info/METADATA +0 -537
  97. flowerpower-0.11.6.20.dist-info/RECORD +0 -102
  98. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
  99. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
  100. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
  101. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
@@ -1,29 +0,0 @@
1
# ``import importlib`` alone does not guarantee that the ``importlib.util``
# submodule is loaded, so import the submodule explicitly (this still binds
# the name ``importlib``).
import importlib.util

# Optional-dependency flags; sibling modules import them from this package.
has_orjson = importlib.util.find_spec("orjson") is not None
has_polars = importlib.util.find_spec("polars") is not None

# Take AbstractFileSystem from the local ``ext`` module only when both
# optional packages are installed; otherwise use fsspec's class directly.
if has_orjson and has_polars:
    from .ext import AbstractFileSystem
else:
    from fsspec import AbstractFileSystem

# These imports must come after the flags above: ``base`` reads
# ``has_orjson`` / ``has_polars`` from this package while it is imported.
from .base import DirFileSystem, get_filesystem  # noqa: E402
from .storage_options import (  # noqa: E402
    AwsStorageOptions,
    AzureStorageOptions,
    BaseStorageOptions,
    GcsStorageOptions,
    GitHubStorageOptions,
    GitLabStorageOptions,
    StorageOptions,
)

__all__ = [
    "get_filesystem",
    "DirFileSystem",
    "AbstractFileSystem",
    "StorageOptions",
    "AwsStorageOptions",
    "AzureStorageOptions",
    "GcsStorageOptions",
    "GitHubStorageOptions",
    "GitLabStorageOptions",
    "BaseStorageOptions",
]
flowerpower/fs/base.py DELETED
@@ -1,662 +0,0 @@
1
- import base64
2
- import inspect
3
- import os
4
- import posixpath
5
- import urllib
6
- from pathlib import Path
7
- from typing import Any
8
-
9
- import fsspec
10
- import requests
11
- from fsspec import filesystem
12
- from fsspec.implementations.cache_mapper import AbstractCacheMapper
13
- from fsspec.implementations.cached import SimpleCacheFileSystem
14
- from fsspec.implementations.dirfs import DirFileSystem
15
- from fsspec.implementations.memory import MemoryFile
16
- from fsspec.utils import infer_storage_options
17
- from loguru import logger
18
-
19
- from ..utils.logging import setup_logging
20
- from . import has_orjson, has_polars
21
-
22
- if has_orjson and has_polars:
23
- from .ext import AbstractFileSystem
24
- else:
25
- from fsspec import AbstractFileSystem
26
-
27
- from .storage_options import BaseStorageOptions
28
- from .storage_options import from_dict as storage_options_from_dict
29
-
30
- setup_logging()
31
-
32
-
33
class FileNameCacheMapper(AbstractCacheMapper):
    """Cache mapper that keeps the remote path as the cache-relative file name.

    The mapper returns the remote path (made relative) instead of a hash and,
    as a side effect, creates the matching sub-directories below
    ``directory`` so the cache mirrors the remote directory layout.

    Attributes:
        directory (str): Base directory for cached files.

    Example:
        >>> mapper = FileNameCacheMapper("/tmp/cache")
        >>> mapper("bucket/data/file.csv")
        'bucket/data/file.csv'
    """

    def __init__(self, directory: str):
        """Initialize the mapper.

        Args:
            directory: Base directory where cached files will be stored.
        """
        self.directory = directory

    def __call__(self, path: str) -> str:
        """Map a remote file path to its cache-relative file name.

        Args:
            path: File path on the remote filesystem.

        Returns:
            str: ``path`` without leading slashes. Parent directories for it
            are created below ``self.directory`` before returning.

        Example:
            >>> mapper = FileNameCacheMapper("/tmp/cache")
            >>> mapper("/data/nested/file.txt")
            'data/nested/file.txt'
        """
        # The result is joined onto a cache directory by the caller. A leading
        # "/" would make posixpath.join() drop that directory, so the cached
        # copy (and the directories created here) would land outside the cache.
        relative = path.lstrip("/")
        target_dir = posixpath.dirname(posixpath.join(self.directory, relative))
        os.makedirs(target_dir, exist_ok=True)
        return relative
82
-
83
-
84
class MonitoredSimpleCacheFileSystem(SimpleCacheFileSystem):
    """Caching filesystem that mirrors remote paths in the cache and can log downloads.

    Extends ``SimpleCacheFileSystem`` with:
    - optional logging of cache misses (``verbose=True``)
    - a ``FileNameCacheMapper`` so cached files keep their remote path
    - ``size`` answered from the cached copy when one exists
    - ``sync_cache`` to pull every remote file into the cache

    Attributes:
        _verbose (bool): Whether cache misses are logged.
        _mapper (FileNameCacheMapper): Maps remote paths to cache paths.
        storage (list[str]): Cache storage locations.
        fs (AbstractFileSystem): Underlying filesystem being cached.

    Example:
        >>> from fsspec import filesystem
        >>> s3 = filesystem("s3", key="ACCESS_KEY", secret="SECRET_KEY")
        >>> cached_fs = MonitoredSimpleCacheFileSystem(
        ...     fs=s3,
        ...     cache_storage="/tmp/s3_cache",
        ...     verbose=True
        ... )
        >>> with cached_fs.open("bucket/data.csv") as f:
        ...     data = f.read()
        Downloading s3://bucket/data.csv
        >>> # Second read uses the cache, no download message
        >>> with cached_fs.open("bucket/data.csv") as f:
        ...     data = f.read()
    """

    def __init__(self, **kwargs: Any):
        """Initialize the monitored cache filesystem.

        Args:
            **kwargs: Forwarded unchanged to ``SimpleCacheFileSystem``. Keys read here:
                verbose (bool): Log a message for every cache miss. Defaults to False.
                Commonly passed keys are ``fs`` (filesystem to cache) and
                ``cache_storage`` (cache directory).
        """
        self._verbose = kwargs.get("verbose", False)
        super().__init__(**kwargs)
        # Build the mapper from the storage location the base class resolved,
        # not from the raw ``cache_storage`` kwarg: the kwarg may be missing
        # (None), a list of directories, or a sentinel the base class expands,
        # and none of those is a usable directory string. fsspec documents the
        # last storage entry as the writable one.
        self._mapper = FileNameCacheMapper(self.storage[-1])

    def _check_file(self, path: str) -> str | None:
        """Return the cached copy of ``path`` if one exists.

        Args:
            path: Path of the file on the remote filesystem.

        Returns:
            str | None: Full path of the cached file in the first storage
            location that contains it, otherwise None. On a miss with
            ``verbose`` enabled, a "Downloading ..." message is logged.
        """
        self._check_cache()
        cache_path = self._mapper(path)
        for storage in self.storage:
            candidate = posixpath.join(storage, cache_path)
            if posixpath.exists(candidate):
                return candidate
        if self._verbose:
            logger.info(f"Downloading {self.protocol[0]}://{path}")
        return None

    def size(self, path: str) -> int:
        """Return the size of ``path`` in bytes.

        The cached copy is used when present; otherwise the wrapped
        filesystem is asked.

        Args:
            path: Path to the file.

        Returns:
            int: Size of the file in bytes.
        """
        cached_file = self._check_file(self._strip_protocol(path))
        if cached_file is None:
            return self.fs.size(path)
        return posixpath.getsize(cached_file)

    def sync_cache(self, reload: bool = False) -> None:
        """Open (and thereby cache) every file found by ``glob("**/*")``.

        Args:
            reload: When True, clear the cache first so every file is fetched again.
        """
        if reload:
            self.clear_cache()
        for entry in self.glob("**/*"):
            if self.isfile(entry):
                # Opening the file is what populates the cache; the handle
                # itself is not needed.
                self.open(entry).close()

    def __getattribute__(self, item):
        """Route attribute access between this wrapper and the wrapped filesystem.

        Names in the set below always resolve on this class. Anything else is
        looked up in the instance ``__dict__`` first and then on the wrapped
        filesystem ``fs``; before ``fs`` is set, normal lookup is used.
        """
        if item in {
            # new items
            "size",
            "glob",
            "sync_cache",
            # previous
            "load_cache",
            "_open",
            "save_cache",
            "close_and_update",
            "__init__",
            "__getattribute__",
            "__reduce__",
            "_make_local_details",
            "open",
            "cat",
            "cat_file",
            "cat_ranges",
            "get",
            "read_block",
            "tail",
            "head",
            "info",
            "ls",
            "exists",
            "isfile",
            "isdir",
            "_check_file",
            "_check_cache",
            "_mkcache",
            "clear_cache",
            "clear_expired_cache",
            "pop_from_cache",
            "local_file",
            "_paths_from_path",
            "get_mapper",
            "open_many",
            "commit_many",
            "hash_name",
            "__hash__",
            "__eq__",
            "to_json",
            "to_dict",
            "cache_size",
            "pipe_file",
            "pipe",
            "start_transaction",
            "end_transaction",
        }:
            # all the methods defined in this class. Note `open` here, since
            # it calls `_open`, but is actually in superclass
            return lambda *args, **kw: getattr(type(self), item).__get__(self)(
                *args, **kw
            )
        if item in ["__reduce_ex__"]:
            raise AttributeError
        if item in ["transaction"]:
            # property
            return type(self).transaction.__get__(self)
        if item in ["_cache", "transaction_type"]:
            # class attributes
            return getattr(type(self), item)
        if item == "__class__":
            return type(self)
        d = object.__getattribute__(self, "__dict__")
        fs = d.get("fs", None)  # fs is not immediately defined
        if item in d:
            return d[item]
        elif fs is not None:
            if item in fs.__dict__:
                # attribute of instance
                return fs.__dict__[item]
            # attribute belonging to the target filesystem
            cls = type(fs)
            m = getattr(cls, item)
            if (inspect.isfunction(m) or inspect.isdatadescriptor(m)) and (
                not hasattr(m, "__self__") or m.__self__ is None
            ):
                # instance method
                return m.__get__(fs, cls)
            return m  # class method or attribute
        else:
            # attributes of the superclass, while target is being set up
            return super().__getattribute__(item)
303
-
304
-
305
class GitLabFileSystem(AbstractFileSystem):
    """Read-only filesystem interface for GitLab repositories.

    Files and directory listings are fetched through the GitLab REST API
    (``/api/v4``), optionally authenticated with a personal access token.

    Attributes:
        project_name (str | None): Name of the GitLab project.
        project_id (str | None): ID of the GitLab project; looked up by name
            when not given.
        access_token (str | None): GitLab personal access token.
        branch (str): Git ref to read from.
        base_url (str): GitLab instance URL without trailing slash.
        timeout (float | None): Timeout in seconds applied to every HTTP request.

    Example:
        >>> fs = GitLabFileSystem(
        ...     project_name="my-project",
        ...     access_token="glpat-xxxx"
        ... )
        >>> with fs.open("path/to/file.txt") as f:
        ...     content = f.read()
        >>> files = fs.ls("path/to/dir")
        >>>
        >>> # Self-hosted GitLab
        >>> fs = GitLabFileSystem(
        ...     project_id="12345",
        ...     access_token="glpat-xxxx",
        ...     base_url="https://gitlab.company.com",
        ...     branch="develop"
        ... )
    """

    def __init__(
        self,
        project_name: str | None = None,
        project_id: str | None = None,
        access_token: str | None = None,
        branch: str = "main",
        base_url: str = "https://gitlab.com",
        timeout: float | None = 30.0,
        **kwargs,
    ):
        """Initialize the GitLab filesystem.

        Args:
            project_name: Name of the GitLab project. Required if project_id not provided.
            project_id: ID of the GitLab project. Required if project_name not provided.
            access_token: GitLab personal access token. Omit for anonymous access.
            branch: Git ref to read from. Defaults to "main".
            base_url: GitLab instance URL. Defaults to "https://gitlab.com".
            timeout: Seconds to wait for each HTTP request; None waits indefinitely.
            **kwargs: Additional arguments passed to AbstractFileSystem.

        Raises:
            ValueError: If neither project_name nor project_id is provided, or
                the project name cannot be resolved to an ID.
            requests.RequestException: If the project lookup request fails.
        """
        super().__init__(**kwargs)
        self.project_name = project_name
        self.project_id = project_id
        self.access_token = access_token
        self.branch = branch
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self._validate_init()
        if not self.project_id:
            self.project_id = self._get_project_id()

    def _validate_init(self) -> None:
        """Ensure that a project ID or a project name was supplied.

        Raises:
            ValueError: If neither project_id nor project_name is provided.
        """
        if not self.project_id and not self.project_name:
            raise ValueError("Either 'project_id' or 'project_name' must be provided")

    def _auth_headers(self) -> dict[str, str]:
        """Return the token header, or no headers when no token is configured."""
        if self.access_token:
            return {"PRIVATE-TOKEN": self.access_token}
        return {}

    def _get_project_id(self) -> str:
        """Resolve ``project_name`` to a project ID via the projects search API.

        Returns:
            The ``id`` field of the first search result whose ``name`` equals
            ``project_name`` exactly.

        Raises:
            ValueError: If no search result has a matching name.
            requests.RequestException: If the API request fails.
        """
        response = requests.get(
            f"{self.base_url}/api/v4/projects",
            headers=self._auth_headers(),
            params={"search": self.project_name},
            timeout=self.timeout,
        )
        response.raise_for_status()
        for project in response.json():
            if project["name"] == self.project_name:
                return project["id"]
        raise ValueError(f"Project '{self.project_name}' not found")

    def _open(self, path: str, mode: str = "rb", **kwargs) -> MemoryFile:
        """Open a file from the repository as an in-memory file.

        Args:
            path: Path to the file within the repository.
            mode: File open mode. Only "rb" (read binary) is supported.
            **kwargs: Additional arguments (unused).

        Returns:
            MemoryFile: File-like object holding the decoded file content.

        Raises:
            NotImplementedError: If mode is not "rb".
            requests.RequestException: If the API request fails.

        Example:
            >>> fs = GitLabFileSystem(project_id="12345", access_token="glpat-xxxx")
            >>> with fs.open("README.md") as f:
            ...     print(f.read().decode())
        """
        # Imported here because a plain ``import urllib`` does not guarantee
        # that the ``urllib.parse`` submodule is loaded.
        from urllib.parse import quote

        if mode != "rb":
            raise NotImplementedError("Only read mode is supported")

        # The file path is a single URL path segment, so "/" must be encoded
        # too. quote_plus() is wrong here: it turns spaces into "+", which is
        # a literal plus sign inside a URL path.
        encoded_path = quote(path, safe="")
        url = (
            f"{self.base_url}/api/v4/projects/{self.project_id}"
            f"/repository/files/{encoded_path}"
        )
        response = requests.get(
            url,
            headers=self._auth_headers(),
            params={"ref": self.branch},  # let requests escape the ref
            timeout=self.timeout,
        )
        response.raise_for_status()
        file_content = base64.b64decode(response.json()["content"])
        return MemoryFile(None, None, file_content)

    def _ls(self, path: str, detail: bool = False, **kwargs) -> list[str] | list[dict]:
        """List the contents of a repository directory.

        Args:
            path: Directory path within the repository.
            detail: If True, return the entry dicts exactly as delivered by
                the GitLab API; if False, return only each entry's ``name``.
            **kwargs: Additional arguments (unused).

        Returns:
            list[str] | list[dict]: Entry names or raw entry dicts.

        Raises:
            requests.RequestException: If an API request fails.

        Example:
            >>> fs = GitLabFileSystem(project_id="12345", access_token="glpat-xxxx")
            >>> fs.ls("docs")
            ['README.md', 'API.md']
            >>> for item in fs.ls("docs", detail=True):
            ...     print(f"{item['name']}: {item['type']}")
        """
        url = f"{self.base_url}/api/v4/projects/{self.project_id}/repository/tree"
        # Query values go through ``params`` so that characters such as "&",
        # "#" or spaces in the path or ref cannot corrupt the query string.
        params = {"path": path, "ref": self.branch, "per_page": 100, "page": 1}
        headers = self._auth_headers()

        entries: list[dict] = []
        while True:
            response = requests.get(
                url, headers=headers, params=params, timeout=self.timeout
            )
            response.raise_for_status()
            entries.extend(response.json())
            # GitLab paginates listings; follow the "X-Next-Page" header until
            # it is missing or empty so large directories are not truncated.
            next_page = response.headers.get("X-Next-Page")
            if not next_page:
                break
            params["page"] = next_page

        if detail:
            return entries
        return [entry["name"] for entry in entries]

    def ls(self, path: str, detail: bool = False, **kwargs) -> list[str] | list[dict]:
        """Public listing entry point; delegates to ``_ls``.

        The examples in this class call ``fs.ls(...)``, so the public name is
        provided explicitly.

        NOTE(review): entries are raw GitLab records (e.g. ``type`` is GitLab's
        own value), not normalized fsspec info dicts; confirm before relying on
        generic helpers built on top of ``ls``.
        """
        return self._ls(path, detail=detail, **kwargs)
491
-
492
-
493
# Make "gitlab" available as an fsspec protocol. A ValueError from the
# registration call is ignored on purpose (presumably it signals that the
# name is already registered, e.g. on a repeated import; confirm against
# fsspec).
try:
    fsspec.register_implementation("gitlab", GitLabFileSystem)
except ValueError:
    pass
497
-
498
-
499
# Keep references to the original ``ls`` methods before they are replaced.
dirfs_ls_o = DirFileSystem.ls
mscf_ls_o = MonitoredSimpleCacheFileSystem.ls


def dir_ls_p(self, path, detail=False, **kwargs):
    """Call the original ``DirFileSystem.ls`` with ``detail`` defaulting to False."""
    return dirfs_ls_o(self, path, detail=detail, **kwargs)


def mscf_ls_p(self, path, detail=False, **kwargs):
    """Call the original ``MonitoredSimpleCacheFileSystem.ls`` with ``detail`` defaulting to False."""
    return mscf_ls_o(self, path, detail=detail, **kwargs)


# Install the wrappers. This is a module-level side effect: it changes
# ``DirFileSystem.ls`` for every user of the class once this module is imported.
setattr(DirFileSystem, "ls", dir_ls_p)
setattr(MonitoredSimpleCacheFileSystem, "ls", mscf_ls_p)
516
-
517
-
518
def _wrap_existing_filesystem(
    fs,
    path: str | None,
    dirfs: bool,
    cached: bool,
    cache_storage: str | None,
):
    """Apply the optional DirFileSystem / cache wrappers to a caller-supplied filesystem."""
    base_path = path.split("://")[-1] if path is not None else ""

    # Without a path there is no directory to root the filesystem at.
    if dirfs and path is not None:
        if fs.protocol == "dir":
            if base_path != fs.path:
                # Re-root relative to the existing DirFileSystem. Only a
                # leading ``fs.path`` is stripped; str.replace() would also
                # remove matches in the middle of the path.
                relative = base_path.removeprefix(fs.path).lstrip("/")
                fs = DirFileSystem(path=posixpath.join(fs.path, relative), fs=fs.fs)
        else:
            fs = DirFileSystem(path=base_path, fs=fs)

    if cached:
        # Filesystems not created by get_filesystem() (and fresh
        # DirFileSystem wrappers) carry no ``is_cache_fs`` flag.
        if getattr(fs, "is_cache_fs", False):
            return fs
        if cache_storage is None:
            # Same default as for filesystems built from a path: a
            # path-based location below the current working directory.
            cache_storage = (Path.cwd() / base_path.lstrip("/")).as_posix()
        fs = MonitoredSimpleCacheFileSystem(fs=fs, cache_storage=cache_storage)
        fs.is_cache_fs = True

    return fs


def get_filesystem(
    path: str | Path | None = None,
    storage_options: BaseStorageOptions | dict[str, str] | None = None,
    dirfs: bool = True,
    cached: bool = False,
    cache_storage: str | None = None,
    fs: AbstractFileSystem | None = None,
    **storage_options_kwargs,
) -> AbstractFileSystem:
    """Get a filesystem instance based on path or configuration.

    Either wraps a caller-supplied ``fs`` or builds a new filesystem from the
    protocol found in ``storage_options_kwargs``, ``storage_options`` or
    ``path`` (in that order; "file" if none is given).

    Args:
        path: URI or path to the filesystem location. Examples:
            - Local: "/path/to/data"
            - S3: "s3://bucket/path"
            - GCS: "gs://bucket/path"
            - Azure: "abfs://container/path"
            - GitHub: "github://org/repo/path"
        storage_options: Configuration for the filesystem. Can be:
            - a BaseStorageOptions object
            - a dictionary of options, converted with the protocol
            - None, in which case ``storage_options_kwargs`` are used
        dirfs: Whether to wrap the filesystem in a DirFileSystem rooted at ``path``.
        cached: Whether to wrap the filesystem in a local file cache. Not
            applied to "file"/"local" filesystems built from a path.
        cache_storage: Directory for cached files. Defaults to a path-based
            location below the current working directory.
        fs: Existing filesystem instance to wrap with dirfs and/or caching.
        **storage_options_kwargs: Additional keyword arguments for storage options.
            Alternative to passing a storage_options dictionary.

    Returns:
        AbstractFileSystem: Configured filesystem instance. Instances created
        here carry an ``is_cache_fs`` attribute.

    Raises:
        Errors raised by the storage-option helpers or by the fsspec backend
        propagate unchanged.

    Example:
        >>> # Local filesystem
        >>> fs = get_filesystem("/path/to/data")
        >>>
        >>> # S3 with credentials
        >>> fs = get_filesystem(
        ...     "s3://bucket/data",
        ...     storage_options={"key": "ACCESS_KEY", "secret": "SECRET_KEY"}
        ... )
        >>>
        >>> # Cached GCS filesystem
        >>> fs = get_filesystem(
        ...     "gs://bucket/data",
        ...     storage_options=GcsStorageOptions(token="service_account.json"),
        ...     cached=True,
        ...     cache_storage="/tmp/gcs_cache"
        ... )
        >>>
        >>> # Wrap existing filesystem
        >>> base_fs = filesystem("s3", key="ACCESS", secret="SECRET")
        >>> cached_fs = get_filesystem(fs=base_fs, cached=True)
    """
    # String form for parsing; the original object is still what gets passed
    # to DirFileSystem for local paths.
    path_str = str(path) if isinstance(path, Path) else path

    if fs is not None:
        return _wrap_existing_filesystem(fs, path_str, dirfs, cached, cache_storage)

    # With no path, the protocol has to come from the storage options.
    pp = infer_storage_options(path_str) if path_str is not None else {}
    protocol = (
        storage_options_kwargs.get("protocol", None)
        or (
            storage_options.get("protocol", None)
            if isinstance(storage_options, dict)
            else getattr(storage_options, "protocol", None)
        )
        or pp.get("protocol", "file")
    )

    if protocol in ("file", "local"):
        fs = filesystem(protocol)
        fs.is_cache_fs = False
        if dirfs and path is not None:
            fs = DirFileSystem(path=path, fs=fs)
            fs.is_cache_fs = False
        return fs

    host = pp.get("host", "")
    remote_path = pp.get("path", "").lstrip("/")
    if host and host not in remote_path:
        remote_path = posixpath.join(host, remote_path)
    # A dot in the last component is taken to mean "this is a file", and its
    # parent directory is used. Only the last component is inspected so that
    # dotted bucket or directory names (e.g. "my.bucket/data") are kept.
    if "." in posixpath.basename(remote_path):
        remote_path = posixpath.dirname(remote_path)

    if isinstance(storage_options, dict):
        storage_options = storage_options_from_dict(protocol, storage_options)

    if storage_options is None:
        storage_options = storage_options_from_dict(protocol, storage_options_kwargs)

    fs = storage_options.to_filesystem()
    fs.is_cache_fs = False
    if dirfs and remote_path:
        fs = DirFileSystem(path=remote_path, fs=fs)
        fs.is_cache_fs = False
    if cached:
        if cache_storage is None:
            cache_storage = (Path.cwd() / remote_path).as_posix()
        fs = MonitoredSimpleCacheFileSystem(fs=fs, cache_storage=cache_storage)
        fs.is_cache_fs = True

    return fs