FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. flowerpower/__init__.py +17 -2
  2. flowerpower/cfg/__init__.py +201 -149
  3. flowerpower/cfg/base.py +122 -24
  4. flowerpower/cfg/pipeline/__init__.py +254 -0
  5. flowerpower/cfg/pipeline/adapter.py +66 -0
  6. flowerpower/cfg/pipeline/run.py +40 -11
  7. flowerpower/cfg/pipeline/schedule.py +69 -79
  8. flowerpower/cfg/project/__init__.py +149 -0
  9. flowerpower/cfg/project/adapter.py +57 -0
  10. flowerpower/cfg/project/job_queue.py +165 -0
  11. flowerpower/cli/__init__.py +92 -37
  12. flowerpower/cli/job_queue.py +878 -0
  13. flowerpower/cli/mqtt.py +32 -1
  14. flowerpower/cli/pipeline.py +559 -406
  15. flowerpower/cli/utils.py +29 -18
  16. flowerpower/flowerpower.py +12 -8
  17. flowerpower/fs/__init__.py +20 -2
  18. flowerpower/fs/base.py +350 -26
  19. flowerpower/fs/ext.py +797 -216
  20. flowerpower/fs/storage_options.py +1097 -55
  21. flowerpower/io/base.py +13 -18
  22. flowerpower/io/loader/__init__.py +28 -0
  23. flowerpower/io/loader/deltatable.py +7 -10
  24. flowerpower/io/metadata.py +1 -0
  25. flowerpower/io/saver/__init__.py +28 -0
  26. flowerpower/io/saver/deltatable.py +4 -3
  27. flowerpower/job_queue/__init__.py +252 -0
  28. flowerpower/job_queue/apscheduler/__init__.py +11 -0
  29. flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  30. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  31. flowerpower/job_queue/apscheduler/manager.py +1063 -0
  32. flowerpower/job_queue/apscheduler/setup.py +524 -0
  33. flowerpower/job_queue/apscheduler/trigger.py +169 -0
  34. flowerpower/job_queue/apscheduler/utils.py +309 -0
  35. flowerpower/job_queue/base.py +382 -0
  36. flowerpower/job_queue/rq/__init__.py +10 -0
  37. flowerpower/job_queue/rq/_trigger.py +37 -0
  38. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  39. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  40. flowerpower/job_queue/rq/manager.py +1449 -0
  41. flowerpower/job_queue/rq/setup.py +150 -0
  42. flowerpower/job_queue/rq/utils.py +69 -0
  43. flowerpower/pipeline/__init__.py +5 -0
  44. flowerpower/pipeline/base.py +118 -0
  45. flowerpower/pipeline/io.py +407 -0
  46. flowerpower/pipeline/job_queue.py +505 -0
  47. flowerpower/pipeline/manager.py +1586 -0
  48. flowerpower/pipeline/registry.py +560 -0
  49. flowerpower/pipeline/runner.py +560 -0
  50. flowerpower/pipeline/visualizer.py +142 -0
  51. flowerpower/plugins/mqtt/__init__.py +12 -0
  52. flowerpower/plugins/mqtt/cfg.py +16 -0
  53. flowerpower/plugins/mqtt/manager.py +789 -0
  54. flowerpower/settings.py +110 -0
  55. flowerpower/utils/logging.py +21 -0
  56. flowerpower/utils/misc.py +57 -9
  57. flowerpower/utils/sql.py +122 -24
  58. flowerpower/utils/templates.py +2 -142
  59. flowerpower-1.0.0b1.dist-info/METADATA +324 -0
  60. flowerpower-1.0.0b1.dist-info/RECORD +94 -0
  61. flowerpower/_web/__init__.py +0 -61
  62. flowerpower/_web/routes/config.py +0 -103
  63. flowerpower/_web/routes/pipelines.py +0 -173
  64. flowerpower/_web/routes/scheduler.py +0 -136
  65. flowerpower/cfg/pipeline/tracker.py +0 -14
  66. flowerpower/cfg/project/open_telemetry.py +0 -8
  67. flowerpower/cfg/project/tracker.py +0 -11
  68. flowerpower/cfg/project/worker.py +0 -19
  69. flowerpower/cli/scheduler.py +0 -309
  70. flowerpower/cli/web.py +0 -44
  71. flowerpower/event_handler.py +0 -23
  72. flowerpower/mqtt.py +0 -609
  73. flowerpower/pipeline.py +0 -2499
  74. flowerpower/scheduler.py +0 -680
  75. flowerpower/tui.py +0 -79
  76. flowerpower/utils/datastore.py +0 -186
  77. flowerpower/utils/eventbroker.py +0 -127
  78. flowerpower/utils/executor.py +0 -58
  79. flowerpower/utils/trigger.py +0 -140
  80. flowerpower-0.9.13.1.dist-info/METADATA +0 -586
  81. flowerpower-0.9.13.1.dist-info/RECORD +0 -76
  82. /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
  83. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +0 -0
  84. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
  85. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
flowerpower/cli/utils.py CHANGED
@@ -1,15 +1,20 @@
 import ast
+import importlib
 import json
-from pathlib import Path
+import posixpath
 import re
-import importlib
+import sys
+from pathlib import Path
 from typing import Callable
+
 from loguru import logger
-import sys
-import posixpath
 
 from flowerpower.pipeline import PipelineManager
 
+from ..utils.logging import setup_logging
+
+setup_logging()
+
 
 # Parse additional parameters
 def parse_param_dict(param_str: str | None) -> dict:
@@ -99,12 +104,13 @@ def parse_dict_or_list_param(
         logger.warning(f"Could not parse {param_type} parameter: {value}")
     return None
 
+
 def load_hook(
-    pipeline_name: str,
-    function_path: str,
-    base_dir = None,
-    storage_options: str | None = None,
-) -> Callable:
+    pipeline_name: str,
+    function_path: str,
+    base_dir=None,
+    storage_options: str | None = None,
+) -> Callable:
     """
     Load a hook function from a specified path.
     This function dynamically imports the module and retrieves the function
@@ -118,21 +124,26 @@ def load_hook(
     Returns:
         Callable: The loaded hook function
     """
-    with PipelineManager(
-        storage_options=storage_options, base_dir=base_dir
-    ) as pm:
-        path_segments = function_path.rsplit('.', 2)
+    with PipelineManager(storage_options=storage_options, base_dir=base_dir) as pm:
+        path_segments = function_path.rsplit(".", 2)
         if len(path_segments) == 2:
             # If the function path is in the format 'module_name.function_name'
             module_name, function_name = path_segments
-            module_path = ''
+            module_path = ""
        elif len(path_segments) == 3:
             # If the function path is in the format 'package.[subpackage.]module_name.function_name'
             module_path, module_name, function_name = path_segments
 
-
-        logger.debug(posixpath.join(pm._fs.path,"hooks", pipeline_name , module_path.replace('.', '/')))
-        sys.path.append(posixpath.join(pm._fs.path,"hooks", pipeline_name ,module_path.replace('.', '/')))
+        logger.debug(
+            posixpath.join(
+                pm._fs.path, "hooks", pipeline_name, module_path.replace(".", "/")
+            )
+        )
+        sys.path.append(
+            posixpath.join(
+                pm._fs.path, "hooks", pipeline_name, module_path.replace(".", "/")
+            )
+        )
         hook_module = importlib.import_module(module_name)
         hook_function = getattr(hook_module, function_name)
-        return hook_function
+        return hook_function
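The rsplit(".", 2) call above is what lets load_hook accept both the 'module_name.function_name' and the 'package.module_name.function_name' forms. A minimal standalone sketch of just that resolution step; the helper name and example inputs are illustrative, not package API, and the real function also appends the project's hooks/<pipeline> directory to sys.path before importing:

# Sketch of load_hook's path-splitting step; names and inputs are hypothetical.
def split_function_path(function_path: str) -> tuple[str, str, str]:
    segments = function_path.rsplit(".", 2)
    if len(segments) == 2:
        # 'module_name.function_name'
        module_name, function_name = segments
        module_path = ""
    else:
        # 'package.[subpackage.]module_name.function_name'
        module_path, module_name, function_name = segments
    return module_path, module_name, function_name

print(split_function_path("my_hooks.on_success"))          # ('', 'my_hooks', 'on_success')
print(split_function_path("pkg.sub.my_hooks.on_success"))  # ('pkg.sub', 'my_hooks', 'on_success')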
flowerpower/flowerpower.py CHANGED
@@ -6,15 +6,19 @@ from pathlib import Path
 import rich
 from fsspec.spec import AbstractFileSystem
 
-from .cfg import Config
+from .cfg import ProjectConfig
 from .fs import get_filesystem
-
+from . import settings
 
 def init(
     name: str | None = None,
     base_dir: str | None = None,
     storage_options: dict = {},
     fs: AbstractFileSystem | None = None,
+    job_queue_type: str = settings.DEFAULT_JOB_QUEUE,
+    cfg_dir: str = settings.CONFIG_DIR,
+    pipelines_dir: str = settings.PIPELINES_DIR,
+    hooks_dir: str = settings.HOOKS_DIR,
 ):
     if name is None:
         name = str(Path.cwd().name)
@@ -25,11 +29,11 @@ def init(
 
     fs = get_filesystem(posixpath.join(base_dir, name), **storage_options)
 
-    fs.makedirs("conf/pipelines", exist_ok=True)
-    fs.makedirs("pipelines", exist_ok=True)
-    fs.makedirs("hooks", exist_ok=True)
+    fs.makedirs(f"{cfg_dir}/pipelines", exist_ok=True)
+    fs.makedirs(pipelines_dir, exist_ok=True)
+    fs.makedirs(hooks_dir, exist_ok=True)
 
-    cfg = Config.load(base_dir=posixpath.join(base_dir, name), name=name)
+    cfg = ProjectConfig.load(base_dir=posixpath.join(base_dir, name), name=name, job_queue_type=job_queue_type)
 
     with open(posixpath.join(base_dir, name, "README.md"), "w") as f:
         f.write(
@@ -55,9 +59,9 @@ def init(
     [dim]More options:[/dim]
     [blue underline]https://docs.astral.sh/uv/getting-started/installation/[/blue underline]
 
-    🚀 Initialize your project:
+    🚀 Initialize uv in your flowerpower project:
     [dim]Run the following in your project directory:[/dim]
-    [bold white]uv init --app --no-readme --vcs git[/bold white]
+    [bold white]uv init --bare --no-readme[/bold white]
     """
     )
 
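The reworked init() now threads the config, pipelines, and hooks directory names plus the job queue backend through defaults in the new flowerpower/settings.py module. A usage sketch against that signature; the import path is inferred from the file list, and "rq" is an assumed backend id given the new job_queue/rq package:

# Hypothetical call matching the new init() signature shown above; defaults
# come from settings (DEFAULT_JOB_QUEUE, CONFIG_DIR, PIPELINES_DIR, HOOKS_DIR).
from flowerpower.flowerpower import init

init(
    name="my-project",    # project folder name; defaults to the CWD name
    base_dir=".",         # parent directory in which the project is created
    job_queue_type="rq",  # new in 1.0.0b1; assumed value, selects the queue backend
)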
flowerpower/fs/__init__.py CHANGED
@@ -1,10 +1,28 @@
 import importlib
 
 has_orjson = importlib.util.find_spec("orjson") is not None
+has_polars = importlib.util.find_spec("polars") is not None
 
-if has_orjson:
+if has_orjson and has_polars:
     from .ext import AbstractFileSystem
 else:
     from fsspec import AbstractFileSystem
 
-from .base import get_filesystem
+from .base import get_filesystem  # noqa: E402
+from .storage_options import AwsStorageOptions  # noqa: E402
+from .storage_options import AzureStorageOptions  # noqa: E402
+from .storage_options import (BaseStorageOptions, GcsStorageOptions,
+                              GitHubStorageOptions, GitLabStorageOptions,
+                              StorageOptions)
+
+__all__ = [
+    "get_filesystem",
+    "AbstractFileSystem",
+    "StorageOptions",
+    "AwsStorageOptions",
+    "AzureStorageOptions",
+    "GcsStorageOptions",
+    "GitHubStorageOptions",
+    "GitLabStorageOptions",
+    "BaseStorageOptions",
+]
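With the new __all__, the storage-option classes and get_filesystem are importable directly from flowerpower.fs. A short sketch using the dict form of storage options shown later in the get_filesystem docstring; bucket name and credentials are placeholders:

# Placeholder credentials; cached=True wraps the filesystem in the caching layer.
from flowerpower.fs import get_filesystem

fs = get_filesystem(
    "s3://my-bucket/data",
    storage_options={"key": "ACCESS_KEY", "secret": "SECRET_KEY"},
    cached=True,
    cache_storage="/tmp/s3_cache",
)
print(fs.ls("/"))  # paths are relative to the DirFileSystem root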
flowerpower/fs/base.py CHANGED
@@ -4,6 +4,7 @@ import os
 import posixpath
 import urllib
 from pathlib import Path
+from typing import Any
 
 import fsspec
 import requests
@@ -15,16 +16,65 @@ from fsspec.implementations.memory import MemoryFile
 from fsspec.utils import infer_storage_options
 from loguru import logger
 
-from .ext import AbstractFileSystem
+from ..utils.logging import setup_logging
+from . import has_orjson, has_polars
+
+if has_orjson and has_polars:
+    from .ext import AbstractFileSystem
+else:
+    from fsspec import AbstractFileSystem
+
 from .storage_options import BaseStorageOptions
 from .storage_options import from_dict as storage_options_from_dict
 
+setup_logging()
+
 
 class FileNameCacheMapper(AbstractCacheMapper):
-    def __init__(self, directory):
+    """Maps remote file paths to local cache paths while preserving directory structure.
+
+    This cache mapper maintains the original file path structure in the cache directory,
+    creating necessary subdirectories as needed.
+
+    Attributes:
+        directory (str): Base directory for cached files
+
+    Example:
+        >>> # Create cache mapper for S3 files
+        >>> mapper = FileNameCacheMapper("/tmp/cache")
+        >>>
+        >>> # Map remote path to cache path
+        >>> cache_path = mapper("bucket/data/file.csv")
+        >>> print(cache_path)  # Preserves structure
+        'bucket/data/file.csv'
+    """
+
+    def __init__(self, directory: str):
+        """Initialize cache mapper with base directory.
+
+        Args:
+            directory: Base directory where cached files will be stored
+        """
         self.directory = directory
 
     def __call__(self, path: str) -> str:
+        """Map remote file path to cache file path.
+
+        Creates necessary subdirectories in the cache directory to maintain
+        the original path structure.
+
+        Args:
+            path: Original file path from remote filesystem
+
+        Returns:
+            str: Cache file path that preserves original structure
+
+        Example:
+            >>> mapper = FileNameCacheMapper("/tmp/cache")
+            >>> # Maps maintain directory structure
+            >>> print(mapper("data/nested/file.txt"))
+            'data/nested/file.txt'
+        """
         os.makedirs(
             posixpath.dirname(posixpath.join(self.directory, path)), exist_ok=True
         )
@@ -32,15 +82,81 @@ class FileNameCacheMapper(AbstractCacheMapper):
 
 
 class MonitoredSimpleCacheFileSystem(SimpleCacheFileSystem):
-    def __init__(self, **kwargs):
-        # kwargs["cache_storage"] = posixpath.join(
-        #     kwargs.get("cache_storage"), kwargs.get("fs").protocol[0]
-        # )
+    """Enhanced caching filesystem with monitoring and improved path handling.
+
+    This filesystem extends SimpleCacheFileSystem to provide:
+    - Verbose logging of cache operations
+    - Improved path mapping for cache files
+    - Enhanced synchronization capabilities
+    - Better handling of parallel operations
+
+    Attributes:
+        _verbose (bool): Whether to print verbose cache operations
+        _mapper (FileNameCacheMapper): Maps remote paths to cache paths
+        storage (list[str]): List of cache storage locations
+        fs (AbstractFileSystem): Underlying filesystem being cached
+
+    Example:
+        >>> from fsspec import filesystem
+        >>> # Create monitored cache for S3
+        >>> s3 = filesystem("s3", key="ACCESS_KEY", secret="SECRET_KEY")
+        >>> cached_fs = MonitoredSimpleCacheFileSystem(
+        ...     fs=s3,
+        ...     cache_storage="/tmp/s3_cache",
+        ...     verbose=True
+        ... )
+        >>>
+        >>> # Read file (downloads and caches)
+        >>> with cached_fs.open("bucket/data.csv") as f:
+        ...     data = f.read()
+        Downloading s3://bucket/data.csv
+        >>>
+        >>> # Second read uses cache
+        >>> with cached_fs.open("bucket/data.csv") as f:
+        ...     data = f.read()  # No download message
+    """
+
+    def __init__(self, **kwargs: Any):
+        """Initialize monitored cache filesystem.
+
+        Args:
+            **kwargs: Configuration options including:
+                fs (AbstractFileSystem): Filesystem to cache
+                cache_storage (str): Cache directory path
+                verbose (bool): Enable verbose logging
+                And any other SimpleCacheFileSystem options
+
+        Example:
+            >>> # Cache with custom settings
+            >>> cached_fs = MonitoredSimpleCacheFileSystem(
+            ...     fs=remote_fs,
+            ...     cache_storage="/tmp/cache",
+            ...     verbose=True,
+            ...     same_names=True  # Use original filenames
+            ... )
+        """
         self._verbose = kwargs.get("verbose", False)
         super().__init__(**kwargs)
         self._mapper = FileNameCacheMapper(kwargs.get("cache_storage"))
 
-    def _check_file(self, path):
+    def _check_file(self, path: str) -> str | None:
+        """Check if file exists in cache and download if needed.
+
+        Args:
+            path: Path to file in the remote filesystem
+
+        Returns:
+            str | None: Path to cached file if found/downloaded, None otherwise
+
+        Example:
+            >>> fs = MonitoredSimpleCacheFileSystem(
+            ...     fs=remote_fs,
+            ...     cache_storage="/tmp/cache"
+            ... )
+            >>> cached_path = fs._check_file("data.csv")
+            >>> print(cached_path)
+            '/tmp/cache/data.csv'
+        """
         self._check_cache()
         cache_path = self._mapper(path)
         for storage in self.storage:
@@ -50,17 +166,50 @@ class MonitoredSimpleCacheFileSystem(SimpleCacheFileSystem):
         if self._verbose:
             logger.info(f"Downloading {self.protocol[0]}://{path}")
 
-    # def glob(self, path):
-    #     return [self._strip_protocol(path)]
+    def size(self, path: str) -> int:
+        """Get size of file in bytes.
+
+        Checks cache first, falls back to remote filesystem.
+
+        Args:
+            path: Path to file
 
-    def size(self, path):
+        Returns:
+            int: Size of file in bytes
+
+        Example:
+            >>> fs = MonitoredSimpleCacheFileSystem(
+            ...     fs=remote_fs,
+            ...     cache_storage="/tmp/cache"
+            ... )
+            >>> size = fs.size("large_file.dat")
+            >>> print(f"File size: {size} bytes")
+        """
         cached_file = self._check_file(self._strip_protocol(path))
         if cached_file is None:
             return self.fs.size(path)
         else:
             return posixpath.getsize(cached_file)
 
-    def sync(self, reload: bool = False):
+    def sync_cache(self, reload: bool = False) -> None:
+        """Synchronize cache with remote filesystem.
+
+        Downloads all files in remote path to cache if not present.
+
+        Args:
+            reload: Whether to force reload all files, ignoring existing cache
+
+        Example:
+            >>> fs = MonitoredSimpleCacheFileSystem(
+            ...     fs=remote_fs,
+            ...     cache_storage="/tmp/cache"
+            ... )
+            >>> # Initial sync
+            >>> fs.sync_cache()
+            >>>
+            >>> # Force reload all files
+            >>> fs.sync_cache(reload=True)
+        """
         if reload:
             self.clear_cache()
         content = self.glob("**/*")
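Note the rename in this hunk: the 0.9.x sync() method is now sync_cache(), with the reload flag keeping its meaning. A migration sketch for callers; the memory filesystem is just a stand-in so the snippet runs without remote credentials:

from fsspec import filesystem
from flowerpower.fs.base import MonitoredSimpleCacheFileSystem

# Assumed setup: wrap any fsspec filesystem; memory:// keeps this local.
cached_fs = MonitoredSimpleCacheFileSystem(
    fs=filesystem("memory"), cache_storage="/tmp/fp_cache"
)

# 0.9.13.1: cached_fs.sync(reload=True)
cached_fs.sync_cache(reload=True)  # clears the cache, then re-fetches via glob("**/*")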
@@ -154,6 +303,41 @@
 
 
 class GitLabFileSystem(AbstractFileSystem):
+    """FSSpec-compatible filesystem interface for GitLab repositories.
+
+    Provides access to files in GitLab repositories through the GitLab API,
+    supporting read operations with authentication.
+
+    Attributes:
+        project_name (str): Name of the GitLab project
+        project_id (str): ID of the GitLab project
+        access_token (str): GitLab personal access token
+        branch (str): Git branch to read from
+        base_url (str): GitLab instance URL
+
+    Example:
+        >>> # Access public project
+        >>> fs = GitLabFileSystem(
+        ...     project_name="my-project",
+        ...     access_token="glpat-xxxx"
+        ... )
+        >>>
+        >>> # Read file contents
+        >>> with fs.open("path/to/file.txt") as f:
+        ...     content = f.read()
+        >>>
+        >>> # List directory
+        >>> files = fs.ls("path/to/dir")
+        >>>
+        >>> # Access enterprise GitLab
+        >>> fs = GitLabFileSystem(
+        ...     project_id="12345",
+        ...     access_token="glpat-xxxx",
+        ...     base_url="https://gitlab.company.com",
+        ...     branch="develop"
+        ... )
+    """
+
     def __init__(
         self,
         project_name: str | None = None,
@@ -163,6 +347,21 @@ class GitLabFileSystem(AbstractFileSystem):
         base_url: str = "https://gitlab.com",
         **kwargs,
     ):
+        """Initialize GitLab filesystem.
+
+        Args:
+            project_name: Name of the GitLab project. Required if project_id not provided.
+            project_id: ID of the GitLab project. Required if project_name not provided.
+            access_token: GitLab personal access token for authentication.
+                Required for private repositories.
+            branch: Git branch to read from. Defaults to "main".
+            base_url: GitLab instance URL. Defaults to "https://gitlab.com".
+            **kwargs: Additional arguments passed to AbstractFileSystem.
+
+        Raises:
+            ValueError: If neither project_name nor project_id is provided
+            requests.RequestException: If GitLab API request fails
+        """
         super().__init__(**kwargs)
         self.project_name = project_name
         self.project_id = project_id
@@ -173,11 +372,29 @@ class GitLabFileSystem(AbstractFileSystem):
         if not self.project_id:
             self.project_id = self._get_project_id()
 
-    def _validate_init(self):
+    def _validate_init(self) -> None:
+        """Validate initialization parameters.
+
+        Ensures that either project_id or project_name is provided.
+
+        Raises:
+            ValueError: If neither project_id nor project_name is provided
+        """
         if not self.project_id and not self.project_name:
             raise ValueError("Either 'project_id' or 'project_name' must be provided")
 
-    def _get_project_id(self):
+    def _get_project_id(self) -> str:
+        """Retrieve project ID from GitLab API using project name.
+
+        Makes an API request to search for projects and find the matching project ID.
+
+        Returns:
+            str: The GitLab project ID
+
+        Raises:
+            ValueError: If project not found
+            requests.RequestException: If API request fails
+        """
         url = f"{self.base_url}/api/v4/projects"
         headers = {"PRIVATE-TOKEN": self.access_token}
         params = {"search": self.project_name}
@@ -192,7 +409,29 @@ class GitLabFileSystem(AbstractFileSystem):
         else:
             response.raise_for_status()
 
-    def _open(self, path, mode="rb", **kwargs):
+    def _open(self, path: str, mode: str = "rb", **kwargs) -> MemoryFile:
+        """Open a file from GitLab repository.
+
+        Retrieves file content from GitLab API and returns it as a memory file.
+
+        Args:
+            path: Path to file within repository
+            mode: File open mode. Only "rb" (read binary) is supported.
+            **kwargs: Additional arguments (unused)
+
+        Returns:
+            MemoryFile: File-like object containing file content
+
+        Raises:
+            NotImplementedError: If mode is not "rb"
+            requests.RequestException: If API request fails
+
+        Example:
+            >>> fs = GitLabFileSystem(project_id="12345", access_token="glpat-xxxx")
+            >>> with fs.open("README.md") as f:
+            ...     content = f.read()
+            ...     print(content.decode())
+        """
         if mode != "rb":
             raise NotImplementedError("Only read mode is supported")
 
@@ -209,7 +448,34 @@ class GitLabFileSystem(AbstractFileSystem):
         else:
             response.raise_for_status()
 
-    def _ls(self, path, detail=False, **kwargs):
+    def _ls(self, path: str, detail: bool = False, **kwargs) -> list[str] | list[dict]:
+        """List contents of a directory in GitLab repository.
+
+        Args:
+            path: Directory path within repository
+            detail: Whether to return detailed information about each entry.
+                If True, returns list of dicts with file metadata.
+                If False, returns list of filenames.
+            **kwargs: Additional arguments (unused)
+
+        Returns:
+            list[str] | list[dict]: List of file/directory names or detailed info
+
+        Raises:
+            requests.RequestException: If API request fails
+
+        Example:
+            >>> fs = GitLabFileSystem(project_id="12345", access_token="glpat-xxxx")
+            >>> # List filenames
+            >>> files = fs.ls("docs")
+            >>> print(files)
+            ['README.md', 'API.md']
+            >>>
+            >>> # List with details
+            >>> details = fs.ls("docs", detail=True)
+            >>> for item in details:
+            ...     print(f"{item['name']}: {item['type']}")
+        """
         url = f"{self.base_url}/api/v4/projects/{self.project_id}/repository/tree?path={path}&ref={self.branch}"
         headers = {"PRIVATE-TOKEN": self.access_token}
         response = requests.get(url, headers=headers)
@@ -258,19 +524,77 @@ def get_filesystem(
     fs: AbstractFileSystem | None = None,
     **storage_options_kwargs,
 ) -> AbstractFileSystem:
-    """
-    Get a filesystem based on the given path.
+    """Get a filesystem instance based on path or configuration.
 
-    Args:
-        path: (str, optional) Path to the filesystem. Defaults to None.
-        storage_options: (AwsStorageOptions | GitHubStorageOptions | GitLabStorageOptions |
-            GcsStorageOptions | AzureStorageOptions | dict[str, str], optional) Storage options.
-            Defaults to None.
-        dirfs: (bool, optional) If True, return a DirFileSystem. Defaults to True.
-        cached: (bool, optional) If True, use a cached filesystem. Defaults to False.
-        cache_storage: (str, optional) Path to the cache storage. Defaults to None.
-        **storage_options_kwargs: Additional keyword arguments for the storage options.
+    This function creates and configures a filesystem instance based on the provided path
+    and options. It supports various filesystem types including local, S3, GCS, Azure,
+    and Git-based filesystems.
 
+    Args:
+        path: URI or path to the filesystem location. Examples:
+            - Local: "/path/to/data"
+            - S3: "s3://bucket/path"
+            - GCS: "gs://bucket/path"
+            - Azure: "abfs://container/path"
+            - GitHub: "github://org/repo/path"
+        storage_options: Configuration options for the filesystem. Can be:
+            - BaseStorageOptions object with protocol-specific settings
+            - Dictionary of key-value pairs for authentication/configuration
+            - None to use environment variables or default credentials
+        dirfs: Whether to wrap filesystem in DirFileSystem for path-based operations.
+            Set to False when you need direct protocol-specific features.
+        cached: Whether to enable local caching of remote files.
+            Useful for frequently accessed remote files.
+        cache_storage: Directory path for cached files. Defaults to path-based location
+            in current directory if not specified.
+        fs: Existing filesystem instance to wrap with caching or dirfs.
+            Use this to customize an existing filesystem instance.
+        **storage_options_kwargs: Additional keyword arguments for storage options.
+            Alternative to passing storage_options dictionary.
+
+    Returns:
+        AbstractFileSystem: Configured filesystem instance with requested features.
+
+    Raises:
+        ValueError: If storage protocol or options are invalid
+        FSSpecError: If filesystem initialization fails
+        ImportError: If required filesystem backend is not installed
+
+    Example:
+        >>> # Local filesystem
+        >>> fs = get_filesystem("/path/to/data")
+        >>>
+        >>> # S3 with credentials
+        >>> fs = get_filesystem(
+        ...     "s3://bucket/data",
+        ...     storage_options={
+        ...         "key": "ACCESS_KEY",
+        ...         "secret": "SECRET_KEY"
+        ...     }
+        ... )
+        >>>
+        >>> # Cached GCS filesystem
+        >>> fs = get_filesystem(
+        ...     "gs://bucket/data",
+        ...     storage_options=GcsStorageOptions(
+        ...         token="service_account.json"
+        ...     ),
+        ...     cached=True,
+        ...     cache_storage="/tmp/gcs_cache"
+        ... )
+        >>>
+        >>> # Azure with environment credentials
+        >>> fs = get_filesystem(
+        ...     "abfs://container/data",
+        ...     storage_options=AzureStorageOptions.from_env()
+        ... )
+        >>>
+        >>> # Wrap existing filesystem
+        >>> base_fs = filesystem("s3", key="ACCESS", secret="SECRET")
+        >>> cached_fs = get_filesystem(
+        ...     fs=base_fs,
+        ...     cached=True
+        ... )
     """
     if fs is not None:
         if cached: