fabricks 3.0.18__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. fabricks/api/context.py +15 -3
  2. fabricks/api/notebooks/schedule.py +2 -3
  3. fabricks/api/parsers.py +2 -1
  4. fabricks/api/utils.py +3 -1
  5. fabricks/cdc/__init__.py +1 -2
  6. fabricks/cdc/base/__init__.py +1 -2
  7. fabricks/cdc/base/_types.py +5 -3
  8. fabricks/cdc/base/configurator.py +5 -0
  9. fabricks/cdc/base/generator.py +7 -3
  10. fabricks/cdc/base/merger.py +2 -0
  11. fabricks/cdc/base/processor.py +15 -0
  12. fabricks/cdc/templates/README.md +490 -0
  13. fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
  14. fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
  15. fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
  16. fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
  17. fabricks/cdc/templates/queries/context.sql.jinja +104 -96
  18. fabricks/cdc/templates/query.sql.jinja +1 -1
  19. fabricks/context/__init__.py +13 -1
  20. fabricks/context/config.py +13 -122
  21. fabricks/context/log.py +92 -1
  22. fabricks/context/runtime.py +35 -69
  23. fabricks/context/spark_session.py +8 -7
  24. fabricks/context/utils.py +26 -39
  25. fabricks/core/__init__.py +2 -2
  26. fabricks/core/dags/base.py +5 -5
  27. fabricks/core/dags/processor.py +2 -3
  28. fabricks/core/extenders.py +1 -1
  29. fabricks/core/job_schema.py +26 -16
  30. fabricks/core/jobs/__init__.py +1 -7
  31. fabricks/core/jobs/base/README.md +1545 -0
  32. fabricks/core/jobs/base/__init__.py +1 -8
  33. fabricks/core/jobs/base/checker.py +7 -7
  34. fabricks/core/jobs/base/configurator.py +142 -63
  35. fabricks/core/jobs/base/generator.py +38 -34
  36. fabricks/core/jobs/base/invoker.py +48 -63
  37. fabricks/core/jobs/base/processor.py +13 -28
  38. fabricks/core/jobs/bronze.py +88 -38
  39. fabricks/core/jobs/get_job.py +3 -6
  40. fabricks/core/jobs/get_job_conf.py +19 -68
  41. fabricks/core/jobs/get_jobs.py +10 -11
  42. fabricks/core/jobs/get_schedules.py +3 -17
  43. fabricks/core/jobs/gold.py +96 -43
  44. fabricks/core/jobs/silver.py +42 -22
  45. fabricks/core/masks.py +11 -8
  46. fabricks/core/parsers/__init__.py +0 -2
  47. fabricks/core/parsers/base.py +10 -10
  48. fabricks/core/parsers/decorator.py +1 -1
  49. fabricks/core/parsers/get_parser.py +4 -5
  50. fabricks/core/schedules/process.py +1 -4
  51. fabricks/core/steps/base.py +27 -17
  52. fabricks/core/steps/get_step.py +2 -4
  53. fabricks/core/steps/get_step_conf.py +3 -7
  54. fabricks/core/udfs.py +9 -8
  55. fabricks/core/views.py +2 -2
  56. fabricks/deploy/__init__.py +27 -16
  57. fabricks/deploy/masks.py +1 -1
  58. fabricks/deploy/notebooks.py +19 -16
  59. fabricks/deploy/schedules.py +1 -1
  60. fabricks/deploy/tables.py +66 -49
  61. fabricks/deploy/udfs.py +2 -2
  62. fabricks/deploy/views.py +15 -16
  63. fabricks/metastore/database.py +3 -3
  64. fabricks/metastore/table.py +103 -68
  65. fabricks/models/__init__.py +125 -0
  66. fabricks/models/common.py +79 -0
  67. fabricks/models/config.py +225 -0
  68. fabricks/models/dependency.py +50 -0
  69. fabricks/models/job.py +157 -0
  70. fabricks/models/path.py +17 -0
  71. fabricks/models/runtime.py +182 -0
  72. fabricks/models/schedule.py +21 -0
  73. fabricks/models/step.py +103 -0
  74. fabricks/models/table.py +77 -0
  75. fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
  76. fabricks/utils/helpers.py +6 -5
  77. fabricks/utils/log.py +25 -6
  78. fabricks/utils/path.py +269 -102
  79. fabricks/utils/pip.py +7 -7
  80. fabricks/utils/read/read.py +23 -22
  81. fabricks/utils/read/read_yaml.py +2 -2
  82. fabricks/utils/write/delta.py +4 -4
  83. fabricks/utils/write/stream.py +2 -2
  84. {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/METADATA +9 -4
  85. {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/RECORD +86 -83
  86. fabricks/context/_types.py +0 -137
  87. fabricks/context/helpers.py +0 -63
  88. fabricks/core/jobs/base/_types.py +0 -284
  89. fabricks/core/parsers/_types.py +0 -6
  90. fabricks/utils/fdict.py +0 -240
  91. fabricks/utils/pydantic.py +0 -94
  92. fabricks/utils/schema/__init__.py +0 -7
  93. fabricks/utils/schema/get_json_schema_for_type.py +0 -161
  94. fabricks/utils/schema/get_schema_for_type.py +0 -99
  95. {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/WHEEL +0 -0
fabricks/utils/path.py CHANGED
@@ -1,23 +1,25 @@
1
- import os
2
1
  import posixpath
2
+ from abc import ABC, abstractmethod
3
3
  from pathlib import Path as PathlibPath
4
4
  from typing import List, Optional, Union
5
5
 
6
6
  from pyspark.sql.dataframe import DataFrame
7
+ from typing_extensions import deprecated
7
8
 
8
9
  from fabricks.utils.spark import spark
9
10
 
10
11
 
11
- class Path:
12
- def __init__(self, path: Union[str, PathlibPath], assume_git: bool = False):
13
- self.assume_git = assume_git
12
+ class BasePath(ABC):
13
+ """Abstract base class for all path types."""
14
14
 
15
+ def __init__(self, path: Union[str, PathlibPath]):
16
+ """Initialize the path."""
15
17
  if isinstance(path, PathlibPath):
16
18
  path = path.as_posix()
17
19
 
18
20
  new_path = str(path)
19
21
  if new_path.startswith("abfss:/") and not new_path.startswith("abfss://"):
20
- new_path = new_path.replace("abfss:/", "abfss://") # // is replaced by / by pathlibpath
22
+ new_path = new_path.replace("abfss:/", "abfss://")
21
23
 
22
24
  self.path: str = new_path
23
25
 
@@ -26,64 +28,100 @@ class Path:
26
28
  cls,
27
29
  uri: str,
28
30
  regex: Optional[dict[str, str]] = None,
29
- assume_git: Optional[bool] = False,
30
31
  ):
32
+ """Create a path from a URI with optional regex substitution."""
31
33
  uri = uri.strip()
32
- if assume_git is None:
33
- assume_git = False
34
34
  if regex:
35
35
  import re
36
36
 
37
37
  for key, value in regex.items():
38
38
  uri = re.sub(rf"{key}", value, uri)
39
39
 
40
- return cls(uri, assume_git=assume_git)
40
+ return cls(uri)
41
+
42
+ @property
43
+ def string(self) -> str:
44
+ """Get the string representation of the path."""
45
+ return self.path
41
46
 
42
47
  @property
43
48
  def pathlibpath(self) -> PathlibPath:
49
+ """Get the pathlib representation of the path."""
44
50
  return PathlibPath(self.string)
45
51
 
46
- @property
47
- def string(self) -> str:
48
- return self.path
52
+ def get_file_name(self) -> str:
53
+ """Get the file name from the path."""
54
+ return self.pathlibpath.name
49
55
 
50
- def get_container(self) -> str:
51
- import re
56
+ def get_sql(self) -> str:
57
+ """Read and return SQL content from a .sql file."""
58
+ p = self.string
59
+ if not p.endswith(".sql"):
60
+ p += ".sql"
52
61
 
53
- assert self.string.startswith("abfss://")
62
+ with open(p, "r") as f:
63
+ sql = f.read()
54
64
 
55
- r = re.compile(r"(?<=abfss:\/\/)(.+?)(?=@)")
56
- m = re.findall(r, self.string)[0]
57
- return m
65
+ return sql
58
66
 
59
- def get_storage_account(self) -> str:
60
- import re
67
+ def is_sql(self) -> bool:
68
+ """Check if the path points to a SQL file."""
69
+ return self.string.endswith(".sql")
61
70
 
62
- assert self.string.startswith("abfss://")
71
+ def joinpath(self, *other):
72
+ """Join this path with other path segments."""
73
+ parts = [str(o) for o in other]
74
+ base = self.string
63
75
 
64
- r = re.compile(r"(?<=@)(.+?)(?=\.)")
65
- m = re.findall(r, self.string)[0]
66
- return m
76
+ joined = posixpath.join(base, *parts)
77
+ new = posixpath.normpath(joined)
67
78
 
68
- def get_file_name(self) -> str:
69
- return self.pathlibpath.name
79
+ return self.__class__(path=new)
70
80
 
71
- def get_file_system(self) -> str:
72
- import re
81
+ def append(self, other: str):
82
+ """Append a string to the path."""
83
+ new_path = self.string + other
84
+ return self.__class__(path=new_path)
73
85
 
74
- assert self.string.startswith("abfss://")
86
+ def parent(self):
87
+ """Get the parent directory of the path."""
88
+ new_path = self.pathlibpath.parent
89
+ return self.__class__(path=new_path)
75
90
 
76
- r = re.compile(r"(?<=\.)(.+)(?=\/)")
77
- m = re.findall(r, self.string)[0]
78
- return m
91
+ @abstractmethod
92
+ def exists(self) -> bool:
93
+ """Check if the path exists."""
79
94
 
80
- def get_dbfs_mnt_path(self) -> str:
81
- mount_point = self.pathlibpath.parts[1].split(".")[0].split("@")[0]
82
- rest = self.pathlibpath.parts[2:]
95
+ @abstractmethod
96
+ def walk(
97
+ self,
98
+ depth: Optional[int] = None,
99
+ convert: Optional[bool] = False,
100
+ file_format: Optional[str] = None,
101
+ ) -> List:
102
+ """Walk the path and return all files."""
83
103
 
84
- return str(os.path.join("/dbfs/mnt", mount_point, "/".join(rest)))
104
+ @abstractmethod
105
+ def _yield(self, path: Union[str, PathlibPath]):
106
+ """Recursively yield all file paths under the given path."""
107
+
108
+ def __str__(self) -> str:
109
+ return self.string
110
+
111
+
112
+ class GitPath(BasePath):
113
+ def __init__(self, path: Union[str, PathlibPath]):
114
+ super().__init__(path=path)
115
+
116
+ def exists(self) -> bool:
117
+ """Check if the path exists in the local/git file system."""
118
+ try:
119
+ return self.pathlibpath.exists()
120
+ except Exception:
121
+ return False
85
122
 
86
123
  def get_notebook_path(self) -> str:
124
+ """Get the notebook path for Databricks workspace."""
87
125
  path = self.path.replace("Workspace/", "")
88
126
  if path.endswith(".ipynb"):
89
127
  path = path.replace(".ipynb", "")
@@ -91,61 +129,90 @@ class Path:
91
129
  path = path.replace(".py", "")
92
130
  return path
93
131
 
94
- def get_sql(self) -> str:
95
- p = self.string
96
- if not p.endswith(".sql"):
97
- p += ".sql"
132
+ def walk(
133
+ self,
134
+ depth: Optional[int] = None,
135
+ convert: Optional[bool] = False,
136
+ file_format: Optional[str] = None,
137
+ ) -> List:
138
+ out = []
139
+ if self.exists():
140
+ if self.pathlibpath.is_file():
141
+ out = [self.string]
142
+ else:
143
+ out = list(self._yield(self.string))
98
144
 
99
- with open(p, "r") as f:
100
- sql = f.read()
145
+ if file_format:
146
+ out = [o for o in out if o.endswith(".sql")]
147
+ if convert:
148
+ out = [self.__class__(o) for o in out]
149
+ return out
101
150
 
102
- return sql
151
+ def _yield(self, path: Union[str, PathlibPath]):
152
+ """Recursively yield all file paths in the git/local file system."""
153
+ if isinstance(path, str):
154
+ path = PathlibPath(path)
103
155
 
104
- def is_sql(self) -> bool:
105
- return self.string.endswith(".sql")
156
+ for child in path.glob(r"*"):
157
+ if child.is_dir():
158
+ yield from self._yield(child)
159
+ else:
160
+ yield str(child)
161
+
162
+
163
+ class FileSharePath(BasePath):
164
+ def __init__(self, path: Union[str, PathlibPath]):
165
+ super().__init__(path=path)
106
166
 
107
167
  def exists(self) -> bool:
168
+ """Check if the path exists in the distributed file system."""
108
169
  try:
109
- if self.assume_git:
110
- return self.pathlibpath.exists()
170
+ from fabricks.utils.spark import dbutils
111
171
 
112
- else:
113
- from fabricks.utils.spark import dbutils
172
+ assert dbutils is not None, "dbutils not found"
173
+ dbutils.fs.ls(self.string)
174
+ return True
175
+ except Exception:
176
+ return False
177
+
178
+ def get_container(self) -> str:
179
+ """Get the container name from an ABFSS path."""
180
+ import re
114
181
 
115
- assert dbutils is not None, "dbutils not found"
182
+ assert self.string.startswith("abfss://")
116
183
 
117
- dbutils.fs.ls(self.string)
118
- return True
184
+ r = re.compile(r"(?<=abfss:\/\/)(.+?)(?=@)")
185
+ m = re.findall(r, self.string)[0]
186
+ return m
119
187
 
120
- except Exception:
121
- return False
188
+ def get_storage_account(self) -> str:
189
+ """Get the storage account name from an ABFSS path."""
190
+ import re
122
191
 
123
- def joinpath(self, *other):
124
- parts = [str(o) for o in other]
125
- base = self.string
192
+ assert self.string.startswith("abfss://")
126
193
 
127
- joined = posixpath.join(base, *parts)
128
- new = posixpath.normpath(joined)
194
+ r = re.compile(r"(?<=@)(.+?)(?=\.)")
195
+ m = re.findall(r, self.string)[0]
196
+ return m
129
197
 
130
- return Path(path=new, assume_git=self.assume_git)
198
+ def get_file_system(self) -> str:
199
+ """Get the file system from an ABFSS path."""
200
+ import re
131
201
 
132
- def append(self, other: str):
133
- new_path = self.string + other
134
- return Path(path=new_path, assume_git=self.assume_git)
202
+ assert self.string.startswith("abfss://")
135
203
 
136
- def parent(self, *other):
137
- new_path = self.pathlibpath.parent
138
- return Path(path=new_path, assume_git=self.assume_git)
204
+ r = re.compile(r"(?<=\.)(.+)(?=\/)")
205
+ m = re.findall(r, self.string)[0]
206
+ return m
139
207
 
140
- def get_file_info(self) -> DataFrame:
141
- assert not self.assume_git
208
+ def get_dbfs_mnt_path(self) -> str:
209
+ """Get the DBFS mount path."""
210
+ import os
142
211
 
143
- rows = self._yield_file_info(self.string)
144
- df = spark.createDataFrame(
145
- rows,
146
- schema=["path", "name", "size", "modification_time"],
147
- )
148
- return df
212
+ mount_point = self.pathlibpath.parts[1].split(".")[0].split("@")[0]
213
+ rest = self.pathlibpath.parts[2:]
214
+
215
+ return str(os.path.join("/dbfs/mnt", mount_point, "/".join(rest)))
149
216
 
150
217
  def walk(
151
218
  self,
@@ -158,20 +225,31 @@ class Path:
158
225
  if self.pathlibpath.is_file():
159
226
  out = [self.string]
160
227
  elif depth:
161
- assert not self.assume_git
162
228
  out = self._list_fs(depth)
163
229
  else:
164
- if self.assume_git:
165
- out = list(self._yield_git(self.string))
166
- else:
167
- out = list(self._yield_fs(self.string))
230
+ out = list(self._yield(self.string))
168
231
 
169
232
  if file_format:
170
233
  out = [o for o in out if o.endswith(".sql")]
171
234
  if convert:
172
- out = [Path(o) for o in out]
235
+ out = [self.__class__(o) for o in out]
173
236
  return out
174
237
 
238
+ def get_file_info(self) -> DataFrame:
239
+ rows = self._yield_file_info(self.string)
240
+ df = spark.createDataFrame(
241
+ rows,
242
+ schema=["path", "name", "size", "modification_time"],
243
+ )
244
+ return df
245
+
246
+ def rm(self):
247
+ from databricks.sdk.runtime import dbutils
248
+
249
+ if self.exists():
250
+ list(self._rm(self.string))
251
+ dbutils.fs.rm(self.string, recurse=True)
252
+
175
253
  def _list_fs(self, depth: int) -> List:
176
254
  from databricks.sdk.runtime import dbutils
177
255
 
@@ -205,32 +283,17 @@ class Path:
205
283
  else:
206
284
  yield dbutils.fs.ls(child.path)[0]
207
285
 
208
- def _yield_fs(self, path: str):
286
+ def _yield(self, path: Union[str, PathlibPath]):
287
+ """Recursively yield all file paths in the distributed file system."""
209
288
  from databricks.sdk.runtime import dbutils
210
289
 
211
- for child in dbutils.fs.ls(path):
290
+ path_str = str(path)
291
+ for child in dbutils.fs.ls(path_str):
212
292
  if child.isDir(): # type: ignore
213
- yield from self._yield_fs(child.path)
293
+ yield from self._yield(child.path)
214
294
  else:
215
295
  yield str(child.path)
216
296
 
217
- def _yield_git(self, path: Union[str, PathlibPath]):
218
- if isinstance(path, str):
219
- path = PathlibPath(path)
220
-
221
- for child in path.glob(r"*"):
222
- if child.is_dir():
223
- yield from self._yield_git(child)
224
- else:
225
- yield str(child)
226
-
227
- def rm(self):
228
- from databricks.sdk.runtime import dbutils
229
-
230
- if self.exists():
231
- list(self._rm(self.string))
232
- dbutils.fs.rm(self.string, recurse=True)
233
-
234
297
  def _rm(self, path: str):
235
298
  from databricks.sdk.runtime import dbutils
236
299
 
@@ -244,5 +307,109 @@ class Path:
244
307
  except Exception:
245
308
  return False
246
309
 
247
- def __str__(self) -> str:
248
- return self.string
310
+
311
+ def resolve_git_path(
312
+ path: str | None,
313
+ default: str | None = None,
314
+ base: GitPath | str | None = None,
315
+ variables: dict[str, str] | None = None,
316
+ ) -> GitPath:
317
+ """
318
+ Resolve a path as a GitPath with optional variable substitution and base path joining.
319
+
320
+ Args:
321
+ path: The path string from configuration
322
+ default: Default value if path is None
323
+ base: Base path to join with (must be GitPath or str)
324
+ variables: Dictionary of regex variable substitutions
325
+ (when provided, base is ignored and the path is resolved via from_uri)
326
+
327
+ Returns:
328
+ Resolved GitPath object
329
+ """
330
+ if isinstance(base, str):
331
+ base = GitPath(base)
332
+
333
+ resolved_value = path or default
334
+ if resolved_value is None:
335
+ raise ValueError("path and default cannot both be None")
336
+
337
+ if variables:
338
+ return GitPath.from_uri(resolved_value, regex=variables)
339
+
340
+ if base:
341
+ return base.joinpath(resolved_value)
342
+
343
+ return GitPath(resolved_value)
344
+
345
+
346
+ def resolve_fileshare_path(
347
+ path: str | None,
348
+ default: str | None = None,
349
+ base: FileSharePath | str | None = None,
350
+ variables: dict[str, str] | None = None,
351
+ ) -> FileSharePath:
352
+ """
353
+ Resolve a path as a FileSharePath with optional variable substitution and base path joining.
354
+
355
+ Args:
356
+ path: The path string from configuration
357
+ default: Default value if path is None
358
+ base: Base path to join with (must be FileSharePath or str)
359
+ variables: Dictionary of regex variable substitutions
360
+ (when provided, base is ignored and the path is resolved via from_uri)
361
+
362
+ Returns:
363
+ Resolved FileSharePath object
364
+ """
365
+ if isinstance(base, str):
366
+ base = FileSharePath(base)
367
+
368
+ resolved_value = path or default
369
+ if resolved_value is None:
370
+ raise ValueError("path and default cannot both be None")
371
+
372
+ if variables:
373
+ return FileSharePath.from_uri(resolved_value, regex=variables)
374
+
375
+ if base:
376
+ return base.joinpath(resolved_value)
377
+
378
+ return FileSharePath(resolved_value)
379
+
380
+
381
+ @deprecated("Use GitPath or FileSharePath directly instead.")
382
+ class Path:
383
+ """
384
+ Legacy Path class with assume_git flag for backward compatibility.
385
+ """
386
+
387
+ def __new__(cls, path: Union[str, PathlibPath], assume_git: bool = False):
388
+ if assume_git:
389
+ return GitPath(path)
390
+ else:
391
+ return FileSharePath(path)
392
+
393
+ @classmethod
394
+ def from_uri(
395
+ cls,
396
+ uri: str,
397
+ regex: Optional[dict[str, str]] = None,
398
+ assume_git: Optional[bool] = False,
399
+ ):
400
+ """
401
+ Create a path from a URI with optional regex substitution.
402
+
403
+ Args:
404
+ uri: The URI string
405
+ regex: Dictionary of regex patterns to substitute
406
+ assume_git: If True, return GitPath; otherwise FileSharePath
407
+
408
+ Returns:
409
+ GitPath if assume_git is True, FileSharePath otherwise
410
+ """
411
+ if assume_git is None:
412
+ assume_git = False
413
+
414
+ path_class = GitPath if assume_git else FileSharePath
415
+ return path_class.from_uri(uri, regex=regex)
fabricks/utils/pip.py CHANGED
@@ -1,13 +1,13 @@
1
1
  import subprocess
2
2
  from typing import List, Optional, Union
3
3
 
4
- from fabricks.utils.path import Path
4
+ from fabricks.utils.path import FileSharePath
5
5
 
6
6
 
7
7
  def pip_package(
8
8
  package: Union[str, List[str]],
9
- whl_path: Optional[Path] = None,
10
- tgt_path: Optional[Path] = None,
9
+ whl_path: Optional[FileSharePath] = None,
10
+ tgt_path: Optional[FileSharePath] = None,
11
11
  ):
12
12
  if isinstance(package, str):
13
13
  package = [package]
@@ -29,9 +29,9 @@ def pip_package(
29
29
 
30
30
 
31
31
  def pip_requirements(
32
- requirements_path: Path,
33
- whl_path: Optional[Path] = None,
34
- tgt_path: Optional[Path] = None,
32
+ requirements_path: FileSharePath,
33
+ whl_path: Optional[FileSharePath] = None,
34
+ tgt_path: Optional[FileSharePath] = None,
35
35
  ):
36
36
  r = requirements_path.string
37
37
 
@@ -50,7 +50,7 @@ def pip_requirements(
50
50
  raise ValueError(r, out.stderr)
51
51
 
52
52
 
53
- def pip_wheel(requirement_path: Path, whl_path: Path):
53
+ def pip_wheel(requirement_path: FileSharePath, whl_path: FileSharePath):
54
54
  import subprocess
55
55
 
56
56
  r = requirement_path.string
@@ -4,12 +4,12 @@ from pyspark.sql import DataFrame, SparkSession
4
4
  from pyspark.sql.types import StructType
5
5
 
6
6
  from fabricks.context import SPARK
7
- from fabricks.utils.path import Path
7
+ from fabricks.utils.path import FileSharePath
8
8
 
9
9
 
10
10
  @overload
11
11
  def read_stream(
12
- src: Union[Path, str],
12
+ src: Union[FileSharePath, str],
13
13
  file_format: str,
14
14
  *,
15
15
  schema: StructType,
@@ -20,9 +20,9 @@ def read_stream(
20
20
 
21
21
  @overload
22
22
  def read_stream(
23
- src: Union[Path, str],
23
+ src: Union[FileSharePath, str],
24
24
  file_format: str,
25
- schema_path: Union[Path, str],
25
+ schema_path: Union[FileSharePath, str],
26
26
  *,
27
27
  options: Optional[dict[str, str]] = None,
28
28
  spark: Optional[SparkSession] = None,
@@ -31,7 +31,7 @@ def read_stream(
31
31
 
32
32
  @overload
33
33
  def read_stream(
34
- src: Union[Path, str],
34
+ src: Union[FileSharePath, str],
35
35
  file_format: str,
36
36
  *,
37
37
  options: Optional[dict[str, str]] = None,
@@ -40,9 +40,9 @@ def read_stream(
40
40
 
41
41
 
42
42
  def read_stream(
43
- src: Union[Path, str],
43
+ src: Union[FileSharePath, str],
44
44
  file_format: str,
45
- schema_path: Optional[Union[Path, str]] = None,
45
+ schema_path: Optional[Union[FileSharePath, str]] = None,
46
46
  hints: Optional[Union[str, List[str]]] = None,
47
47
  schema: Optional[StructType] = None,
48
48
  options: Optional[dict[str, str]] = None,
@@ -60,9 +60,9 @@ def read_stream(
60
60
 
61
61
 
62
62
  def _read_stream(
63
- src: Union[Path, str],
63
+ src: Union[FileSharePath, str],
64
64
  file_format: str,
65
- schema_path: Optional[Union[Path, str]] = None,
65
+ schema_path: Optional[Union[FileSharePath, str]] = None,
66
66
  hints: Optional[Union[str, List[str]]] = None,
67
67
  schema: Optional[StructType] = None,
68
68
  options: Optional[dict[str, str]] = None,
@@ -78,7 +78,7 @@ def _read_stream(
78
78
  else:
79
79
  file_format = "binaryFile" if file_format == "pdf" else file_format
80
80
  if isinstance(src, str):
81
- src = Path(src)
81
+ src = FileSharePath(src)
82
82
  if file_format == "delta":
83
83
  reader = spark.readStream.format("delta")
84
84
  else:
@@ -89,7 +89,8 @@ def _read_stream(
89
89
  else:
90
90
  assert schema_path
91
91
  if isinstance(schema_path, str):
92
- schema_path = Path(schema_path)
92
+ schema_path = FileSharePath(schema_path)
93
+
93
94
  reader.option("cloudFiles.inferColumnTypes", "true")
94
95
  reader.option("cloudFiles.useIncrementalListing", "true")
95
96
  reader.option("cloudFiles.schemaEvolutionMode", "addNewColumns")
@@ -117,7 +118,7 @@ def _read_stream(
117
118
 
118
119
  @overload
119
120
  def read_batch(
120
- src: Union[Path, str],
121
+ src: Union[FileSharePath, str],
121
122
  file_format: str,
122
123
  schema: StructType,
123
124
  options: Optional[dict[str, str]] = None,
@@ -127,7 +128,7 @@ def read_batch(
127
128
 
128
129
  @overload
129
130
  def read_batch(
130
- src: Union[Path, str],
131
+ src: Union[FileSharePath, str],
131
132
  file_format: str,
132
133
  *,
133
134
  options: Optional[dict[str, str]] = None,
@@ -136,7 +137,7 @@ def read_batch(
136
137
 
137
138
 
138
139
  def read_batch(
139
- src: Union[Path, str],
140
+ src: Union[FileSharePath, str],
140
141
  file_format: str,
141
142
  schema: Optional[StructType] = None,
142
143
  options: Optional[dict[str, str]] = None,
@@ -152,7 +153,7 @@ def read_batch(
152
153
 
153
154
 
154
155
  def _read_batch(
155
- src: Union[Path, str],
156
+ src: Union[FileSharePath, str],
156
157
  file_format: str,
157
158
  schema: Optional[StructType] = None,
158
159
  options: Optional[dict[str, str]] = None,
@@ -169,7 +170,7 @@ def _read_batch(
169
170
  path_glob_filter = file_format
170
171
  file_format = "binaryFile" if file_format == "pdf" else file_format
171
172
  if isinstance(src, str):
172
- src = Path(src)
173
+ src = FileSharePath(src)
173
174
  reader = spark.read.format(file_format)
174
175
  reader = reader.option("pathGlobFilter", f"*.{path_glob_filter}")
175
176
  if schema:
@@ -201,7 +202,7 @@ def read(
201
202
  def read(
202
203
  stream: bool,
203
204
  *,
204
- path: Union[Path, str],
205
+ path: Union[FileSharePath, str],
205
206
  file_format: str = "delta",
206
207
  metadata: Optional[bool] = False,
207
208
  spark: Optional[SparkSession] = None,
@@ -212,7 +213,7 @@ def read(
212
213
  def read(
213
214
  stream: bool,
214
215
  *,
215
- path: Union[Path, str],
216
+ path: Union[FileSharePath, str],
216
217
  file_format: str,
217
218
  schema: StructType,
218
219
  options: Optional[dict[str, str]] = None,
@@ -225,9 +226,9 @@ def read(
225
226
  def read(
226
227
  stream: bool,
227
228
  *,
228
- path: Union[Path, str],
229
+ path: Union[FileSharePath, str],
229
230
  file_format: str,
230
- schema_path: Union[Path, str],
231
+ schema_path: Union[FileSharePath, str],
231
232
  options: Optional[dict[str, str]] = None,
232
233
  metadata: Optional[bool] = True,
233
234
  spark: Optional[SparkSession] = None,
@@ -237,9 +238,9 @@ def read(
237
238
  def read(
238
239
  stream: bool,
239
240
  table: Optional[str] = None,
240
- path: Optional[Union[Path, str]] = None,
241
+ path: Optional[Union[FileSharePath, str]] = None,
241
242
  file_format: Optional[str] = None,
242
- schema_path: Optional[Union[Path, str]] = None,
243
+ schema_path: Optional[Union[FileSharePath, str]] = None,
243
244
  schema: Optional[StructType] = None,
244
245
  hints: Optional[Union[str, List[str]]] = None,
245
246
  options: Optional[dict[str, str]] = None,
@@ -2,11 +2,11 @@ from typing import Iterable, Optional, cast
2
2
 
3
3
  import yaml
4
4
 
5
- from fabricks.utils.path import Path
5
+ from fabricks.utils.path import BasePath
6
6
 
7
7
 
8
8
  def read_yaml(
9
- path: Path,
9
+ path: BasePath,
10
10
  root: Optional[str] = None,
11
11
  preferred_file_name: Optional[str] = None,
12
12
  ) -> Iterable[dict]: