deltacat 2.0.0b7__py3-none-any.whl → 2.0.0b10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. deltacat/__init__.py +27 -6
  2. deltacat/api.py +478 -123
  3. deltacat/aws/s3u.py +2 -2
  4. deltacat/benchmarking/conftest.py +1 -1
  5. deltacat/catalog/main/impl.py +12 -6
  6. deltacat/catalog/model/catalog.py +65 -47
  7. deltacat/catalog/model/properties.py +1 -3
  8. deltacat/compute/__init__.py +14 -0
  9. deltacat/compute/converter/constants.py +5 -0
  10. deltacat/compute/converter/converter_session.py +78 -36
  11. deltacat/compute/converter/model/convert_input.py +24 -4
  12. deltacat/compute/converter/model/convert_result.py +61 -0
  13. deltacat/compute/converter/model/converter_session_params.py +52 -10
  14. deltacat/compute/converter/pyiceberg/overrides.py +181 -62
  15. deltacat/compute/converter/steps/convert.py +84 -36
  16. deltacat/compute/converter/steps/dedupe.py +25 -4
  17. deltacat/compute/converter/utils/convert_task_options.py +42 -13
  18. deltacat/compute/converter/utils/iceberg_columns.py +5 -0
  19. deltacat/compute/converter/utils/io.py +82 -11
  20. deltacat/compute/converter/utils/s3u.py +13 -4
  21. deltacat/compute/jobs/__init__.py +0 -0
  22. deltacat/compute/jobs/client.py +404 -0
  23. deltacat/constants.py +4 -4
  24. deltacat/daft/daft_scan.py +7 -3
  25. deltacat/daft/translator.py +126 -0
  26. deltacat/examples/basic_logging.py +5 -3
  27. deltacat/examples/hello_world.py +4 -2
  28. deltacat/examples/indexer/__init__.py +0 -0
  29. deltacat/examples/indexer/aws/__init__.py +0 -0
  30. deltacat/examples/indexer/gcp/__init__.py +0 -0
  31. deltacat/examples/indexer/indexer.py +163 -0
  32. deltacat/examples/indexer/job_runner.py +199 -0
  33. deltacat/io/__init__.py +13 -0
  34. deltacat/io/dataset/__init__.py +0 -0
  35. deltacat/io/dataset/deltacat_dataset.py +91 -0
  36. deltacat/io/datasink/__init__.py +0 -0
  37. deltacat/io/datasink/deltacat_datasink.py +207 -0
  38. deltacat/io/datasource/__init__.py +0 -0
  39. deltacat/io/datasource/deltacat_datasource.py +580 -0
  40. deltacat/io/reader/__init__.py +0 -0
  41. deltacat/io/reader/deltacat_read_api.py +172 -0
  42. deltacat/storage/__init__.py +2 -0
  43. deltacat/storage/model/expression/__init__.py +47 -0
  44. deltacat/storage/model/expression/expression.py +656 -0
  45. deltacat/storage/model/expression/visitor.py +248 -0
  46. deltacat/storage/model/metafile.py +74 -42
  47. deltacat/storage/model/scan/push_down.py +32 -5
  48. deltacat/storage/model/types.py +5 -3
  49. deltacat/storage/rivulet/__init__.py +4 -4
  50. deltacat/tests/_io/reader/__init__.py +0 -0
  51. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  52. deltacat/tests/compute/converter/test_convert_session.py +209 -46
  53. deltacat/tests/local_deltacat_storage/__init__.py +1 -0
  54. deltacat/tests/storage/model/test_expression.py +327 -0
  55. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +2 -1
  56. deltacat/tests/storage/rivulet/test_dataset.py +1 -1
  57. deltacat/tests/storage/rivulet/test_manifest.py +1 -1
  58. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +1 -1
  59. deltacat/tests/test_deltacat_api.py +50 -9
  60. deltacat/types/media.py +141 -43
  61. deltacat/types/tables.py +35 -7
  62. deltacat/utils/daft.py +2 -2
  63. deltacat/utils/filesystem.py +39 -9
  64. deltacat/utils/polars.py +128 -0
  65. deltacat/utils/pyarrow.py +151 -15
  66. deltacat/utils/ray_utils/concurrency.py +1 -1
  67. deltacat/utils/ray_utils/runtime.py +56 -4
  68. deltacat/utils/url.py +1284 -0
  69. {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/METADATA +9 -6
  70. {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/RECORD +73 -48
  71. {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/LICENSE +0 -0
  72. {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/WHEEL +0 -0
  73. {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/top_level.txt +0 -0
deltacat/storage/model/expression/visitor.py
@@ -0,0 +1,248 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Generic, TypeVar, Callable, Optional
+from functools import singledispatchmethod
+import re
+
+from deltacat.storage.model.expression import (
+    Expression,
+    Reference,
+    Literal,
+    BinaryExpression,
+    UnaryExpression,
+    In,
+    Between,
+    Like,
+)
+
+
+C = TypeVar("C")  # Context type
+R = TypeVar("R")  # Return type
+
+
+class ExpressionVisitor(ABC, Generic[C, R]):
+    """
+    Visitor pattern for deltacat expressions.
+
+    This base class provides two ways to implement visitors:
+    1. Using a procedure dictionary (_PROCEDURES) - for simple, declarative visitors
+    2. Using specialized visit_xyz methods with snake_case naming - for more control
+
+    Subclasses need only implement visit_reference and visit_literal, plus either:
+    - Define _PROCEDURES dictionary with functions for handling different expression types
+    - Implement specific visit_xyz methods (using snake_case) for individual expressions
+    """
+
+    # Default procedure dictionary for subclasses to override
+    _PROCEDURES: Dict[str, Callable] = {}
+
+    def __init__(self):
+        """Initialize visitor and validate required methods."""
+        # Pre-check for required methods
+        if not hasattr(self, "visit_reference") or not callable(
+            getattr(self, "visit_reference")
+        ):
+            raise NotImplementedError("Subclasses must implement visit_reference")
+        if not hasattr(self, "visit_literal") or not callable(
+            getattr(self, "visit_literal")
+        ):
+            raise NotImplementedError("Subclasses must implement visit_literal")
+        self._setup_default_procedure_handlers()
+
+    def _to_snake_case(self, name: str) -> str:
+        """Convert PascalCase or camelCase to snake_case."""
+        pattern = re.compile(r"(?<!^)(?=[A-Z])")
+        return pattern.sub("_", name).lower()
+
+    def _setup_default_procedure_handlers(self):
+        """Set up default procedure application methods if not overridden."""
+        if not hasattr(self, "_apply_binary") or not callable(
+            getattr(self, "_apply_binary")
+        ):
+            self._apply_binary = lambda proc, left, right: proc(left, right)
+        if not hasattr(self, "_apply_unary") or not callable(
+            getattr(self, "_apply_unary")
+        ):
+            self._apply_unary = lambda proc, operand: proc(operand)
+        if not hasattr(self, "_apply_in") or not callable(getattr(self, "_apply_in")):
+            self._apply_in = lambda proc, value, values: proc(value, values)
+        if not hasattr(self, "_apply_between") or not callable(
+            getattr(self, "_apply_between")
+        ):
+            self._apply_between = lambda proc, value, lower, upper: proc(
+                value, lower, upper
+            )
+        if not hasattr(self, "_apply_like") or not callable(
+            getattr(self, "_apply_like")
+        ):
+            self._apply_like = lambda proc, value, pattern: proc(value, pattern)
+
+    @singledispatchmethod
+    def visit(self, expr: Expression, context: Optional[C] = None) -> R:
+        """
+        Generic visit method that dispatches to specific methods based on expression type.
+
+        Args:
+            expr: The expression to visit
+            context: Optional context to pass through the visitor
+
+        Returns:
+            Result of visiting the expression
+        """
+        expr_type = type(expr).__name__
+        raise NotImplementedError(f"No visit method for type {expr_type}")
+
+    @visit.register
+    def _visit_reference(self, expr: Reference, context: Optional[C] = None) -> R:
+        """Visit a Reference expression."""
+        return self.visit_reference(expr, context)
+
+    @visit.register
+    def _visit_literal(self, expr: Literal, context: Optional[C] = None) -> R:
+        """Visit a Literal expression."""
+        return self.visit_literal(expr, context)
+
+    @visit.register
+    def _visit_binary(self, expr: BinaryExpression, context: Optional[C] = None) -> R:
+        """Visit a binary expression using method specialization or procedures."""
+        expr_type = type(expr).__name__
+
+        left_result = self.visit(expr.left, context)
+        right_result = self.visit(expr.right, context)
+
+        method_name = f"visit_{self._to_snake_case(expr_type)}"
+        if hasattr(self, method_name):
+            method = getattr(self, method_name)
+            return method(expr, context)
+
+        if expr_type in self._PROCEDURES:
+            return self._apply_binary(
+                self._PROCEDURES[expr_type], left_result, right_result
+            )
+
+        try:
+            return self.visit_binary_expression(
+                expr, left_result, right_result, context
+            )
+        except NotImplementedError:
+            raise NotImplementedError(f"No handler for {expr_type}")
+
+    @visit.register
+    def _visit_unary(self, expr: UnaryExpression, context: Optional[C] = None) -> R:
+        """Visit a unary expression using method specialization or procedures."""
+        expr_type = type(expr).__name__
+
+        operand_result = self.visit(expr.operand, context)
+
+        method_name = f"visit_{self._to_snake_case(expr_type)}"
+        if hasattr(self, method_name):
+            method = getattr(self, method_name)
+            return method(expr, context)
+
+        if expr_type in self._PROCEDURES:
+            return self._apply_unary(self._PROCEDURES[expr_type], operand_result)
+
+        try:
+            return self.visit_unary_expression(expr, operand_result, context)
+        except NotImplementedError:
+            raise NotImplementedError(f"No handler for {expr_type}")
+
+    @visit.register
+    def _visit_in(self, expr: In, context: Optional[C] = None) -> R:
+        """Visit an In expression."""
+        if hasattr(self, "visit_in"):
+            return self.visit_in(expr, context)
+
+        if "In" in self._PROCEDURES:
+            value_result = self.visit(expr.value, context)
+            values_results = [self.visit(v, context) for v in expr.values]
+            return self._apply_in(self._PROCEDURES["In"], value_result, values_results)
+
+        raise NotImplementedError("No handler for In expression")
+
+    @visit.register
+    def _visit_between(self, expr: Between, context: Optional[C] = None) -> R:
+        """Visit a Between expression."""
+        if hasattr(self, "visit_between"):
+            return self.visit_between(expr, context)
+
+        if "Between" in self._PROCEDURES:
+            value_result = self.visit(expr.value, context)
+            lower_result = self.visit(expr.lower, context)
+            upper_result = self.visit(expr.upper, context)
+            return self._apply_between(
+                self._PROCEDURES["Between"], value_result, lower_result, upper_result
+            )
+
+        raise NotImplementedError("No handler for Between expression")
+
+    @visit.register
+    def _visit_like(self, expr: Like, context: Optional[C] = None) -> R:
+        """Visit a Like expression."""
+        if hasattr(self, "visit_like"):
+            return self.visit_like(expr, context)
+
+        if "Like" in self._PROCEDURES:
+            value_result = self.visit(expr.value, context)
+            pattern_result = self.visit(expr.pattern, context)
+            return self._apply_like(
+                self._PROCEDURES["Like"], value_result, pattern_result
+            )
+
+        raise NotImplementedError("No handler for Like expression")
+
+    @abstractmethod
+    def visit_reference(self, expr: Reference, context: Optional[C] = None) -> R:
+        """Visit a Reference expression."""
+        pass
+
+    @abstractmethod
+    def visit_literal(self, expr: Literal, context: Optional[C] = None) -> R:
+        """Visit a Literal expression."""
+        pass
+
+    def visit_binary_expression(
+        self, expr: BinaryExpression, left: R, right: R, context: Optional[C] = None
+    ) -> R:
+        """Default fallback handler for binary expressions."""
+        raise NotImplementedError(f"No handler for {type(expr).__name__}")
+
+    def visit_unary_expression(
+        self, expr: UnaryExpression, operand: R, context: Optional[C] = None
+    ) -> R:
+        """Default fallback handler for unary expressions."""
+        raise NotImplementedError(f"No handler for {type(expr).__name__}")
+
+
+class DisplayVisitor(ExpressionVisitor[Expression, str]):
+    """
+    Visitor implementation that formats expressions in standard infix notation.
+    For example: "a = b AND c > d" instead of "(AND (= a b) (> c d))".
+    """
+
+    # Map all expression types to their string formatting procedures with infix notation
+    _PROCEDURES = {
+        # Binary operations with infix notation
+        "Equal": lambda left, right: f"{left} = {right}",
+        "NotEqual": lambda left, right: f"{left} <> {right}",
+        "GreaterThan": lambda left, right: f"{left} > {right}",
+        "LessThan": lambda left, right: f"{left} < {right}",
+        "GreaterThanEqual": lambda left, right: f"{left} >= {right}",
+        "LessThanEqual": lambda left, right: f"{left} <= {right}",
+        "And": lambda left, right: f"({left} AND {right})",
+        "Or": lambda left, right: f"({left} OR {right})",
+        # Unary operations
+        "Not": lambda operand: f"NOT ({operand})",
+        "IsNull": lambda operand: f"({operand}) IS NULL",
+        # Special operations
+        "In": lambda value, values: f"{value} IN ({', '.join(values)})",
+        "Between": lambda value, lower, upper: f"{value} BETWEEN {lower} AND {upper}",
+        "Like": lambda value, pattern: f"{value} LIKE {pattern}",
+    }
+
+    def visit_reference(self, expr: Reference, context=None) -> str:
+        """Format a field reference."""
+        return expr.field
+
+    def visit_literal(self, expr: Literal, context=None) -> str:
+        """Format a literal value using its PyArrow representation."""
+        return str(expr.value)
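
A minimal usage sketch of the new visitor, for context. The subclass name SqlishVisitor is hypothetical, and it assumes the concrete expression nodes in expression.py (not shown in this diff) expose the field/value/left/right attributes that the base class dispatch relies on:

from typing import Optional

from deltacat.storage.model.expression import Expression, Literal, Reference
from deltacat.storage.model.expression.visitor import ExpressionVisitor


class SqlishVisitor(ExpressionVisitor[Expression, str]):
    """Hypothetical subclass: just the two required hooks plus a _PROCEDURES table."""

    _PROCEDURES = {
        "Equal": lambda left, right: f"{left} = {right}",
        "And": lambda left, right: f"({left} AND {right})",
        "IsNull": lambda operand: f"{operand} IS NULL",
    }

    def visit_reference(self, expr: Reference, context: Optional[Expression] = None) -> str:
        # Quote column names, unlike DisplayVisitor which returns them bare.
        return f'"{expr.field}"'

    def visit_literal(self, expr: Literal, context: Optional[Expression] = None) -> str:
        return repr(expr.value)


# Rendering then reduces to a single dispatch call, mirroring DisplayVisitor:
# SqlishVisitor().visit(expr) returns e.g. "region" = 'us-east-1' for an Equal node.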
deltacat/storage/model/metafile.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 
 import copy
 
-from typing import Optional, Tuple, List
+from typing import Optional, Tuple, List, Union
 
 import base64
 import json
@@ -412,7 +412,7 @@ class Metafile(dict):
     @staticmethod
     def based_on(
         other: Optional[Metafile],
-        new_id: Optional[Locator] = None,
+        new_id: Optional[str] = None,
     ) -> Optional[Metafile]:
         """
         Returns a new metafile equivalent to the input metafile, but with a new
@@ -539,29 +539,31 @@ class Metafile(dict):
            f"${serialized_dict}"
        )
 
+    @staticmethod
+    def get_type_name(serialized_dict: dict):
+        """
+        Given a serialized dictionary of Metafile data, gets the type name of
+        the metafile class.
+        """
+        return Metafile.get_class(serialized_dict).__name__
+
     @classmethod
-    def read(
+    def deserialize(
         cls,
-        path: str,
-        filesystem: Optional[pyarrow.fs.FileSystem] = None,
-        format: Optional[str] = METAFILE_FORMAT,
+        serialized: Union[bytes, str],
+        meta_format: Optional[str] = METAFILE_FORMAT,
     ) -> Metafile:
         """
-        Read a metadata file and return the deserialized object.
-        :param path: Metadata file path to read.
-        :param filesystem: File system to use for reading the metadata file.
-        :param format: Format to use for deserializing the metadata file.
-        :return: Deserialized object from the metadata file.
+        Deserialize a metadata file from the given bytes or string.
+        :param serialized: Serialized metadata file data.
+        :param meta_format: Format to use for deserializing the metadata file.
+        :return: Deserialized metadata file.
         """
-        if format not in SUPPORTED_METAFILE_FORMATS:
+        if meta_format not in SUPPORTED_METAFILE_FORMATS:
            raise ValueError(
-                f"Unsupported format '{format}'. Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
+                f"Unsupported format '{meta_format}'. "
+                f"Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
            )
-
-        if not filesystem:
-            path, filesystem = resolve_path_and_filesystem(path, filesystem)
-        with filesystem.open_input_stream(path) as file:
-            binary = file.readall()
        reader = {
            "json": lambda b: json.loads(
                b.decode("utf-8"),
@@ -573,12 +575,32 @@ class Metafile(dict):
                },
            ),
            "msgpack": msgpack.loads,
-        }[format]
-        data = reader(binary)
+        }[meta_format]
+        data = reader(serialized)
        # cast this Metafile into the appropriate child class type
        clazz = Metafile.get_class(data)
-        obj = clazz(**data).from_serializable(path, filesystem)
-        return obj
+        return clazz(**data)
+
+    @classmethod
+    def read(
+        cls,
+        path: str,
+        filesystem: Optional[pyarrow.fs.FileSystem] = None,
+        meta_format: Optional[str] = METAFILE_FORMAT,
+    ) -> Metafile:
+        """
+        Read a metadata file and return the deserialized object.
+        :param path: Metadata file path to read.
+        :param filesystem: File system to use for reading the metadata file.
+        :param meta_format: Format to use for deserializing the metadata file.
+        :return: Deserialized object from the metadata file.
+        """
+        if not filesystem:
+            path, filesystem = resolve_path_and_filesystem(path, filesystem)
+        with filesystem.open_input_stream(path) as file:
+            serialized = file.readall()
+        metafile = Metafile.deserialize(serialized, meta_format)
+        return metafile.from_serializable(path, filesystem)
 
     def write_txn(
         self,
@@ -616,11 +638,37 @@ class Metafile(dict):
            filesystem=filesystem,
        )
 
+    def serialize(
+        self,
+        meta_format: Optional[str] = METAFILE_FORMAT,
+    ) -> Union[bytes, str]:
+        """
+        Serialize this object to the given metafile format.
+        :param meta_format: Format to use for serializing the metadata file.
+        :return: Serialized metadata file bytes or string (format dependent).
+        """
+        if meta_format not in SUPPORTED_METAFILE_FORMATS:
+            raise ValueError(
+                f"Unsupported format '{meta_format}'. "
+                f"Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
+            )
+        serializer = {
+            "json": lambda data: json.dumps(
+                data,
+                indent=4,
+                default=lambda b: base64.b64encode(b).decode("utf-8")
+                if isinstance(b, bytes)
+                else b,
+            ).encode("utf-8"),
+            "msgpack": msgpack.dumps,
+        }[meta_format]
+        return serializer(self.to_serializable())
+
     def write(
         self,
         path: str,
         filesystem: Optional[pyarrow.fs.FileSystem] = None,
-        format: Optional[str] = METAFILE_FORMAT,
+        meta_format: Optional[str] = METAFILE_FORMAT,
     ) -> None:
         """
         Serialize and write this object to a metadata file.
@@ -628,31 +676,15 @@ class Metafile(dict):
        :param filesystem: File system to use for writing the metadata file. If
        not given, a default filesystem will be automatically selected based on
        the catalog root path.
-        param: format: Format to use for serializing the metadata file.
+        :param meta_format: Format to use for serializing the metadata file.
        """
-        if format not in SUPPORTED_METAFILE_FORMATS:
-            raise ValueError(
-                f"Unsupported format '{format}'. Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
-            )
-
+        serialized = self.serialize(meta_format)
        if not filesystem:
            path, filesystem = resolve_path_and_filesystem(path, filesystem)
        revision_dir_path = posixpath.dirname(path)
        filesystem.create_dir(revision_dir_path, recursive=True)
-
-        writer = {
-            "json": lambda data: json.dumps(
-                data,
-                indent=4,
-                default=lambda b: base64.b64encode(b).decode("utf-8")
-                if isinstance(b, bytes)
-                else b,
-            ).encode("utf-8"),
-            "msgpack": msgpack.dumps,
-        }[format]
-
        with filesystem.open_output_stream(path) as file:
-            file.write(writer(self.to_serializable()))
+            file.write(serialized)
 
     def equivalent_to(self, other: Metafile) -> bool:
         """
deltacat/storage/model/scan/push_down.py
@@ -1,3 +1,10 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Optional
+
+from deltacat.storage.model.expression import Expression
+
+
 class RowFilter:
     ...
 
@@ -6,14 +13,34 @@ class ColumnFilter:
     ...
 
 
+@dataclass
 class PartitionFilter:
-    ...
+    expr: Expression
+
+    @staticmethod
+    def of(expr: Expression) -> PartitionFilter:
+        return PartitionFilter(expr)
 
 
+@dataclass
 class Pushdown:
     """Represents pushdown predicates to be applied for DeltaCAT Tables"""
 
-    row_filter: RowFilter
-    column_filter: ColumnFilter
-    partition_filter: PartitionFilter
-    limit: int
+    row_filter: Optional[RowFilter]
+    column_filter: Optional[ColumnFilter]
+    partition_filter: Optional[PartitionFilter]
+    limit: Optional[int]
+
+    @staticmethod
+    def of(
+        row_filter: Optional[RowFilter],
+        column_filter: Optional[ColumnFilter],
+        partition_filter: Optional[PartitionFilter],
+        limit: Optional[int],
+    ) -> Pushdown:
+        return Pushdown(
+            row_filter=row_filter,
+            column_filter=column_filter,
+            partition_filter=partition_filter,
+            limit=limit,
+        )
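
A small sketch of how the new dataclasses compose. The helper name partition_pushdown is illustrative; the Expression argument would come from the expression module introduced above:

from typing import Optional

from deltacat.storage.model.expression import Expression
from deltacat.storage.model.scan.push_down import PartitionFilter, Pushdown


def partition_pushdown(expr: Expression, limit: Optional[int] = None) -> Pushdown:
    # Only the partition predicate is populated; the other filters stay None.
    return Pushdown.of(
        row_filter=None,
        column_filter=None,
        partition_filter=PartitionFilter.of(expr),
        limit=limit,
    )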
deltacat/storage/model/types.py
@@ -6,7 +6,7 @@ from typing import List, Union
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from ray.data.dataset import Dataset
+from ray.data.dataset import Dataset as RayDataset
 from daft import DataFrame as DaftDataFrame
 
 
@@ -16,13 +16,15 @@ LocalTable = Union[
    np.ndarray,
    pa.parquet.ParquetFile,
 ]
-LocalDataset = List[LocalTable]
-DistributedDataset = Union[Dataset, DaftDataFrame]
+LocalDataset = Union[LocalTable, List[LocalTable]]
+DistributedDataset = Union[RayDataset, DaftDataFrame]
+Dataset = Union[LocalDataset, DistributedDataset]
 
 
 class StreamFormat(str, Enum):
    DELTACAT = "deltacat"
    ICEBERG = "iceberg"
+    HIVE = "hive"
    HUDI = "hudi"
    DELTA_LAKE = "delta_lake"
    SQLITE3 = "SQLITE3"  # used by tests
deltacat/storage/rivulet/__init__.py
@@ -1,7 +1,7 @@
-from .schema.schema import Schema
-from .schema.schema import Field
-from .dataset import Dataset
-from .schema.schema import Datatype
+from deltacat.storage.rivulet.schema.schema import Schema
+from deltacat.storage.rivulet.schema.schema import Field
+from deltacat.storage.rivulet.dataset import Dataset
+from deltacat.storage.rivulet.schema.schema import Datatype
 
 __all__ = [
    "Schema",