deltacat 1.1.20__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.20"
47
+ __version__ = "1.1.22"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -365,7 +365,6 @@ def _run_hash_and_merge(
365
365
  mutable_compaction_audit.set_telemetry_time_in_seconds(
366
366
  telemetry_this_round + previous_telemetry
367
367
  )
368
- params.object_store.clear()
369
368
 
370
369
  return merge_results
371
370
 
File without changes
@@ -0,0 +1,98 @@
1
+ # Allow classes to use self-referencing Type hints in Python 3.7.
2
+ from __future__ import annotations
3
+
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ from deltacat.compute.stats.models.manifest_entry_stats import ManifestEntryStats
7
+ from deltacat.compute.stats.models.stats_result import StatsResult
8
+ from deltacat.compute.stats.types import StatsType
9
+
10
+
11
+ class DeltaColumnStats(dict):
12
+ """
13
+ Stats container for an individual column of a Delta.
14
+ Provides distinct stats results for each manifest entry of the Delta.
15
+
16
+ Example:
17
+ Manifest Entry 1
18
+ =======
19
+ foo bar baz
20
+ A B C
21
+ D E F
22
+
23
+ Manifest Entry 2
24
+ =======
25
+ foo bar baz
26
+ G H I
27
+ J K L
28
+
29
+ DeltaColumnStats("foo",
30
+ ManifestEntryStats([
31
+ StatsResult([A, D]), # Manifest Entry 1
32
+ StatsResult([G, J]), # Manifest Entry 2
33
+ ]))
34
+ DeltaColumnStats("bar",
35
+ ManifestEntryStats([
36
+ StatsResult([B, E]), # Manifest Entry 1
37
+ StatsResult([H, K]), # Manifest Entry 2
38
+ ]))
39
+ DeltaColumnStats("baz",
40
+ ManifestEntryStats([
41
+ StatsResult([C, F]), # Manifest Entry 1
42
+ StatsResult([I, L]), # Manifest Entry 2
43
+ ]))
44
+ """
45
+
46
+ @staticmethod
47
+ def of(column: str, manifest_stats: ManifestEntryStats) -> DeltaColumnStats:
48
+ """
49
+ Creates a container of a column name and the column stats for one or more manifest entries.
50
+ """
51
+ dcs = DeltaColumnStats()
52
+ dcs["column"] = column
53
+ dcs["manifestStats"] = manifest_stats
54
+
55
+ if manifest_stats:
56
+ # Omit row count for columnar-centric stats
57
+ dcs["stats"] = dcs._merge_manifest_stats()
58
+
59
+ return dcs
60
+
61
+ @staticmethod
62
+ def build_from_dict(delta_column_stats: Dict[str, Any]) -> DeltaColumnStats:
63
+ return DeltaColumnStats.of(
64
+ delta_column_stats["column"],
65
+ ManifestEntryStats.build_from_dict(delta_column_stats["manifestStats"]),
66
+ )
67
+
68
+ @property
69
+ def column(self) -> str:
70
+ """Returns the column name."""
71
+ return self.get("column")
72
+
73
+ @property
74
+ def manifest_stats(self) -> Optional[ManifestEntryStats]:
75
+ """Returns a container that represents stats at the manifest level.
76
+
77
+ A container holds a list of computed stats for each manifest entry.
78
+ """
79
+ val: Dict[str, Any] = self.get("manifestStats")
80
+ if val is not None and not isinstance(val, ManifestEntryStats):
81
+ self["manifestStats"] = val = ManifestEntryStats(val)
82
+ return val
83
+
84
+ @property
85
+ def stats(self) -> Optional[StatsResult]:
86
+ """Combines the numerical stats for every manifest entry and returns it."""
87
+ val: Dict[str, Any] = self.get("stats")
88
+ if val is not None and not isinstance(val, StatsResult):
89
+ self["stats"] = val = StatsResult(val)
90
+ elif val is None and self.manifest_stats:
91
+ self["stats"] = val = self._merge_manifest_stats()
92
+
93
+ return val
94
+
95
+ def _merge_manifest_stats(self) -> StatsResult:
96
+ return StatsResult.merge(
97
+ self.manifest_stats.stats, {StatsType.PYARROW_TABLE_BYTES}
98
+ )
@@ -0,0 +1,233 @@
1
+ # Allow classes to use self-referencing Type hints in Python 3.7.
2
+ from __future__ import annotations
3
+
4
+ from collections import defaultdict
5
+ from typing import Any, Dict, List, NamedTuple, Optional, Set
6
+
7
+ from deltacat.compute.stats.models.delta_column_stats import DeltaColumnStats
8
+ from deltacat.compute.stats.models.manifest_entry_stats import ManifestEntryStats
9
+ from deltacat.compute.stats.models.stats_result import StatsResult
10
+ from deltacat.compute.stats.types import StatsType
11
+ from deltacat.storage import DeltaLocator
12
+
13
+
14
+ class DeltaStats(dict):
15
+ """
16
+ Stats container for all columns of a delta.
17
+
18
+ Provides distinct stats for each delta manifest entry, aggregate stats across all manifest entries,
19
+ and a DeltaColumnStats reference for each column.
20
+
21
+ Each DeltaColumnStats has a column name and a ManifestEntryStats object,
22
+ which contains column-level stats for each delta manifest entry.
23
+
24
+ Example of visual representation:
25
+ Manifest Entry 1
26
+ =======
27
+ foo bar baz
28
+ A B C
29
+ D E F
30
+
31
+ Manifest Entry 2
32
+ =======
33
+ foo bar baz
34
+ G H I
35
+ J K L
36
+
37
+ DeltaStats([
38
+ DeltaColumnStats("foo",
39
+ ManifestEntryStats([
40
+ StatsResult([A, D]), # Manifest Entry 1
41
+ StatsResult([G, J]), # Manifest Entry 2
42
+ ]))
43
+ DeltaColumnStats("bar",
44
+ ManifestEntryStats([
45
+ StatsResult([B, E]), # Manifest Entry 1
46
+ StatsResult([H, K]), # Manifest Entry 2
47
+ ]))
48
+ DeltaColumnStats("baz",
49
+ ManifestEntryStats([
50
+ StatsResult([C, F]), # Manifest Entry 1
51
+ StatsResult([I, L]), # Manifest Entry 2
52
+ ]))
53
+ ], Stats(AllDeltaColumnStats))
54
+ """
55
+
56
+ @staticmethod
57
+ def of(column_stats: List[DeltaColumnStats]) -> DeltaStats:
58
+ ds = DeltaStats()
59
+ ds["column_stats"] = column_stats
60
+ ds["stats"] = DeltaStats.get_delta_stats(column_stats)
61
+ return ds
62
+
63
+ @staticmethod
64
+ def build_from_dict(delta_stats: dict) -> DeltaStats:
65
+ delta_column_stats_list = []
66
+ for dcs in delta_stats["column_stats"]:
67
+ delta_column_stats_list.append(DeltaColumnStats.build_from_dict(dcs))
68
+ return DeltaStats.of(delta_column_stats_list)
69
+
70
+ @property
71
+ def column_stats(self) -> List[DeltaColumnStats]:
72
+ """
73
+ Returns a list of stats associated with each column in this delta.
74
+ """
75
+ return self["column_stats"]
76
+
77
+ @property
78
+ def stats(self) -> Optional[StatsResult]:
79
+ """Returns a StatsResult object that represents this delta, aggregated by the column stats of this delta."""
80
+ val: Dict[str, Any] = self.get("stats")
81
+ if val is not None and not isinstance(val, StatsResult):
82
+ self["stats"] = val = StatsResult(val)
83
+ elif val is None and self.column_stats:
84
+ self["stats"] = val = DeltaStats.get_delta_stats(self.column_stats)
85
+
86
+ return val
87
+
88
+ @property
89
+ def columns(self) -> List[str]:
90
+ """Returns a list of column names associated to this delta.
91
+
92
+ Returns:
93
+ A list of column names
94
+ """
95
+ return DeltaStats.get_column_names(self.column_stats)
96
+
97
+ def manifest_entry_stats(self, manifest_entry_idx: int) -> StatsResult:
98
+ """Calculate the stats of a manifest entry by combining its columnar stats.
99
+
100
+ Args:
101
+ manifest_entry_idx: The manifest entry table to calculate stats for
102
+
103
+ Returns:
104
+ Stats for the manifest entry.
105
+ """
106
+ return StatsResult.merge(
107
+ DeltaStats.get_manifest_entry_column_stats(
108
+ self.column_stats, manifest_entry_idx
109
+ ),
110
+ record_row_count_once=True,
111
+ )
112
+
113
+ def manifest_entry_column_stats(self, manifest_entry_idx: int) -> List[StatsResult]:
114
+ """Fetch a list of stats for each column in a manifest entry.
115
+
116
+ Args:
117
+ manifest_entry_idx: The manifest entry table to calculate stats for
118
+
119
+ Returns:
120
+ A list of columnar stats for the manifest entry
121
+ """
122
+ return DeltaStats.get_manifest_entry_column_stats(
123
+ self.column_stats, manifest_entry_idx
124
+ )
125
+
126
+ @staticmethod
127
+ def get_manifest_entry_column_stats(
128
+ columns: List[DeltaColumnStats], manifest_entry_idx: int
129
+ ) -> List[StatsResult]:
130
+ """Helper method to provide a list of columnar stats for a specific manifest entry.
131
+
132
+ Returns:
133
+ A list of columnar stats for the manifest entry
134
+ """
135
+ dataset_columnar_stats_list: List[ManifestEntryStats] = [
136
+ column.manifest_stats
137
+ for column in columns
138
+ if column.manifest_stats is not None
139
+ ]
140
+ try:
141
+ return [
142
+ stats.stats[manifest_entry_idx] for stats in dataset_columnar_stats_list
143
+ ]
144
+ except IndexError:
145
+ sci: ManifestEntryStats = dataset_columnar_stats_list[0]
146
+ raise ValueError(
147
+ f"Table index {manifest_entry_idx} is not present in this dataset of {sci.delta_locator} "
148
+ f"with manifest table count of {len(sci.stats)}"
149
+ )
150
+
151
+ @staticmethod
152
+ def get_column_names(columns: List[DeltaColumnStats]) -> List[str]:
153
+ """Helper method to get the names of each column from a list of delta column stats
154
+
155
+ Args:
156
+ columns: A list of delta column stats
157
+
158
+ Returns:
159
+ A list of column names
160
+ """
161
+ return [column_stats.column for column_stats in columns] if columns else []
162
+
163
+ @staticmethod
164
+ def get_delta_stats(
165
+ columns: List[DeltaColumnStats], stat_types: Optional[Set[StatsType]] = None
166
+ ) -> Optional[StatsResult]:
167
+ """Calculate the sum of provided column stats and return it
168
+
169
+ Args:
170
+ columns: A list of delta column stats
171
+
172
+ Returns:
173
+ Stats for the calculated sum
174
+ """
175
+ assert columns and len(columns) > 0, (
176
+ f"Expected columns `{columns}` of type `{type(columns)}` "
177
+ f"to be a non-empty list of DeltaColumnStats"
178
+ )
179
+
180
+ assert all(
181
+ [col.manifest_stats for col in columns]
182
+ ), f"Expected stats completion info to be present in each item of {columns} "
183
+
184
+ manifest_entry_count = len(columns[0].manifest_stats.stats)
185
+ column_stats_map: Dict[str, List[Optional[StatsResult]]] = defaultdict(
186
+ lambda: [None] * manifest_entry_count
187
+ )
188
+
189
+ for column_stats in columns:
190
+ for file_idx, entry_stats in enumerate(column_stats.manifest_stats.stats):
191
+ column_stats_map[column_stats.column][file_idx] = entry_stats
192
+
193
+ return DeltaStats._merge_stats_from_columns_to_dataset(
194
+ DeltaStats.get_column_names(columns),
195
+ column_stats_map,
196
+ manifest_entry_count,
197
+ stat_types,
198
+ )
199
+
200
+ @staticmethod
201
+ def _merge_stats_from_columns_to_dataset(
202
+ column_names: List[str],
203
+ column_stats: Dict[str, List[Optional[StatsResult]]],
204
+ manifest_entries_size: int,
205
+ stat_types: Optional[Set[StatsType]] = None,
206
+ ) -> StatsResult:
207
+ manifest_entry_stats_summary_list: List[StatsResult] = []
208
+ for manifest_entry_idx in range(manifest_entries_size):
209
+ curr_manifest_entry_column_stats_list: List[StatsResult] = []
210
+ for column_name in column_names:
211
+ current_table_column_stats: StatsResult = column_stats[column_name][
212
+ manifest_entry_idx
213
+ ]
214
+ curr_manifest_entry_column_stats_list.append(current_table_column_stats)
215
+
216
+ curr_manifest_entry_stats_summary = StatsResult.merge(
217
+ curr_manifest_entry_column_stats_list,
218
+ stat_types,
219
+ record_row_count_once=True,
220
+ )
221
+ manifest_entry_stats_summary_list.append(curr_manifest_entry_stats_summary)
222
+ return StatsResult.merge(manifest_entry_stats_summary_list, stat_types)
223
+
224
+
225
+ class DeltaStatsCacheMiss(NamedTuple):
226
+ """A helper class for cache miss results from DeltaStatsCacheResult.
227
+
228
+ `column_names` represents missing dataset column names from the file system (ex: S3).
229
+ `delta_locator` is tied to the missing dataset columns and is provided for future calculations.
230
+ """
231
+
232
+ column_names: List[str]
233
+ delta_locator: DeltaLocator
@@ -0,0 +1,49 @@
1
+ # Allow classes to use self-referencing Type hints in Python 3.7.
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional
5
+
6
+ from deltacat.compute.stats.models.delta_stats import DeltaStats, DeltaStatsCacheMiss
7
+
8
+
9
+ class DeltaStatsCacheResult(dict):
10
+ """A helper class containing the results from a cache query.
11
+
12
+ Stats are fetched and cached at the column level, and each column may represent one
13
+ or more manifest entries.
14
+ """
15
+
16
+ @staticmethod
17
+ def of(
18
+ hits: Optional[DeltaStats], misses: Optional[DeltaStatsCacheMiss]
19
+ ) -> DeltaStatsCacheResult:
20
+ cds = DeltaStatsCacheResult()
21
+ cds["hits"] = hits
22
+ cds["misses"] = misses
23
+ return cds
24
+
25
+ @property
26
+ def hits(self) -> Optional[DeltaStats]:
27
+ """Retrieve stats that were found in the cache
28
+
29
+ `hits` represents a DeltaStats object that contains dataset-wide statistics across
30
+ many of its tables (or manifest entries) and is composed of one or more column-wide
31
+ DeltaColumnStats.
32
+
33
+ Returns:
34
+ A delta wide stats container
35
+ """
36
+ return self["hits"]
37
+
38
+ @property
39
+ def misses(self) -> Optional[DeltaStatsCacheMiss]:
40
+ """Retrieve stats that were missing from the cache
41
+
42
+ `misses` represents a DeltaStatsCacheMiss object that contains a list of
43
+ column names that were not found in the file system (ex: S3) and a `delta_locator`
44
+ as a reference to the delta metadata tied to the missing dataset columns.
45
+
46
+ Returns:
47
+ A tuple with metadata regarding the cache miss
48
+ """
49
+ return self["misses"]
@@ -0,0 +1,72 @@
1
+ # Allow classes to use self-referencing Type hints in Python 3.7.
2
+ from __future__ import annotations
3
+
4
+ from typing import Any, Dict, List
5
+
6
+ import pyarrow as pa
7
+
8
+ from deltacat.compute.stats.models.stats_result import StatsResult
9
+ from deltacat.storage import DeltaLocator
10
+
11
+
12
+ class ManifestEntryStats(dict):
13
+ """Holds computed statistics for one or more manifest entries (tables) and their corresponding delta locator.
14
+
15
+ To be stored/retrieved from a file system (ex: S3).
16
+ """
17
+
18
+ @staticmethod
19
+ def of(
20
+ manifest_entries_stats: List[StatsResult], delta_locator: DeltaLocator
21
+ ) -> ManifestEntryStats:
22
+ """
23
+ Creates a stats container that represents a particular manifest.
24
+
25
+ `manifest_entries_stats` is a list of distinct stats for each manifest entry file
26
+ tied to this manifest. `delta_locator` is provided as a reference to the delta where the
27
+ manifest entries reside.
28
+ """
29
+
30
+ mes = ManifestEntryStats()
31
+ mes["deltaLocator"] = delta_locator
32
+ mes["stats"] = manifest_entries_stats
33
+ mes["pyarrowVersion"] = pa.__version__
34
+ return mes
35
+
36
+ @staticmethod
37
+ def build_from_dict(manifest_entries_stats: dict) -> ManifestEntryStats:
38
+ stats_res_list = []
39
+ for stats_res in manifest_entries_stats["stats"]:
40
+ stats_res_list.append(
41
+ StatsResult.of(stats_res["rowCount"], stats_res["pyarrowTableBytes"])
42
+ )
43
+ return ManifestEntryStats.of(
44
+ stats_res_list, manifest_entries_stats["deltaLocator"]
45
+ )
46
+
47
+ @property
48
+ def delta_locator(self) -> DeltaLocator:
49
+ """Reference to the delta that holds the manifest entries
50
+
51
+ Returns:
52
+ A delta locator object
53
+ """
54
+ val: Dict[str, Any] = self.get("deltaLocator")
55
+ if val is not None and not isinstance(val, DeltaLocator):
56
+ self["deltaLocator"] = val = DeltaLocator(val)
57
+ return val
58
+
59
+ @property
60
+ def stats(self) -> List[StatsResult]:
61
+ """
62
+ Returns a list of distinct stats for each manifest entry file.
63
+ """
64
+ val = self["stats"]
65
+ return [StatsResult(_) for _ in val] if val else []
66
+
67
+ @property
68
+ def pyarrow_version(self) -> str:
69
+ """
70
+ Read-only property which returns the PyArrow version number as it was written into a file system.
71
+ """
72
+ return self.get("pyarrowVersion")
@@ -0,0 +1,104 @@
1
+ # Allow classes to use self-referencing Type hints in Python 3.7.
2
+ from __future__ import annotations
3
+
4
+ from collections import defaultdict
5
+ from typing import Any, Dict, List, Optional, Set
6
+
7
+ from deltacat.compute.stats.types import ALL_STATS_TYPES, StatsType
8
+
9
+
10
+ class StatsResult(dict):
11
+ """A generic container that holds stats for a single manifest entry file."""
12
+
13
+ @staticmethod
14
+ def of(
15
+ row_count: Optional[int] = 0, pyarrow_table_bytes: Optional[int] = 0
16
+ ) -> StatsResult:
17
+ """Static factory for building a stats result object
18
+
19
+ Args:
20
+ row_count: The total number of rows of a manifest entry
21
+ pyarrow_table_bytes: The total number of bytes when loaded into memory as a PyArrow Table
22
+
23
+ Returns:
24
+ A stats result object
25
+ """
26
+ sr = StatsResult()
27
+ sr[StatsType.ROW_COUNT.value] = row_count
28
+ sr[StatsType.PYARROW_TABLE_BYTES.value] = pyarrow_table_bytes
29
+ return sr
30
+
31
+ @property
32
+ def row_count(self) -> int:
33
+ """Represents the row count of a manifest entry file.
34
+
35
+ Returns:
36
+ The total number of rows of a manifest entry
37
+ """
38
+ return self[StatsType.ROW_COUNT.value]
39
+
40
+ @property
41
+ def pyarrow_table_bytes(self) -> int:
42
+ """Represents the size of a manifest entry file (in bytes) as it was loaded into a PyArrow table.
43
+
44
+ Returns:
45
+ The total number of bytes when loaded into memory as a PyArrow Table
46
+ """
47
+ return self[StatsType.PYARROW_TABLE_BYTES.value]
48
+
49
+ @staticmethod
50
+ def from_stats_types(stats_types: Dict[StatsType, Any]) -> StatsResult:
51
+ """A helper method to filter a dictionary by supported stats and returns a stats result object.
52
+
53
+ Args:
54
+ stats_types: Stats that should be included for constructing a stats result
55
+
56
+ Returns:
57
+ A stats result object
58
+ """
59
+ return StatsResult(
60
+ {
61
+ k: v
62
+ for k, v in stats_types.items()
63
+ if k in [StatsType.ROW_COUNT, StatsType.PYARROW_TABLE_BYTES]
64
+ }
65
+ )
66
+
67
+ @staticmethod
68
+ def merge(
69
+ stats_list: List[StatsResult],
70
+ stat_types: Optional[Set[StatsType]] = None,
71
+ record_row_count_once: bool = False,
72
+ ) -> StatsResult:
73
+ """Helper method to merge any list of StatsResult objects into one.
74
+
75
+ StatsResult objects are merged by adding up their numerical stats.
76
+ TODO (ricmiyam): Handle non-numerical stats when they are added
77
+
78
+ Args:
79
+ stat_types: If provided, the calculation will only include the requested stats.
80
+ record_row_count_once: If optionally set to `True`, then row counts are only added
81
+ from the first stats entry. One use case for this is merging table-centric stats
82
+ by columns, since the row count is expected to be the same across different columns.
83
+
84
+ Returns:
85
+ A stats result object
86
+ """
87
+ assert isinstance(stats_list, list) and len(stats_list) > 0, (
88
+ f"Expected stats list: {stats_list} of type {type(stats_list)} to be a "
89
+ f"non-empty list of StatsResult objects."
90
+ )
91
+
92
+ # Fallback to all stat types if not provided
93
+ stats_to_collect: Set = stat_types or ALL_STATS_TYPES
94
+
95
+ merged_stats: Dict[StatsType, int] = defaultdict(int)
96
+ for stats_result in stats_list:
97
+ for stat_type in stats_to_collect:
98
+ if stats_result:
99
+ merged_stats[stat_type.value] += stats_result[stat_type.value]
100
+
101
+ if record_row_count_once and StatsType.ROW_COUNT in stats_to_collect:
102
+ merged_stats[StatsType.ROW_COUNT.value] = stats_list[0].row_count
103
+
104
+ return StatsResult.from_stats_types(merged_stats)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.20
3
+ Version: 1.1.22
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -1,4 +1,4 @@
1
- deltacat/__init__.py,sha256=zFGIgjOwr8hWxHVCYMBLdLZVeRScD36BMr8lXcBQj1o,1778
1
+ deltacat/__init__.py,sha256=DA3ai-LdX6TopktWC4tQPRs9GXGxAjSkEz-TeJbnWdE,1778
2
2
  deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
3
3
  deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
4
4
  deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -66,7 +66,7 @@ deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViV
66
66
  deltacat/compute/compactor_v2/model/merge_input.py,sha256=-SxTE0e67z2V7MiMEVz5aMu4E0k8h3-vqohvUUOC0do,5659
67
67
  deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
68
68
  deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=u8SwKc5JTcTHu1TjHQdEw366mwRUU8cThyJp7D1wZrg,30448
69
+ deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=QKGekJQWL_S1DifnENSQ7PQm5k7x27CoDT0m4QQWBIk,30416
70
70
  deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
72
72
  deltacat/compute/compactor_v2/steps/merge.py,sha256=LpktsDPfj7Of6RgUw9w1f3Y3OBkPDjvtyXjzFaIDoSo,21771
@@ -91,6 +91,12 @@ deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXC
91
91
  deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
92
92
  deltacat/compute/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
93
  deltacat/compute/stats/types.py,sha256=cp0lT8nITTKbnkc03OysRjXfcfXzQml9a4wqCnR6kqs,215
94
+ deltacat/compute/stats/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
+ deltacat/compute/stats/models/delta_column_stats.py,sha256=-wXjB2c0BC1RDheumjL_j5-DfRNql4WsK9GpMFQI1cg,3300
96
+ deltacat/compute/stats/models/delta_stats.py,sha256=hBith8_hbF9TVr6HocLAt6RJ_kZZKO4zrGP8VOP05vA,8556
97
+ deltacat/compute/stats/models/delta_stats_cache_result.py,sha256=mbJYxpZd5jaER_BWrCD2hROFy3p1nNdBrj66nUpc6io,1624
98
+ deltacat/compute/stats/models/manifest_entry_stats.py,sha256=NCDAe2nPDEI4kOkuwNkRFgGPS-rqQaQqLuaLoKk20KQ,2419
99
+ deltacat/compute/stats/models/stats_result.py,sha256=XQAlmzhUqRmg4jzEMUAOqcYn1HUOBTMryBH1CCVlet8,3820
94
100
  deltacat/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
101
  deltacat/io/dataset.py,sha256=pFU5UfK-fD9C4fIeffJtrA6yVQSgAx2UPbxzQ4GMFL8,3203
96
102
  deltacat/io/file_object_store.py,sha256=HCFeXu9cWXPXVk54MHel_nw3-wIuzhMt2RI6jKzjRYM,1346
@@ -204,8 +210,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
204
210
  deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
205
211
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
206
212
  deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
207
- deltacat-1.1.20.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
208
- deltacat-1.1.20.dist-info/METADATA,sha256=AkZB0iLFMFCTtfb7RxGTTK0Dl5TyIp5WvKHCAfWJ3Ok,1733
209
- deltacat-1.1.20.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
210
- deltacat-1.1.20.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
211
- deltacat-1.1.20.dist-info/RECORD,,
213
+ deltacat-1.1.22.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
214
+ deltacat-1.1.22.dist-info/METADATA,sha256=uaAF2wK6KSi4pooJGoqJERmH_zBIz7WZgUgORbQCnaA,1733
215
+ deltacat-1.1.22.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
216
+ deltacat-1.1.22.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
217
+ deltacat-1.1.22.dist-info/RECORD,,