deltacat 0.1.10.dev0__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. deltacat/__init__.py +41 -15
  2. deltacat/aws/clients.py +12 -31
  3. deltacat/aws/constants.py +1 -1
  4. deltacat/aws/redshift/__init__.py +7 -2
  5. deltacat/aws/redshift/model/manifest.py +54 -50
  6. deltacat/aws/s3u.py +176 -187
  7. deltacat/catalog/delegate.py +151 -185
  8. deltacat/catalog/interface.py +78 -97
  9. deltacat/catalog/model/catalog.py +21 -21
  10. deltacat/catalog/model/table_definition.py +11 -9
  11. deltacat/compute/compactor/__init__.py +12 -16
  12. deltacat/compute/compactor/compaction_session.py +237 -166
  13. deltacat/compute/compactor/model/delta_annotated.py +60 -44
  14. deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
  15. deltacat/compute/compactor/model/delta_file_locator.py +10 -8
  16. deltacat/compute/compactor/model/materialize_result.py +6 -7
  17. deltacat/compute/compactor/model/primary_key_index.py +38 -34
  18. deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
  19. deltacat/compute/compactor/model/round_completion_info.py +25 -19
  20. deltacat/compute/compactor/model/sort_key.py +18 -15
  21. deltacat/compute/compactor/steps/dedupe.py +119 -94
  22. deltacat/compute/compactor/steps/hash_bucket.py +48 -47
  23. deltacat/compute/compactor/steps/materialize.py +86 -92
  24. deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
  25. deltacat/compute/compactor/steps/rehash/rewrite_index.py +5 -5
  26. deltacat/compute/compactor/utils/io.py +59 -47
  27. deltacat/compute/compactor/utils/primary_key_index.py +91 -80
  28. deltacat/compute/compactor/utils/round_completion_file.py +22 -23
  29. deltacat/compute/compactor/utils/system_columns.py +33 -45
  30. deltacat/compute/metastats/meta_stats.py +235 -157
  31. deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
  32. deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
  33. deltacat/compute/metastats/stats.py +95 -64
  34. deltacat/compute/metastats/utils/io.py +100 -53
  35. deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
  36. deltacat/compute/metastats/utils/ray_utils.py +38 -33
  37. deltacat/compute/stats/basic.py +107 -69
  38. deltacat/compute/stats/models/delta_column_stats.py +11 -8
  39. deltacat/compute/stats/models/delta_stats.py +59 -32
  40. deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
  41. deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
  42. deltacat/compute/stats/models/stats_result.py +24 -14
  43. deltacat/compute/stats/utils/intervals.py +16 -9
  44. deltacat/compute/stats/utils/io.py +86 -51
  45. deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
  46. deltacat/constants.py +4 -13
  47. deltacat/io/__init__.py +2 -2
  48. deltacat/io/aws/redshift/redshift_datasource.py +157 -143
  49. deltacat/io/dataset.py +14 -17
  50. deltacat/io/read_api.py +36 -33
  51. deltacat/logs.py +94 -42
  52. deltacat/storage/__init__.py +18 -8
  53. deltacat/storage/interface.py +196 -213
  54. deltacat/storage/model/delta.py +45 -51
  55. deltacat/storage/model/list_result.py +12 -8
  56. deltacat/storage/model/namespace.py +4 -5
  57. deltacat/storage/model/partition.py +42 -42
  58. deltacat/storage/model/stream.py +29 -30
  59. deltacat/storage/model/table.py +14 -14
  60. deltacat/storage/model/table_version.py +32 -31
  61. deltacat/storage/model/types.py +1 -0
  62. deltacat/tests/stats/test_intervals.py +11 -24
  63. deltacat/tests/utils/__init__.py +0 -0
  64. deltacat/tests/utils/test_record_batch_tables.py +284 -0
  65. deltacat/types/media.py +3 -4
  66. deltacat/types/tables.py +31 -21
  67. deltacat/utils/common.py +5 -11
  68. deltacat/utils/numpy.py +20 -22
  69. deltacat/utils/pandas.py +73 -100
  70. deltacat/utils/performance.py +3 -9
  71. deltacat/utils/placement.py +259 -230
  72. deltacat/utils/pyarrow.py +302 -89
  73. deltacat/utils/ray_utils/collections.py +2 -1
  74. deltacat/utils/ray_utils/concurrency.py +27 -28
  75. deltacat/utils/ray_utils/dataset.py +28 -28
  76. deltacat/utils/ray_utils/performance.py +5 -9
  77. deltacat/utils/ray_utils/runtime.py +9 -10
  78. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/METADATA +1 -1
  79. deltacat-0.1.12.dist-info/RECORD +110 -0
  80. deltacat-0.1.10.dev0.dist-info/RECORD +0 -108
  81. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/LICENSE +0 -0
  82. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/WHEEL +0 -0
  83. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/top_level.txt +0 -0
@@ -1,31 +1,31 @@
1
1
  import logging
2
+ from collections import defaultdict
3
+ from typing import Dict, List, Optional
2
4
 
3
5
  import pyarrow
4
6
  import ray
5
- from collections import defaultdict
6
7
 
7
- from deltacat.compute.stats.utils.manifest_stats_file import read_manifest_stats_by_columns, write_manifest_stats_file
8
- from deltacat.compute.stats.models.delta_stats_cache_result import DeltaStatsCacheResult
9
- from deltacat.compute.stats.models.manifest_entry_stats import ManifestEntryStats
8
+ from deltacat import LocalTable, TableType, logs
10
9
  from deltacat.compute.stats.models.delta_column_stats import DeltaColumnStats
11
10
  from deltacat.compute.stats.models.delta_stats import DeltaStats, DeltaStatsCacheMiss
12
-
11
+ from deltacat.compute.stats.models.delta_stats_cache_result import DeltaStatsCacheResult
12
+ from deltacat.compute.stats.models.manifest_entry_stats import ManifestEntryStats
13
13
  from deltacat.compute.stats.models.stats_result import StatsResult
14
14
  from deltacat.compute.stats.utils.intervals import DeltaRange
15
- from deltacat.storage import PartitionLocator, Delta, DeltaLocator
16
- from deltacat import logs, LocalTable, TableType
15
+ from deltacat.compute.stats.utils.manifest_stats_file import (
16
+ read_manifest_stats_by_columns,
17
+ write_manifest_stats_file,
18
+ )
19
+ from deltacat.storage import Delta, DeltaLocator, PartitionLocator
17
20
  from deltacat.storage import interface as unimplemented_deltacat_storage
18
- from deltacat.compute.compactor import DeltaAnnotated
19
-
20
- from typing import Dict, List, Optional, Any
21
21
 
22
22
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
23
23
 
24
24
 
25
25
  @ray.remote
26
- def read_cached_delta_stats(delta: Delta,
27
- columns_to_fetch: List[str],
28
- stat_results_s3_bucket: str):
26
+ def read_cached_delta_stats(
27
+ delta: Delta, columns_to_fetch: List[str], stat_results_s3_bucket: str
28
+ ):
29
29
  """Read delta stats that are cached in S3
30
30
 
31
31
  This Ray distributed task reads delta stats from a file system (i.e. S3) based on specified columns.
@@ -39,8 +39,11 @@ def read_cached_delta_stats(delta: Delta,
39
39
  """
40
40
 
41
41
  delta_locator = DeltaLocator.of(delta.partition_locator, delta.stream_position)
42
- column_stats_completion_info: List[DeltaColumnStats] = \
43
- read_manifest_stats_by_columns(stat_results_s3_bucket, columns_to_fetch, delta_locator)
42
+ column_stats_completion_info: List[
43
+ DeltaColumnStats
44
+ ] = read_manifest_stats_by_columns(
45
+ stat_results_s3_bucket, columns_to_fetch, delta_locator
46
+ )
44
47
 
45
48
  found_columns_stats: List[DeltaColumnStats] = []
46
49
  missed_columns: List[str] = []
@@ -50,29 +53,37 @@ def read_cached_delta_stats(delta: Delta,
50
53
  else:
51
54
  missed_columns.append(column_stats.column)
52
55
 
53
- found_stats: Optional[DeltaStats] = DeltaStats.of(found_columns_stats) if found_columns_stats else None
54
- missed_stats: Optional[DeltaStatsCacheMiss] = DeltaStatsCacheMiss(missed_columns, delta.locator) \
55
- if missed_columns else None
56
+ found_stats: Optional[DeltaStats] = (
57
+ DeltaStats.of(found_columns_stats) if found_columns_stats else None
58
+ )
59
+ missed_stats: Optional[DeltaStatsCacheMiss] = (
60
+ DeltaStatsCacheMiss(missed_columns, delta.locator) if missed_columns else None
61
+ )
56
62
 
57
63
  return DeltaStatsCacheResult.of(found_stats, missed_stats)
58
64
 
59
65
 
60
66
  @ray.remote
61
- def cache_delta_column_stats(stat_results_s3_bucket: str,
62
- dataset_column: DeltaColumnStats) -> None:
67
+ def cache_delta_column_stats(
68
+ stat_results_s3_bucket: str, dataset_column: DeltaColumnStats
69
+ ) -> None:
63
70
  """Ray distributed task to cache the delta column stats into a file system (i.e. S3).
64
71
 
65
72
  Args:
66
73
  stat_results_s3_bucket: The S3 bucket name
67
74
  dataset_column: Column-oriented stats for a given delta
68
75
  """
69
- write_manifest_stats_file(stat_results_s3_bucket, dataset_column.column, dataset_column.manifest_stats)
76
+ write_manifest_stats_file(
77
+ stat_results_s3_bucket, dataset_column.column, dataset_column.manifest_stats
78
+ )
70
79
 
71
80
 
72
81
  @ray.remote
73
- def get_delta_stats(delta_locator: DeltaLocator,
74
- columns: Optional[List[str]] = None,
75
- deltacat_storage=unimplemented_deltacat_storage) -> DeltaStats:
82
+ def get_delta_stats(
83
+ delta_locator: DeltaLocator,
84
+ columns: Optional[List[str]] = None,
85
+ deltacat_storage=unimplemented_deltacat_storage,
86
+ ) -> DeltaStats:
76
87
  """Ray distributed task to compute and collect stats for a requested delta.
77
88
  If no columns are requested, stats will be computed for all columns.
78
89
  Args:
@@ -90,10 +101,11 @@ def get_delta_stats(delta_locator: DeltaLocator,
90
101
 
91
102
  @ray.remote
92
103
  def get_deltas_from_range(
93
- source_partition_locator: PartitionLocator,
94
- start_position_inclusive: DeltaRange,
95
- end_position_inclusive: DeltaRange,
96
- deltacat_storage=unimplemented_deltacat_storage) -> List[Delta]:
104
+ source_partition_locator: PartitionLocator,
105
+ start_position_inclusive: DeltaRange,
106
+ end_position_inclusive: DeltaRange,
107
+ deltacat_storage=unimplemented_deltacat_storage,
108
+ ) -> List[Delta]:
97
109
  """Looks up deltas in the specified partition using Ray, given both starting and ending delta stream positions.
98
110
 
99
111
  Args:
@@ -108,8 +120,14 @@ def get_deltas_from_range(
108
120
  a list of delta objects
109
121
  """
110
122
 
111
- namespace, partition_values = source_partition_locator.namespace, source_partition_locator.partition_values
112
- table_name, table_version = source_partition_locator.table_name, source_partition_locator.table_version
123
+ namespace, partition_values = (
124
+ source_partition_locator.namespace,
125
+ source_partition_locator.partition_values,
126
+ )
127
+ table_name, table_version = (
128
+ source_partition_locator.table_name,
129
+ source_partition_locator.table_version,
130
+ )
113
131
  deltas_list_result = deltacat_storage.list_deltas(
114
132
  namespace,
115
133
  table_name,
@@ -118,14 +136,16 @@ def get_deltas_from_range(
118
136
  start_position_inclusive,
119
137
  end_position_inclusive,
120
138
  ascending_order=True,
121
- include_manifest=False
139
+ include_manifest=False,
122
140
  )
123
141
  return deltas_list_result.all_items()
124
142
 
125
143
 
126
- def _collect_stats_by_columns(delta: Delta,
127
- columns_to_compute: Optional[List[str]] = None,
128
- deltacat_storage=unimplemented_deltacat_storage) -> DeltaStats:
144
+ def _collect_stats_by_columns(
145
+ delta: Delta,
146
+ columns_to_compute: Optional[List[str]] = None,
147
+ deltacat_storage=unimplemented_deltacat_storage,
148
+ ) -> DeltaStats:
129
149
  """Materializes one manifest entry at a time to save memory usage and calculate stats from each of its columns.
130
150
  Args:
131
151
  delta: A delta object to calculate stats for
@@ -134,47 +154,62 @@ def _collect_stats_by_columns(delta: Delta,
134
154
  Returns:
135
155
  A delta wide stats container
136
156
  """
137
- assert delta.manifest is not None, f"Manifest should not be missing from delta for stats calculation: {delta}"
157
+ assert (
158
+ delta.manifest is not None
159
+ ), f"Manifest should not be missing from delta for stats calculation: {delta}"
138
160
 
139
161
  # Mapping of column_name -> [stats_file_idx_1, stats_file_idx_2, ... stats_file_idx_n]
140
- column_stats_map: Dict[str, List[Optional[StatsResult]]] = defaultdict(lambda: [None] * len(delta.manifest.entries))
162
+ column_stats_map: Dict[str, List[Optional[StatsResult]]] = defaultdict(
163
+ lambda: [None] * len(delta.manifest.entries)
164
+ )
141
165
 
142
166
  total_tables_size = 0
143
167
  for file_idx, manifest in enumerate(delta.manifest.entries):
144
- entry_pyarrow_table: LocalTable = \
145
- deltacat_storage.download_delta_manifest_entry(delta, file_idx, TableType.PYARROW, columns_to_compute)
146
- assert isinstance(entry_pyarrow_table, pyarrow.Table), \
147
- f"Stats collection is only supported for PyArrow tables, but received a table of " \
168
+ entry_pyarrow_table: LocalTable = (
169
+ deltacat_storage.download_delta_manifest_entry(
170
+ delta, file_idx, TableType.PYARROW, columns_to_compute
171
+ )
172
+ )
173
+ assert isinstance(entry_pyarrow_table, pyarrow.Table), (
174
+ f"Stats collection is only supported for PyArrow tables, but received a table of "
148
175
  f"type '{type(entry_pyarrow_table)}' for manifest entry {file_idx} of delta: {delta.locator}."
176
+ )
149
177
  total_tables_size += entry_pyarrow_table.nbytes
150
178
  if not columns_to_compute:
151
179
  columns_to_compute = entry_pyarrow_table.column_names
152
180
 
153
181
  for column_idx, pyarrow_column in enumerate(entry_pyarrow_table.columns):
154
182
  column_name = columns_to_compute[column_idx]
155
- column_stats_map[column_name][file_idx] = StatsResult.of(len(pyarrow_column), pyarrow_column.nbytes)
183
+ column_stats_map[column_name][file_idx] = StatsResult.of(
184
+ len(pyarrow_column), pyarrow_column.nbytes
185
+ )
156
186
 
157
187
  # Add column-wide stats for a list of tables, these will be used for caching and retrieving later
158
- delta_ds_column_stats: List[DeltaColumnStats] = \
159
- _to_dataset_column_stats(delta.locator, columns_to_compute, column_stats_map)
188
+ delta_ds_column_stats: List[DeltaColumnStats] = _to_dataset_column_stats(
189
+ delta.locator, columns_to_compute, column_stats_map
190
+ )
160
191
 
161
192
  dataset_stats: DeltaStats = DeltaStats.of(delta_ds_column_stats)
162
193
 
163
194
  # Quick validation for calculations
164
- assert dataset_stats.stats.pyarrow_table_bytes == total_tables_size, \
165
- f"Expected the size of all PyArrow tables ({total_tables_size} bytes) " \
195
+ assert dataset_stats.stats.pyarrow_table_bytes == total_tables_size, (
196
+ f"Expected the size of all PyArrow tables ({total_tables_size} bytes) "
166
197
  f"to match the sum of each of its columns ({dataset_stats.stats.pyarrow_table_bytes} bytes)"
198
+ )
167
199
 
168
200
  return dataset_stats
169
201
 
170
202
 
171
- def _to_dataset_column_stats(delta_locator: DeltaLocator,
172
- column_names: List[str],
173
- column_manifest_map: Dict[str, List[Optional[StatsResult]]]) \
174
- -> List[DeltaColumnStats]:
203
+ def _to_dataset_column_stats(
204
+ delta_locator: DeltaLocator,
205
+ column_names: List[str],
206
+ column_manifest_map: Dict[str, List[Optional[StatsResult]]],
207
+ ) -> List[DeltaColumnStats]:
175
208
  dataset_stats: List[DeltaColumnStats] = []
176
209
  for column_name in column_names:
177
- column_manifest_stats = ManifestEntryStats.of(column_manifest_map[column_name], delta_locator)
210
+ column_manifest_stats = ManifestEntryStats.of(
211
+ column_manifest_map[column_name], delta_locator
212
+ )
178
213
  dataset_column_stats = DeltaColumnStats.of(column_name, column_manifest_stats)
179
214
  dataset_stats.append(dataset_column_stats)
180
- return dataset_stats
215
+ return dataset_stats
@@ -1,21 +1,20 @@
1
- import logging
2
1
  import json
2
+ import logging
3
3
  from typing import List
4
4
 
5
- from deltacat.compute.stats.models.manifest_entry_stats import ManifestEntryStats
6
- from deltacat.compute.stats.models.delta_column_stats import DeltaColumnStats
7
- from deltacat.storage import DeltaLocator
8
5
  from deltacat import logs
9
6
  from deltacat.aws import s3u as s3_utils
7
+ from deltacat.compute.stats.models.delta_column_stats import DeltaColumnStats
8
+ from deltacat.compute.stats.models.manifest_entry_stats import ManifestEntryStats
9
+ from deltacat.storage import DeltaLocator
10
10
  from deltacat.utils.common import sha1_hexdigest
11
11
 
12
12
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
13
13
 
14
14
 
15
15
  def get_manifest_stats_s3_url(
16
- bucket: str,
17
- column_name: str,
18
- delta_locator: DeltaLocator) -> str:
16
+ bucket: str, column_name: str, delta_locator: DeltaLocator
17
+ ) -> str:
19
18
  """Returns the S3 URL path to the column-oriented delta stats
20
19
 
21
20
  Args:
@@ -33,9 +32,8 @@ def get_manifest_stats_s3_url(
33
32
 
34
33
 
35
34
  def read_manifest_stats_by_columns(
36
- bucket: str,
37
- column_names: List[str],
38
- delta_locator: DeltaLocator) -> List[DeltaColumnStats]:
35
+ bucket: str, column_names: List[str], delta_locator: DeltaLocator
36
+ ) -> List[DeltaColumnStats]:
39
37
  """Fetch a list of delta column stats by reading each column-oriented delta stats file from S3
40
38
 
41
39
  Args:
@@ -46,14 +44,17 @@ def read_manifest_stats_by_columns(
46
44
  Returns:
47
45
  A list of delta column stats
48
46
  """
49
- return [DeltaColumnStats.of(column, read_manifest_stats_file(bucket, column, delta_locator))
50
- for column in column_names]
47
+ return [
48
+ DeltaColumnStats.of(
49
+ column, read_manifest_stats_file(bucket, column, delta_locator)
50
+ )
51
+ for column in column_names
52
+ ]
51
53
 
52
54
 
53
55
  def read_manifest_stats_file(
54
- bucket: str,
55
- column_name: str,
56
- delta_locator: DeltaLocator) -> ManifestEntryStats:
56
+ bucket: str, column_name: str, delta_locator: DeltaLocator
57
+ ) -> ManifestEntryStats:
57
58
  """Read a manifest entry stats from S3
58
59
 
59
60
  Args:
@@ -66,12 +67,9 @@ def read_manifest_stats_file(
66
67
  """
67
68
 
68
69
  stats_completion_file_url = get_manifest_stats_s3_url(
69
- bucket,
70
- column_name,
71
- delta_locator
70
+ bucket, column_name, delta_locator
72
71
  )
73
- logger.info(
74
- f"reading stats completion file from: {stats_completion_file_url}")
72
+ logger.info(f"reading stats completion file from: {stats_completion_file_url}")
75
73
  stats_completion_info_file = None
76
74
  result = s3_utils.download(stats_completion_file_url, fail_if_not_found=False)
77
75
  if result:
@@ -82,9 +80,8 @@ def read_manifest_stats_file(
82
80
 
83
81
 
84
82
  def write_manifest_stats_file(
85
- bucket: str,
86
- column_name: str,
87
- manifest_entry_stats: ManifestEntryStats) -> None:
83
+ bucket: str, column_name: str, manifest_entry_stats: ManifestEntryStats
84
+ ) -> None:
88
85
  """Write a manifest entry stats into S3
89
86
 
90
87
  Args:
@@ -92,18 +89,12 @@ def write_manifest_stats_file(
92
89
  column_name: The name of the column which represents this manifest entry stats
93
90
  manifest_entry_stats: The manifest entry stats to serialize and store into S3
94
91
  """
95
- logger.info(
96
- f"writing stats completion file contents: {manifest_entry_stats}")
92
+ logger.info(f"writing stats completion file contents: {manifest_entry_stats}")
97
93
  stats_completion_file_s3_url = get_manifest_stats_s3_url(
98
94
  bucket,
99
95
  column_name,
100
96
  manifest_entry_stats.delta_locator,
101
97
  )
102
- logger.info(
103
- f"writing stats completion file to: {stats_completion_file_s3_url}")
104
- s3_utils.upload(
105
- stats_completion_file_s3_url,
106
- str(json.dumps(manifest_entry_stats))
107
- )
108
- logger.info(
109
- f"stats completion file written to: {stats_completion_file_s3_url}")
98
+ logger.info(f"writing stats completion file to: {stats_completion_file_s3_url}")
99
+ s3_utils.upload(stats_completion_file_s3_url, str(json.dumps(manifest_entry_stats)))
100
+ logger.info(f"stats completion file written to: {stats_completion_file_s3_url}")
deltacat/constants.py CHANGED
@@ -1,14 +1,8 @@
1
1
  from deltacat.utils.common import env_string
2
2
 
3
3
  # Environment variables
4
- DELTACAT_LOG_LEVEL = env_string(
5
- "DELTACAT_LOG_LEVEL",
6
- "DEBUG"
7
- )
8
- APPLICATION_LOG_LEVEL = env_string(
9
- "APPLICATION_LOG_LEVEL",
10
- "DEBUG"
11
- )
4
+ DELTACAT_LOG_LEVEL = env_string("DELTACAT_LOG_LEVEL", "DEBUG")
5
+ APPLICATION_LOG_LEVEL = env_string("APPLICATION_LOG_LEVEL", "DEBUG")
12
6
 
13
7
  # Byte Units
14
8
  BYTES_PER_KIBIBYTE = 2**10
@@ -31,8 +25,5 @@ PYARROW_INFLATION_MULTIPLIER = 2.5
31
25
  PYARROW_INFLATION_MULTIPLIER_ALL_COLUMNS = 6
32
26
 
33
27
  PRIMARY_KEY_INDEX_WRITE_BOTO3_CONFIG = {
34
- "retries": {
35
- 'max_attempts': 25,
36
- 'mode': 'standard'
37
- }
38
- }
28
+ "retries": {"max_attempts": 25, "mode": "standard"}
29
+ }
deltacat/io/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
- from deltacat.io.read_api import read_redshift
2
1
  from deltacat.io.dataset import DeltacatDataset
2
+ from deltacat.io.read_api import read_redshift
3
3
 
4
4
  __all__ = [
5
5
  "DeltacatDataset",
6
6
  "read_redshift",
7
- ]
7
+ ]