deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +5 -9
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -7
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
  40. deltacat/compute/compactor_v2/steps/merge.py +17 -126
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  45. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  46. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  47. deltacat/compute/converter/constants.py +4 -0
  48. deltacat/compute/converter/converter_session.py +143 -0
  49. deltacat/compute/converter/model/convert_input.py +69 -0
  50. deltacat/compute/converter/model/convert_input_files.py +61 -0
  51. deltacat/compute/converter/model/converter_session_params.py +99 -0
  52. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  53. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  54. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  55. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  56. deltacat/compute/converter/steps/__init__.py +0 -0
  57. deltacat/compute/converter/steps/convert.py +211 -0
  58. deltacat/compute/converter/steps/dedupe.py +60 -0
  59. deltacat/compute/converter/utils/__init__.py +0 -0
  60. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  61. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  62. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  63. deltacat/compute/converter/utils/io.py +43 -0
  64. deltacat/compute/converter/utils/s3u.py +133 -0
  65. deltacat/compute/resource_estimation/delta.py +1 -19
  66. deltacat/constants.py +47 -1
  67. deltacat/env.py +51 -0
  68. deltacat/examples/__init__.py +0 -0
  69. deltacat/examples/basic_logging.py +101 -0
  70. deltacat/examples/common/__init__.py +0 -0
  71. deltacat/examples/common/fixtures.py +15 -0
  72. deltacat/examples/hello_world.py +27 -0
  73. deltacat/examples/iceberg/__init__.py +0 -0
  74. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  75. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  76. deltacat/exceptions.py +51 -9
  77. deltacat/logs.py +4 -1
  78. deltacat/storage/__init__.py +118 -28
  79. deltacat/storage/iceberg/__init__.py +0 -0
  80. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  81. deltacat/storage/iceberg/impl.py +737 -0
  82. deltacat/storage/iceberg/model.py +709 -0
  83. deltacat/storage/interface.py +217 -134
  84. deltacat/storage/main/__init__.py +0 -0
  85. deltacat/storage/main/impl.py +2077 -0
  86. deltacat/storage/model/delta.py +118 -71
  87. deltacat/storage/model/interop.py +24 -0
  88. deltacat/storage/model/list_result.py +8 -0
  89. deltacat/storage/model/locator.py +93 -3
  90. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  91. deltacat/storage/model/metafile.py +1316 -0
  92. deltacat/storage/model/namespace.py +34 -18
  93. deltacat/storage/model/partition.py +362 -37
  94. deltacat/storage/model/scan/__init__.py +0 -0
  95. deltacat/storage/model/scan/push_down.py +19 -0
  96. deltacat/storage/model/scan/scan_plan.py +10 -0
  97. deltacat/storage/model/scan/scan_task.py +34 -0
  98. deltacat/storage/model/schema.py +892 -0
  99. deltacat/storage/model/shard.py +47 -0
  100. deltacat/storage/model/sort_key.py +170 -13
  101. deltacat/storage/model/stream.py +208 -80
  102. deltacat/storage/model/table.py +123 -29
  103. deltacat/storage/model/table_version.py +322 -46
  104. deltacat/storage/model/transaction.py +757 -0
  105. deltacat/storage/model/transform.py +198 -61
  106. deltacat/storage/model/types.py +111 -13
  107. deltacat/storage/rivulet/__init__.py +11 -0
  108. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  109. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  110. deltacat/storage/rivulet/dataset.py +744 -0
  111. deltacat/storage/rivulet/dataset_executor.py +87 -0
  112. deltacat/storage/rivulet/feather/__init__.py +5 -0
  113. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  114. deltacat/storage/rivulet/feather/serializer.py +35 -0
  115. deltacat/storage/rivulet/fs/__init__.py +0 -0
  116. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  117. deltacat/storage/rivulet/fs/file_store.py +130 -0
  118. deltacat/storage/rivulet/fs/input_file.py +76 -0
  119. deltacat/storage/rivulet/fs/output_file.py +86 -0
  120. deltacat/storage/rivulet/logical_plan.py +105 -0
  121. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  122. deltacat/storage/rivulet/metastore/delta.py +190 -0
  123. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  124. deltacat/storage/rivulet/metastore/sst.py +82 -0
  125. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  126. deltacat/storage/rivulet/mvp/Table.py +101 -0
  127. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  129. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  130. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  131. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  132. deltacat/storage/rivulet/reader/__init__.py +0 -0
  133. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  134. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  135. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  136. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  137. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  138. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  139. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  140. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  141. deltacat/storage/rivulet/schema/__init__.py +0 -0
  142. deltacat/storage/rivulet/schema/datatype.py +128 -0
  143. deltacat/storage/rivulet/schema/schema.py +251 -0
  144. deltacat/storage/rivulet/serializer.py +40 -0
  145. deltacat/storage/rivulet/serializer_factory.py +42 -0
  146. deltacat/storage/rivulet/writer/__init__.py +0 -0
  147. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  148. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  149. deltacat/tests/_io/__init__.py +1 -0
  150. deltacat/tests/catalog/test_catalogs.py +324 -0
  151. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  152. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  153. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  154. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  155. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  156. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  157. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  158. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  159. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  160. deltacat/tests/compute/conftest.py +75 -0
  161. deltacat/tests/compute/converter/__init__.py +0 -0
  162. deltacat/tests/compute/converter/conftest.py +80 -0
  163. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  164. deltacat/tests/compute/converter/utils.py +123 -0
  165. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  166. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  167. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  168. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  169. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  170. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  171. deltacat/tests/compute/test_util_common.py +19 -12
  172. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  173. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  174. deltacat/tests/storage/__init__.py +0 -0
  175. deltacat/tests/storage/conftest.py +25 -0
  176. deltacat/tests/storage/main/__init__.py +0 -0
  177. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  178. deltacat/tests/storage/model/__init__.py +0 -0
  179. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  180. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  181. deltacat/tests/storage/model/test_schema.py +308 -0
  182. deltacat/tests/storage/model/test_shard.py +22 -0
  183. deltacat/tests/storage/model/test_table_version.py +110 -0
  184. deltacat/tests/storage/model/test_transaction.py +308 -0
  185. deltacat/tests/storage/rivulet/__init__.py +0 -0
  186. deltacat/tests/storage/rivulet/conftest.py +149 -0
  187. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  188. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  189. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  190. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  191. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  192. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  193. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  194. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  195. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  196. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  197. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  198. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  199. deltacat/tests/test_deltacat_api.py +39 -0
  200. deltacat/tests/test_utils/filesystem.py +14 -0
  201. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  202. deltacat/tests/test_utils/pyarrow.py +8 -15
  203. deltacat/tests/test_utils/storage.py +266 -3
  204. deltacat/tests/utils/test_daft.py +3 -3
  205. deltacat/tests/utils/test_pyarrow.py +0 -432
  206. deltacat/types/partial_download.py +1 -1
  207. deltacat/types/tables.py +1 -1
  208. deltacat/utils/export.py +59 -0
  209. deltacat/utils/filesystem.py +320 -0
  210. deltacat/utils/metafile_locator.py +73 -0
  211. deltacat/utils/pyarrow.py +36 -183
  212. deltacat-2.0.dist-info/METADATA +65 -0
  213. deltacat-2.0.dist-info/RECORD +347 -0
  214. deltacat/aws/redshift/__init__.py +0 -19
  215. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  216. deltacat/io/dataset.py +0 -73
  217. deltacat/io/read_api.py +0 -143
  218. deltacat/storage/model/delete_parameters.py +0 -40
  219. deltacat/storage/model/partition_spec.py +0 -71
  220. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  221. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  222. deltacat-1.1.36.dist-info/METADATA +0 -64
  223. deltacat-1.1.36.dist-info/RECORD +0 -219
  224. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  225. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  226. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  227. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  228. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  229. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  230. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  231. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  233. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  234. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
  235. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
  236. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Any, Callable, Dict, List, Optional, Set, Union, Tuple
1
+ from typing import Any, Callable, Dict, List, Optional, Union, Tuple
2
2
 
3
3
  import pyarrow as pa
4
4
  import daft
@@ -16,38 +16,39 @@ from deltacat.utils.common import current_time_ms
16
16
  from deltacat.storage import (
17
17
  Delta,
18
18
  DeltaLocator,
19
+ DeltaProperties,
19
20
  DeltaType,
20
21
  DistributedDataset,
21
22
  LifecycleState,
22
23
  ListResult,
23
24
  LocalDataset,
24
25
  LocalTable,
25
- Manifest,
26
26
  ManifestAuthor,
27
27
  Namespace,
28
28
  NamespaceLocator,
29
+ NamespaceProperties,
29
30
  Partition,
30
- SchemaConsistencyType,
31
+ PartitionScheme,
32
+ Schema,
31
33
  Stream,
32
34
  StreamLocator,
33
35
  Table,
34
36
  TableVersion,
35
37
  TableVersionLocator,
38
+ TableVersionProperties,
36
39
  TableLocator,
40
+ TableProperties,
37
41
  CommitState,
38
- SortKey,
42
+ SortScheme,
39
43
  PartitionLocator,
40
- ManifestMeta,
41
44
  ManifestEntry,
42
45
  ManifestEntryList,
43
- DeleteParameters,
44
- PartitionFilter,
46
+ EntryParams,
45
47
  PartitionValues,
46
- DeltaPartitionSpec,
47
- StreamPartitionSpec,
48
48
  TransformName,
49
- IdentityTransformParameters,
49
+ StreamFormat,
50
50
  )
51
+ from deltacat.storage.model.manifest import Manifest, ManifestMeta, EntryType
51
52
  from deltacat.types.media import (
52
53
  ContentType,
53
54
  StorageType,
@@ -65,7 +66,7 @@ SQLITE_CUR_ARG = "sqlite3_cur"
65
66
  SQLITE_CON_ARG = "sqlite3_con"
66
67
  DB_FILE_PATH_ARG = "db_file_path"
67
68
 
68
- STORAGE_TYPE = "SQLITE3"
69
+ STREAM_FORMAT = StreamFormat.SQLITE3
69
70
  STREAM_ID_PROPERTY = "stream_id"
70
71
  CREATE_NAMESPACES_TABLE = (
71
72
  "CREATE TABLE IF NOT EXISTS namespaces(locator, value, PRIMARY KEY (locator))"
@@ -206,7 +207,7 @@ def list_deltas(
206
207
  last_stream_position: Optional[int] = None,
207
208
  ascending_order: Optional[bool] = None,
208
209
  include_manifest: bool = False,
209
- partition_filter: Optional[PartitionFilter] = None,
210
+ partition_scheme_id: Optional[str] = None,
210
211
  *args,
211
212
  **kwargs,
212
213
  ) -> ListResult[Delta]:
@@ -214,13 +215,6 @@ def list_deltas(
214
215
  if stream is None:
215
216
  return ListResult.of([], None, None)
216
217
 
217
- if partition_values is not None and partition_filter is not None:
218
- raise ValueError(
219
- "Only one of partition_values or partition_filter must be provided"
220
- )
221
- if partition_filter is not None:
222
- partition_values = partition_filter.partition_values
223
-
224
218
  partition = get_partition(stream.locator, partition_values, *args, **kwargs)
225
219
 
226
220
  all_deltas = list_partition_deltas(
@@ -314,7 +308,7 @@ def get_delta(
314
308
  partition_values: Optional[PartitionValues] = None,
315
309
  table_version: Optional[str] = None,
316
310
  include_manifest: bool = False,
317
- partition_filter: Optional[PartitionFilter] = None,
311
+ partition_scheme_id: Optional[str] = None,
318
312
  *args,
319
313
  **kwargs,
320
314
  ) -> Optional[Delta]:
@@ -322,14 +316,6 @@ def get_delta(
322
316
 
323
317
  stream = get_stream(namespace, table_name, table_version, *args, **kwargs)
324
318
 
325
- if partition_values is not None and partition_filter is not None:
326
- raise ValueError(
327
- "Only one of partition_values or partition_filter must be provided"
328
- )
329
-
330
- if partition_filter is not None:
331
- partition_values = partition_filter.partition_values
332
-
333
319
  partition = get_partition(stream.locator, partition_values, *args, **kwargs)
334
320
  delta_locator = DeltaLocator.of(partition.locator, stream_position)
335
321
 
@@ -355,7 +341,7 @@ def get_latest_delta(
355
341
  partition_values: Optional[PartitionValues] = None,
356
342
  table_version: Optional[str] = None,
357
343
  include_manifest: bool = False,
358
- partition_filter: Optional[PartitionFilter] = None,
344
+ partition_scheme_id: Optional[str] = None,
359
345
  *args,
360
346
  **kwargs,
361
347
  ) -> Optional[Delta]:
@@ -369,7 +355,7 @@ def get_latest_delta(
369
355
  last_stream_position=None,
370
356
  ascending_order=False,
371
357
  include_manifest=include_manifest,
372
- partition_filter=partition_filter,
358
+ partition_scheme_id=partition_scheme_id,
373
359
  *args,
374
360
  **kwargs,
375
361
  ).all_items()
@@ -389,7 +375,6 @@ def download_delta(
389
375
  file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
390
376
  ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
391
377
  distributed_dataset_type: DistributedDatasetType = DistributedDatasetType.RAY_DATASET,
392
- partition_filter: Optional[PartitionFilter] = None,
393
378
  *args,
394
379
  **kwargs,
395
380
  ) -> Union[LocalDataset, DistributedDataset]: # type: ignore
@@ -398,16 +383,7 @@ def download_delta(
398
383
  manifest = Delta(delta_like).manifest
399
384
  else:
400
385
  manifest = get_delta_manifest(delta_like, *args, **kwargs)
401
- partition_values: PartitionValues = None
402
- if partition_filter is not None:
403
- partition_values = partition_filter.partition_values
404
386
  for entry_index in range(len(manifest.entries)):
405
- if (
406
- partition_values is not None
407
- and partition_values != manifest.entries[entry_index].meta.partition_values
408
- ):
409
- continue
410
-
411
387
  result.append(
412
388
  download_delta_manifest_entry(
413
389
  delta_like=delta_like,
@@ -515,11 +491,11 @@ def get_delta_manifest(
515
491
 
516
492
 
517
493
  def create_namespace(
518
- namespace: str, permissions: Dict[str, Any], *args, **kwargs
494
+ namespace: str, properties: NamespaceProperties, *args, **kwargs
519
495
  ) -> Namespace:
520
496
  cur, con = _get_sqlite3_cursor_con(kwargs)
521
497
  locator = NamespaceLocator.of(namespace)
522
- result = Namespace.of(locator, permissions)
498
+ result = Namespace.of(locator, properties)
523
499
  params = (locator.canonical_string(), json.dumps(result))
524
500
  cur.execute(CREATE_NAMESPACES_TABLE)
525
501
  cur.execute(CREATE_TABLES_TABLE)
@@ -535,7 +511,7 @@ def create_namespace(
535
511
 
536
512
  def update_namespace(
537
513
  namespace: str,
538
- permissions: Optional[Dict[str, Any]] = None,
514
+ properties: NamespaceProperties = None,
539
515
  new_namespace: Optional[str] = None,
540
516
  *args,
541
517
  **kwargs,
@@ -543,7 +519,7 @@ def update_namespace(
543
519
  assert new_namespace is None, "namespace name cannot be changed"
544
520
  cur, con = _get_sqlite3_cursor_con(kwargs)
545
521
  locator = NamespaceLocator.of(namespace)
546
- result = Namespace.of(locator, permissions)
522
+ result = Namespace.of(locator, properties)
547
523
  params = (json.dumps(result), locator.canonical_string())
548
524
  cur.execute("UPDATE namespaces SET value = ? WHERE locator = ?", params)
549
525
  con.commit()
@@ -553,39 +529,41 @@ def create_table_version(
553
529
  namespace: str,
554
530
  table_name: str,
555
531
  table_version: Optional[str] = None,
556
- schema: Optional[Union[pa.Schema, str, bytes]] = None,
557
- schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
558
- partition_keys: Optional[List[Dict[str, Any]]] = None,
559
- primary_key_column_names: Optional[Set[str]] = None,
560
- sort_keys: Optional[List[SortKey]] = None,
532
+ schema: Optional[Union[pa.Schema, Any]] = None,
533
+ partition_scheme: Optional[PartitionScheme] = None,
534
+ sort_keys: Optional[SortScheme] = None,
561
535
  table_version_description: Optional[str] = None,
562
- table_version_properties: Optional[Dict[str, str]] = None,
563
- table_permissions: Optional[Dict[str, Any]] = None,
536
+ table_version_properties: Optional[TableVersionProperties] = None,
564
537
  table_description: Optional[str] = None,
565
- table_properties: Optional[Dict[str, str]] = None,
538
+ table_properties: Optional[TableProperties] = None,
566
539
  supported_content_types: Optional[List[ContentType]] = None,
567
- partition_spec: Optional[StreamPartitionSpec] = None,
568
540
  *args,
569
541
  **kwargs,
570
542
  ) -> Stream:
571
543
  cur, con = _get_sqlite3_cursor_con(kwargs)
572
544
 
573
- if partition_keys is not None and partition_spec is not None:
574
- raise ValueError(
575
- "Only one of partition_keys or partition_spec must be provided"
576
- )
577
- if partition_spec is not None:
545
+ if partition_scheme is not None:
578
546
  assert (
579
- partition_spec.ordered_transforms is not None
580
- ), "Ordered transforms must be specified when partition_spec is specified"
581
- partition_keys = []
582
- for transform in partition_spec.ordered_transforms:
583
- assert transform.name == TransformName.IDENTITY, (
547
+ partition_scheme.keys is not None
548
+ ), "Partition keys must be specified with partition scheme"
549
+ for key in partition_scheme.keys:
550
+ assert (
551
+ key.transform is None or key.transform.name == TransformName.IDENTITY
552
+ ), (
584
553
  "Local DeltaCAT storage does not support creating table versions "
585
554
  "with non identity transform partition spec"
586
555
  )
587
- transform_params: IdentityTransformParameters = transform.parameters
588
- partition_keys.append(transform_params.column_name)
556
+ if sort_keys is not None:
557
+ assert (
558
+ sort_keys.keys is not None
559
+ ), "Sort keys must be specified with sort scheme"
560
+ for key in sort_keys.keys:
561
+ assert (
562
+ key.transform is None or key.transform.name == TransformName.IDENTITY
563
+ ), (
564
+ "Local DeltaCAT storage does not support creating table versions "
565
+ "with non identity transform sort spec"
566
+ )
589
567
 
590
568
  latest_version = get_latest_table_version(namespace, table_name, *args, **kwargs)
591
569
  if (
@@ -602,9 +580,7 @@ def create_table_version(
602
580
  )
603
581
 
604
582
  table_locator = TableLocator.of(NamespaceLocator.of(namespace), table_name)
605
- table_obj = Table.of(
606
- table_locator, table_permissions, table_description, table_properties
607
- )
583
+ table_obj = Table.of(table_locator, table_description, table_properties)
608
584
  table_version_locator = TableVersionLocator.of(
609
585
  table_locator=table_locator, table_version=table_version
610
586
  )
@@ -617,19 +593,18 @@ def create_table_version(
617
593
  properties = {**table_version_properties, STREAM_ID_PROPERTY: stream_id}
618
594
  table_version_obj = TableVersion.of(
619
595
  table_version_locator,
620
- schema=schema,
621
- partition_keys=partition_keys,
622
- primary_key_columns=primary_key_column_names,
596
+ schema=Schema.of(schema) if schema else None,
597
+ partition_scheme=partition_scheme,
623
598
  description=table_version_description,
624
599
  properties=properties,
625
- sort_keys=sort_keys,
600
+ sort_scheme=sort_keys,
626
601
  content_types=supported_content_types,
627
602
  )
628
603
  stream_locator = StreamLocator.of(
629
- table_version_obj.locator, stream_id=stream_id, storage_type=STORAGE_TYPE
604
+ table_version_obj.locator, stream_id=stream_id, stream_format=STREAM_FORMAT
630
605
  )
631
606
  result_stream = Stream.of(
632
- stream_locator, partition_keys=partition_keys, state=CommitState.COMMITTED
607
+ stream_locator, partition_scheme=partition_scheme, state=CommitState.COMMITTED
633
608
  )
634
609
 
635
610
  params = (
@@ -658,16 +633,15 @@ def create_table_version(
658
633
  def update_table(
659
634
  namespace: str,
660
635
  table_name: str,
661
- permissions: Optional[Dict[str, Any]] = None,
662
636
  description: Optional[str] = None,
663
- properties: Optional[Dict[str, str]] = None,
637
+ properties: Optional[TableProperties] = None,
664
638
  new_table_name: Optional[str] = None,
665
639
  *args,
666
640
  **kwargs,
667
641
  ) -> None:
668
642
  cur, con = _get_sqlite3_cursor_con(kwargs)
669
643
  table_locator = TableLocator.of(NamespaceLocator.of(namespace), table_name)
670
- table_obj = Table.of(table_locator, permissions, description, properties)
644
+ table_obj = Table.of(table_locator, description, properties)
671
645
 
672
646
  params = (table_locator.canonical_string(),)
673
647
  cur.execute("DELETE FROM tables WHERE locator = ?", params)
@@ -685,10 +659,9 @@ def update_table_version(
685
659
  table_name: str,
686
660
  table_version: str,
687
661
  lifecycle_state: Optional[LifecycleState] = None,
688
- schema: Optional[Union[pa.Schema, str, bytes]] = None,
689
- schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
662
+ schema: Optional[Union[pa.Schema, Any]] = None,
690
663
  description: Optional[str] = None,
691
- properties: Optional[Dict[str, str]] = None,
664
+ properties: Optional[TableVersionProperties] = None,
692
665
  *args,
693
666
  **kwargs,
694
667
  ) -> None:
@@ -720,12 +693,11 @@ def update_table_version(
720
693
  tv_properties = {**properties, **current_props}
721
694
  table_version_obj = TableVersion.of(
722
695
  table_version_locator,
723
- schema=schema,
724
- partition_keys=current_table_version_obj.partition_keys,
725
- primary_key_columns=current_table_version_obj.primary_keys,
696
+ schema=Schema.of(schema) if schema else None,
697
+ partition_scheme=current_table_version_obj.partition_scheme,
726
698
  description=description,
727
699
  properties=tv_properties,
728
- sort_keys=current_table_version_obj.sort_keys,
700
+ sort_scheme=current_table_version_obj.sort_scheme,
729
701
  content_types=current_table_version_obj.content_types,
730
702
  )
731
703
 
@@ -757,11 +729,11 @@ def stage_stream(
757
729
 
758
730
  stream_id = uuid.uuid4().__str__()
759
731
  new_stream_locator = StreamLocator.of(
760
- existing_table_version.locator, stream_id, STORAGE_TYPE
732
+ existing_table_version.locator, stream_id, STREAM_FORMAT
761
733
  )
762
734
  new_stream = Stream.of(
763
735
  new_stream_locator,
764
- existing_stream.partition_keys,
736
+ existing_stream.partition_scheme,
765
737
  CommitState.STAGED,
766
738
  existing_stream.locator.canonical_string(),
767
739
  )
@@ -785,9 +757,9 @@ def commit_stream(stream: Stream, *args, **kwargs) -> Stream:
785
757
  )
786
758
  stream_to_commit = Stream.of(
787
759
  stream.locator,
788
- stream.partition_keys,
760
+ stream.partition_scheme,
789
761
  CommitState.COMMITTED,
790
- stream.previous_stream_digest,
762
+ stream.previous_stream_id,
791
763
  )
792
764
 
793
765
  existing_table_version.properties[
@@ -989,12 +961,10 @@ def stage_delta(
989
961
  delta_type: DeltaType = DeltaType.UPSERT,
990
962
  max_records_per_entry: Optional[int] = None,
991
963
  author: Optional[ManifestAuthor] = None,
992
- properties: Optional[Dict[str, str]] = None,
964
+ properties: Optional[DeltaProperties] = None,
993
965
  s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
994
966
  content_type: ContentType = ContentType.PARQUET,
995
- delete_parameters: Optional[DeleteParameters] = None,
996
- partition_spec: Optional[DeltaPartitionSpec] = None,
997
- partition_values: Optional[PartitionValues] = None,
967
+ entry_params: Optional[EntryParams] = None,
998
968
  *args,
999
969
  **kwargs,
1000
970
  ) -> Delta:
@@ -1016,12 +986,6 @@ def stage_delta(
1016
986
  con.commit()
1017
987
  return delta
1018
988
 
1019
- if partition_spec:
1020
- assert partition_values is not None, (
1021
- "partition_values must be provided as local "
1022
- "storage does not support computing it from input data"
1023
- )
1024
-
1025
989
  serialized_data = None
1026
990
  if content_type == ContentType.PARQUET:
1027
991
  buffer = io.BytesIO()
@@ -1040,25 +1004,35 @@ def stage_delta(
1040
1004
  stream_position = current_time_ms()
1041
1005
  delta_locator = DeltaLocator.of(partition.locator, stream_position=stream_position)
1042
1006
 
1007
+ entry_type = (
1008
+ EntryType.EQUALITY_DELETE if delta_type is DeltaType.DELETE else EntryType.DATA
1009
+ )
1043
1010
  meta = ManifestMeta.of(
1044
1011
  len(data),
1045
1012
  len(serialized_data),
1046
1013
  content_type=content_type,
1047
1014
  content_encoding=ContentEncoding.IDENTITY,
1048
1015
  source_content_length=data.nbytes,
1049
- partition_values=partition_values,
1016
+ entry_type=entry_type,
1017
+ entry_params=entry_params,
1050
1018
  )
1051
1019
 
1052
1020
  manifest = Manifest.of(
1053
1021
  entries=ManifestEntryList.of(
1054
1022
  [
1055
1023
  ManifestEntry.of(
1056
- uri=uri, url=uri, meta=meta, mandatory=True, uuid=manifest_id
1024
+ uri=uri,
1025
+ url=uri,
1026
+ meta=meta,
1027
+ mandatory=True,
1028
+ uuid=manifest_id,
1057
1029
  )
1058
1030
  ]
1059
1031
  ),
1060
1032
  author=author,
1061
1033
  uuid=manifest_id,
1034
+ entry_type=entry_type,
1035
+ entry_params=entry_params,
1062
1036
  )
1063
1037
 
1064
1038
  delta = Delta.of(
@@ -1068,7 +1042,6 @@ def stage_delta(
1068
1042
  properties=properties,
1069
1043
  manifest=manifest,
1070
1044
  previous_stream_position=partition.stream_position,
1071
- delete_parameters=delete_parameters,
1072
1045
  )
1073
1046
 
1074
1047
  params = (uri, serialized_data)
@@ -1194,7 +1167,7 @@ def get_table_version_schema(
1194
1167
  table_version: Optional[str] = None,
1195
1168
  *args,
1196
1169
  **kwargs,
1197
- ) -> Optional[Union[pa.Schema, str, bytes]]:
1170
+ ) -> Optional[Union[pa.Schema, Any]]:
1198
1171
  obj = get_table_version(namespace, table_name, table_version, *args, **kwargs)
1199
1172
 
1200
1173
  return obj.schema
@@ -1227,7 +1200,7 @@ def get_stream(
1227
1200
 
1228
1201
  cur, con = _get_sqlite3_cursor_con(kwargs)
1229
1202
  stream_locator = StreamLocator.of(
1230
- obj.locator, stream_id=stream_id, storage_type=STORAGE_TYPE
1203
+ obj.locator, stream_id=stream_id, stream_format=STREAM_FORMAT
1231
1204
  )
1232
1205
  res = cur.execute(
1233
1206
  "SELECT * FROM streams WHERE locator = ?", (stream_locator.canonical_string(),)
File without changes
@@ -0,0 +1,25 @@
1
+ import tempfile
2
+
3
+ import pytest
4
+ from deltacat.catalog import CatalogProperties
5
+ from deltacat.tests.test_utils.filesystem import temp_dir_autocleanup
6
+
7
+
8
+ @pytest.fixture
9
+ def temp_dir():
10
+ """
11
+ Temp dir which is removed after usage
12
+ note that each method which is injected with temp_dir will get a separate new tmp directory
13
+ """
14
+ with temp_dir_autocleanup() as tmp_dir:
15
+ yield tmp_dir
16
+
17
+
18
+ @pytest.fixture
19
+ def keep_temp_dir():
20
+ return tempfile.mkdtemp()
21
+
22
+
23
+ @pytest.fixture
24
+ def temp_catalog(temp_dir):
25
+ return CatalogProperties(root=temp_dir)
File without changes