deltacat 2.0.0b9__py3-none-any.whl → 2.0.0b10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. deltacat/__init__.py +27 -6
  2. deltacat/api.py +478 -123
  3. deltacat/aws/s3u.py +2 -2
  4. deltacat/benchmarking/conftest.py +1 -1
  5. deltacat/catalog/main/impl.py +12 -6
  6. deltacat/catalog/model/catalog.py +65 -47
  7. deltacat/catalog/model/properties.py +1 -3
  8. deltacat/compute/__init__.py +14 -0
  9. deltacat/compute/converter/constants.py +5 -0
  10. deltacat/compute/converter/converter_session.py +78 -36
  11. deltacat/compute/converter/model/convert_input.py +24 -4
  12. deltacat/compute/converter/model/convert_result.py +61 -0
  13. deltacat/compute/converter/model/converter_session_params.py +52 -10
  14. deltacat/compute/converter/pyiceberg/overrides.py +181 -62
  15. deltacat/compute/converter/steps/convert.py +84 -36
  16. deltacat/compute/converter/steps/dedupe.py +25 -4
  17. deltacat/compute/converter/utils/convert_task_options.py +42 -13
  18. deltacat/compute/converter/utils/iceberg_columns.py +5 -0
  19. deltacat/compute/converter/utils/io.py +82 -11
  20. deltacat/compute/converter/utils/s3u.py +13 -4
  21. deltacat/compute/jobs/__init__.py +0 -0
  22. deltacat/compute/jobs/client.py +404 -0
  23. deltacat/constants.py +4 -4
  24. deltacat/daft/daft_scan.py +7 -3
  25. deltacat/daft/translator.py +126 -0
  26. deltacat/examples/basic_logging.py +5 -3
  27. deltacat/examples/hello_world.py +4 -2
  28. deltacat/examples/indexer/__init__.py +0 -0
  29. deltacat/examples/indexer/aws/__init__.py +0 -0
  30. deltacat/examples/indexer/gcp/__init__.py +0 -0
  31. deltacat/examples/indexer/indexer.py +163 -0
  32. deltacat/examples/indexer/job_runner.py +199 -0
  33. deltacat/io/__init__.py +13 -0
  34. deltacat/io/dataset/__init__.py +0 -0
  35. deltacat/io/dataset/deltacat_dataset.py +91 -0
  36. deltacat/io/datasink/__init__.py +0 -0
  37. deltacat/io/datasink/deltacat_datasink.py +207 -0
  38. deltacat/io/datasource/__init__.py +0 -0
  39. deltacat/io/datasource/deltacat_datasource.py +580 -0
  40. deltacat/io/reader/__init__.py +0 -0
  41. deltacat/io/reader/deltacat_read_api.py +172 -0
  42. deltacat/storage/__init__.py +2 -0
  43. deltacat/storage/model/expression/__init__.py +47 -0
  44. deltacat/storage/model/expression/expression.py +656 -0
  45. deltacat/storage/model/expression/visitor.py +248 -0
  46. deltacat/storage/model/metafile.py +74 -42
  47. deltacat/storage/model/scan/push_down.py +32 -5
  48. deltacat/storage/model/types.py +5 -3
  49. deltacat/storage/rivulet/__init__.py +4 -4
  50. deltacat/tests/_io/reader/__init__.py +0 -0
  51. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  52. deltacat/tests/compute/converter/test_convert_session.py +209 -46
  53. deltacat/tests/local_deltacat_storage/__init__.py +1 -0
  54. deltacat/tests/storage/model/test_expression.py +327 -0
  55. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +2 -1
  56. deltacat/tests/storage/rivulet/test_dataset.py +1 -1
  57. deltacat/tests/storage/rivulet/test_manifest.py +1 -1
  58. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +1 -1
  59. deltacat/tests/test_deltacat_api.py +50 -9
  60. deltacat/types/media.py +141 -43
  61. deltacat/types/tables.py +35 -7
  62. deltacat/utils/daft.py +2 -2
  63. deltacat/utils/filesystem.py +39 -9
  64. deltacat/utils/polars.py +128 -0
  65. deltacat/utils/pyarrow.py +151 -15
  66. deltacat/utils/ray_utils/concurrency.py +1 -1
  67. deltacat/utils/ray_utils/runtime.py +56 -4
  68. deltacat/utils/url.py +1284 -0
  69. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b10.dist-info}/METADATA +9 -6
  70. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b10.dist-info}/RECORD +73 -48
  71. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b10.dist-info}/LICENSE +0 -0
  72. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b10.dist-info}/WHEEL +0 -0
  73. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b10.dist-info}/top_level.txt +0 -0
deltacat/__init__.py CHANGED
@@ -5,6 +5,7 @@ import deltacat.logs # noqa: F401
5
5
  from deltacat.api import (
6
6
  copy,
7
7
  get,
8
+ list,
8
9
  put,
9
10
  )
10
11
  from deltacat.catalog.delegate import (
@@ -30,13 +31,19 @@ from deltacat.catalog.delegate import (
30
31
  from deltacat.catalog.model.catalog import ( # noqa: F401
31
32
  Catalog,
32
33
  Catalogs,
34
+ raise_if_not_initialized,
33
35
  is_initialized,
34
36
  init,
35
37
  get_catalog,
36
38
  put_catalog,
37
39
  )
38
40
  from deltacat.catalog.model.table_definition import TableDefinition
41
+ from deltacat.compute import (
42
+ job_client,
43
+ local_job_client,
44
+ )
39
45
  from deltacat.storage import (
46
+ Dataset,
40
47
  DistributedDataset,
41
48
  Field,
42
49
  LifecycleState,
@@ -53,9 +60,16 @@ from deltacat.storage import (
53
60
  SortScheme,
54
61
  NullOrder,
55
62
  )
56
- from deltacat.storage.rivulet import Dataset, Datatype
57
- from deltacat.types.media import ContentEncoding, ContentType, TableType
63
+ from deltacat.storage.rivulet import Dataset as RivDataset, Datatype as RivDatatype
64
+ from deltacat.types.media import (
65
+ ContentEncoding,
66
+ ContentType,
67
+ DatasetType,
68
+ DatastoreType,
69
+ )
70
+
58
71
  from deltacat.types.tables import TableWriteMode
72
+ from deltacat.utils.url import DeltaCatUrl
59
73
 
60
74
  __iceberg__ = []
61
75
  if importlib.util.find_spec("pyiceberg") is not None:
@@ -67,13 +81,16 @@ if importlib.util.find_spec("pyiceberg") is not None:
67
81
 
68
82
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
69
83
 
70
- __version__ = "2.0.0b9"
84
+ __version__ = "2.0.0b10"
71
85
 
72
86
 
73
87
  __all__ = [
74
88
  "__version__",
89
+ "job_client",
90
+ "local_job_client",
75
91
  "copy",
76
92
  "get",
93
+ "list",
77
94
  "put",
78
95
  "alter_table",
79
96
  "create_table",
@@ -95,14 +112,19 @@ __all__ = [
95
112
  "read_table",
96
113
  "get_catalog",
97
114
  "put_catalog",
115
+ "raise_if_not_initialized",
98
116
  "is_initialized",
99
117
  "init",
100
118
  "Catalog",
101
119
  "ContentType",
102
120
  "ContentEncoding",
103
- "DistributedDataset",
104
121
  "Dataset",
105
- "Datatype",
122
+ "DatasetType",
123
+ "DatastoreType",
124
+ "DeltaCatUrl",
125
+ "DistributedDataset",
126
+ "RivDataset",
127
+ "RivDatatype",
106
128
  "Field",
107
129
  "LifecycleState",
108
130
  "ListResult",
@@ -118,7 +140,6 @@ __all__ = [
118
140
  "SortOrder",
119
141
  "SortScheme",
120
142
  "TableDefinition",
121
- "TableType",
122
143
  "TableWriteMode",
123
144
  ]
124
145