deltacat 2.0.0b11__py3-none-any.whl → 2.0.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. deltacat/__init__.py +78 -3
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/conftest.py +0 -18
  6. deltacat/catalog/__init__.py +2 -0
  7. deltacat/catalog/delegate.py +445 -63
  8. deltacat/catalog/interface.py +188 -62
  9. deltacat/catalog/main/impl.py +2417 -271
  10. deltacat/catalog/model/catalog.py +49 -10
  11. deltacat/catalog/model/properties.py +38 -0
  12. deltacat/compute/compactor/compaction_session.py +97 -75
  13. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  14. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  15. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  16. deltacat/compute/compactor/repartition_session.py +8 -21
  17. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  18. deltacat/compute/compactor/steps/materialize.py +9 -7
  19. deltacat/compute/compactor/steps/repartition.py +12 -11
  20. deltacat/compute/compactor/utils/io.py +6 -5
  21. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  22. deltacat/compute/compactor/utils/system_columns.py +3 -1
  23. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  24. deltacat/compute/compactor_v2/constants.py +30 -1
  25. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  26. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  27. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  28. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  29. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  30. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  31. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  32. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  33. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  34. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  35. deltacat/compute/compactor_v2/utils/io.py +11 -4
  36. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  37. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  38. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  39. deltacat/compute/converter/converter_session.py +145 -32
  40. deltacat/compute/converter/model/convert_input.py +26 -19
  41. deltacat/compute/converter/model/convert_input_files.py +33 -16
  42. deltacat/compute/converter/model/convert_result.py +35 -16
  43. deltacat/compute/converter/model/converter_session_params.py +24 -21
  44. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  45. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  46. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  47. deltacat/compute/converter/steps/convert.py +157 -50
  48. deltacat/compute/converter/steps/dedupe.py +24 -11
  49. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  50. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  51. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  52. deltacat/compute/converter/utils/io.py +101 -12
  53. deltacat/compute/converter/utils/s3u.py +33 -27
  54. deltacat/compute/janitor.py +205 -0
  55. deltacat/compute/jobs/client.py +19 -8
  56. deltacat/compute/resource_estimation/delta.py +38 -6
  57. deltacat/compute/resource_estimation/model.py +8 -0
  58. deltacat/constants.py +44 -0
  59. deltacat/docs/autogen/schema/__init__.py +0 -0
  60. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/examples/compactor/__init__.py +0 -0
  64. deltacat/examples/compactor/aws/__init__.py +1 -0
  65. deltacat/examples/compactor/bootstrap.py +863 -0
  66. deltacat/examples/compactor/compactor.py +373 -0
  67. deltacat/examples/compactor/explorer.py +473 -0
  68. deltacat/examples/compactor/gcp/__init__.py +1 -0
  69. deltacat/examples/compactor/job_runner.py +439 -0
  70. deltacat/examples/compactor/utils/__init__.py +1 -0
  71. deltacat/examples/compactor/utils/common.py +261 -0
  72. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  80. deltacat/exceptions.py +66 -4
  81. deltacat/experimental/catalog/iceberg/impl.py +2 -2
  82. deltacat/experimental/compatibility/__init__.py +0 -0
  83. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  84. deltacat/experimental/converter_agent/__init__.py +0 -0
  85. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  86. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  87. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  88. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
  89. deltacat/experimental/storage/iceberg/impl.py +5 -3
  90. deltacat/experimental/storage/iceberg/model.py +7 -3
  91. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  92. deltacat/experimental/storage/rivulet/dataset.py +0 -3
  93. deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
  94. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
  95. deltacat/io/datasource/deltacat_datasource.py +0 -1
  96. deltacat/storage/__init__.py +20 -2
  97. deltacat/storage/interface.py +54 -32
  98. deltacat/storage/main/impl.py +1494 -541
  99. deltacat/storage/model/delta.py +27 -3
  100. deltacat/storage/model/locator.py +6 -12
  101. deltacat/storage/model/manifest.py +182 -6
  102. deltacat/storage/model/metafile.py +151 -78
  103. deltacat/storage/model/namespace.py +8 -1
  104. deltacat/storage/model/partition.py +117 -42
  105. deltacat/storage/model/schema.py +2427 -159
  106. deltacat/storage/model/sort_key.py +40 -0
  107. deltacat/storage/model/stream.py +9 -2
  108. deltacat/storage/model/table.py +12 -1
  109. deltacat/storage/model/table_version.py +11 -0
  110. deltacat/storage/model/transaction.py +1184 -208
  111. deltacat/storage/model/transform.py +81 -2
  112. deltacat/storage/model/types.py +48 -26
  113. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  114. deltacat/tests/aws/test_s3u.py +2 -31
  115. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
  116. deltacat/tests/catalog/test_catalogs.py +54 -11
  117. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
  118. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  119. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  120. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  121. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  122. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  123. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  124. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  125. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  126. deltacat/tests/compute/conftest.py +8 -44
  127. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  128. deltacat/tests/compute/converter/utils.py +15 -6
  129. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  130. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  131. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  132. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  133. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  134. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  135. deltacat/tests/compute/test_janitor.py +236 -0
  136. deltacat/tests/compute/test_util_common.py +716 -43
  137. deltacat/tests/compute/test_util_constant.py +0 -1
  138. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  139. deltacat/tests/experimental/__init__.py +1 -0
  140. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  141. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  142. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  143. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  144. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  145. deltacat/tests/storage/model/test_schema.py +171 -0
  146. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  147. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  148. deltacat/tests/storage/model/test_transaction.py +393 -48
  149. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  150. deltacat/tests/test_deltacat_api.py +988 -4
  151. deltacat/tests/test_exceptions.py +9 -5
  152. deltacat/tests/test_utils/pyarrow.py +52 -21
  153. deltacat/tests/test_utils/storage.py +23 -34
  154. deltacat/tests/types/__init__.py +0 -0
  155. deltacat/tests/types/test_tables.py +104 -0
  156. deltacat/tests/utils/exceptions.py +22 -0
  157. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  158. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  159. deltacat/tests/utils/test_daft.py +121 -31
  160. deltacat/tests/utils/test_numpy.py +1193 -0
  161. deltacat/tests/utils/test_pandas.py +1106 -0
  162. deltacat/tests/utils/test_polars.py +1040 -0
  163. deltacat/tests/utils/test_pyarrow.py +1370 -89
  164. deltacat/types/media.py +221 -11
  165. deltacat/types/tables.py +2329 -59
  166. deltacat/utils/arguments.py +33 -1
  167. deltacat/utils/daft.py +411 -150
  168. deltacat/utils/filesystem.py +100 -0
  169. deltacat/utils/metafile_locator.py +2 -1
  170. deltacat/utils/numpy.py +118 -26
  171. deltacat/utils/pandas.py +577 -48
  172. deltacat/utils/polars.py +658 -27
  173. deltacat/utils/pyarrow.py +1258 -213
  174. deltacat/utils/ray_utils/dataset.py +101 -10
  175. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  176. deltacat/utils/url.py +56 -15
  177. deltacat-2.0.0.post1.dist-info/METADATA +1163 -0
  178. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/RECORD +183 -145
  179. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/WHEEL +1 -1
  180. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  181. deltacat/compute/merge_on_read/__init__.py +0 -4
  182. deltacat/compute/merge_on_read/daft.py +0 -40
  183. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  184. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  185. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  186. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  187. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  188. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  189. deltacat/utils/s3fs.py +0 -21
  190. deltacat-2.0.0b11.dist-info/METADATA +0 -67
  191. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  192. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  193. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info/licenses}/LICENSE +0 -0
  194. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/top_level.txt +0 -0
deltacat/tests/storage/model/test_transaction_history.py (new file)
@@ -0,0 +1,886 @@
+#!/usr/bin/env python3
+"""
+Comprehensive test suite for transaction history queries (dc.transactions function).
+Tests all functionality to prevent regression and ensure robust behavior.
+"""
+
+import time
+import inspect
+import pytest
+
+import pandas as pd
+import pyarrow as pa
+import polars as pl
+import daft
+
+import deltacat as dc
+from deltacat.types.tables import DatasetType
+from deltacat.catalog.model.catalog import Catalog
+from deltacat.storage.model.types import TransactionStatus, TransactionState
+
+
+class TestTransactionHistory:
+    """Comprehensive test suite for transaction history queries."""
+
+    def setup_method(self):
+        """Set up fresh catalog for each test."""
+        dc.clear_catalogs()  # Clear any existing catalogs
+
+    def teardown_method(self):
+        """Clean up after each test."""
+        dc.clear_catalogs()
+
+    def create_test_transactions(self):
+        """Create a variety of test transactions with different characteristics."""
+        transactions_created = []
+
+        # Transaction 1: Simple single-table
+        data1 = pd.DataFrame({"id": [1, 2], "name": ["Alice", "Bob"]})
+        with dc.transaction(commit_message="Initial user data") as txn:
+            dc.write(data1, "users")
+        transactions_created.append(
+            {
+                "id": txn.id,
+                "commit_message": "Initial user data",
+                "expected_tables": 1,
+                "start_time": txn.start_time,
+            }
+        )
+
+        time.sleep(0.01)  # Ensure different timestamps
+
+        # Transaction 2: Multi-table transaction
+        products = pd.DataFrame({"product_id": [101, 102], "name": ["Laptop", "Phone"]})
+        orders = pd.DataFrame(
+            {"order_id": [1, 2], "user_id": [1, 2], "product_id": [101, 102]}
+        )
+
+        with dc.transaction(commit_message="Create products and orders") as txn:
+            dc.write(products, "products", namespace="inventory")
+            dc.write(orders, "orders", namespace="sales")
+        transactions_created.append(
+            {
+                "id": txn.id,
+                "commit_message": "Create products and orders",
+                "expected_tables": 2,
+                "start_time": txn.start_time,
+            }
+        )
+
+        time.sleep(0.01)
+
+        # Transaction 3: Update existing table
+        more_users = pd.DataFrame({"id": [3, 4], "name": ["Charlie", "Diana"]})
+        with dc.transaction(commit_message="Add more users") as txn:
+            dc.write(more_users, "users")
+        transactions_created.append(
+            {
+                "id": txn.id,
+                "commit_message": "Add more users",
+                "expected_tables": 1,
+                "start_time": txn.start_time,
+            }
+        )
+
+        time.sleep(0.01)
+
+        # Transaction 4: Large transaction (for operation count testing)
+        analytics = pd.DataFrame(
+            {"metric": ["page_views", "clicks"], "value": [1000, 150]}
+        )
+        reports = pd.DataFrame({"report_id": [1], "status": ["complete"]})
+
+        with dc.transaction(commit_message="Analytics and reporting") as txn:
+            dc.write(analytics, "metrics", namespace="analytics")
+            dc.write(reports, "reports", namespace="analytics")
+        transactions_created.append(
+            {
+                "id": txn.id,
+                "commit_message": "Analytics and reporting",
+                "expected_tables": 2,
+                "start_time": txn.start_time,
+            }
+        )
+
+        return transactions_created
+
+    def test_basic_transaction_history_query(self, temp_catalog_properties):
+        """Test basic transaction history querying with default parameters."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        created_txns = self.create_test_transactions()
+
+        # Basic query - should return all SUCCESS transactions
+        result = dc.transactions(read_as=DatasetType.PANDAS)
+
+        assert isinstance(result, pd.DataFrame)
+        assert len(result) == 4, f"Expected 4 transactions, got {len(result)}"
+
+        # Verify schema
+        expected_columns = [
+            "transaction_id",
+            "commit_message",
+            "start_time",
+            "end_time",
+            "status",
+            "operation_count",
+            "operation_types",
+            "namespace_count",
+            "table_count",
+            "table_version_count",
+            "stream_count",
+            "partition_count",
+            "delta_count",
+        ]
+        assert list(result.columns) == expected_columns
+
+        # Verify all are SUCCESS status
+        assert all(result["status"] == "SUCCESS")
+
+        # Verify sorting (most recent first)
+        start_times = result["start_time"].tolist()
+        assert start_times == sorted(start_times, reverse=True)
+
+        # Verify commit messages are preserved
+        commit_messages = set(result["commit_message"])
+        expected_messages = {txn["commit_message"] for txn in created_txns}
+        assert commit_messages == expected_messages
+
+    def test_all_dataset_types_output(self, temp_catalog_properties):
+        """Test that all supported dataset types work correctly."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        self.create_test_transactions()
+
+        # Test each dataset type
+        dataset_types = [
+            DatasetType.PANDAS,
+            DatasetType.PYARROW,
+            DatasetType.POLARS,
+            DatasetType.RAY_DATASET,
+            DatasetType.DAFT,
+        ]
+
+        for dataset_type in dataset_types:
+            result = dc.transactions(read_as=dataset_type, limit=2)
+
+            # Verify basic properties based on type
+            if dataset_type == DatasetType.PANDAS:
+                assert isinstance(result, pd.DataFrame)
+                assert len(result) == 2
+                assert list(result.columns)[0] == "transaction_id"
+
+            elif dataset_type == DatasetType.PYARROW:
+                assert isinstance(result, pa.Table)
+                assert result.num_rows == 2
+                assert result.column_names[0] == "transaction_id"
+
+            elif dataset_type == DatasetType.POLARS:
+                assert isinstance(result, pl.DataFrame)
+                assert len(result) == 2
+                assert result.columns[0] == "transaction_id"
+
+            elif dataset_type == DatasetType.RAY_DATASET:
+                # Ray dataset might be returned as different types
+                assert result is not None
+                # Convert to check count
+                df = result.to_pandas()
+                assert len(df) == 2
+
+            elif dataset_type == DatasetType.DAFT:
+                assert isinstance(result, daft.DataFrame)
+                # Convert to check count
+                df = result.to_pandas()
+                assert len(df) == 2
+
+    def test_transaction_state_filtering(self, temp_catalog_properties):
+        """Test filtering by different transaction states."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        self.create_test_transactions()
+
+        # Test default (SUCCESS only)
+        success_only = dc.transactions(read_as=DatasetType.PANDAS)
+        assert all(success_only["status"] == "SUCCESS")
+        base_count = len(success_only)
+
+        # Test with RUNNING status (should be same as default since no running transactions)
+        with_running = dc.transactions(
+            read_as=DatasetType.PANDAS,
+            status_in=[TransactionStatus.SUCCESS, TransactionStatus.RUNNING],
+        )
+        assert len(with_running) == base_count
+        assert all(
+            status in ["SUCCESS", "RUNNING"] for status in with_running["status"]
+        )
+
+        # Test with FAILED status
+        with_failed = dc.transactions(
+            read_as=DatasetType.PANDAS,
+            status_in=[TransactionStatus.SUCCESS, TransactionStatus.FAILED],
+        )
+        assert len(with_failed) == base_count
+        assert all(status in ["SUCCESS", "FAILED"] for status in with_failed["status"])
+
+        # Test with PAUSED status
+        with_paused = dc.transactions(
+            read_as=DatasetType.PANDAS,
+            status_in=[TransactionStatus.SUCCESS, TransactionStatus.PAUSED],
+        )
+        assert len(with_paused) == base_count
+        assert all(status in ["SUCCESS", "PAUSED"] for status in with_paused["status"])
+
+        # Test all states
+        all_states = dc.transactions(
+            read_as=DatasetType.PANDAS,
+            status_in=[
+                TransactionStatus.SUCCESS,
+                TransactionStatus.RUNNING,
+                TransactionStatus.FAILED,
+                TransactionStatus.PAUSED,
+            ],
+        )
+        assert len(all_states) == base_count
+
+        # Test multiple states
+        multi_state = dc.transactions(
+            read_as=DatasetType.PANDAS,
+            status_in=[
+                TransactionStatus.SUCCESS,
+                TransactionStatus.RUNNING,
+                TransactionStatus.FAILED,
+                TransactionStatus.PAUSED,
+            ],
+        )
+        assert len(multi_state) == base_count
+
+    def test_time_based_filtering(self, temp_catalog_properties):
+        """Test filtering transactions by time ranges."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        created_txns = self.create_test_transactions()
+
+        # Get all transactions first
+        all_txns = dc.transactions(read_as=DatasetType.PANDAS)
+
+        # Basic verification - we should have 4 transactions
+        assert len(all_txns) == 4, f"Expected 4 transactions, got {len(all_txns)}"
+
+        # Test simple case: start_time that should include all transactions
+        earliest_time = min(txn["start_time"] for txn in created_txns)
+        all_from_start = dc.transactions(
+            read_as=DatasetType.PANDAS, start_time=earliest_time
+        )
+        assert len(all_from_start) == 4
+
+        # Test start_time filtering after some transactions
+        # Get the second-earliest transaction's start time
+        sorted_times = sorted([txn["start_time"] for txn in created_txns])
+        mid_time = sorted_times[1]  # Second earliest
+        recent_txns = dc.transactions(read_as=DatasetType.PANDAS, start_time=mid_time)
+
+        # Should get at least the transactions at or after that time
+        assert (
+            len(recent_txns) >= 2
+        ), f"Expected at least 2 transactions after mid_time, got {len(recent_txns)}"
+        assert all(t >= mid_time for t in recent_txns["start_time"])
+
+        # Test future start_time (should return empty)
+        future_time = time.time_ns() + 1000000000  # 1 second in future
+        future_txns = dc.transactions(
+            read_as=DatasetType.PANDAS, start_time=future_time
+        )
+        assert len(future_txns) == 0
+
+    def test_limit_and_pagination(self, temp_catalog_properties):
+        """Test limiting results and pagination behavior."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        self.create_test_transactions()
+
+        # Test limit
+        limited = dc.transactions(read_as=DatasetType.PANDAS, limit=2)
+        assert len(limited) == 2
+
+        # Test limit larger than available
+        all_limited = dc.transactions(read_as=DatasetType.PANDAS, limit=10)
+        assert len(all_limited) == 4  # Only 4 transactions exist
+
+        # Test limit=0 (should raise ValueError for invalid limit)
+        with pytest.raises(ValueError):
+            dc.transactions(read_as=DatasetType.PANDAS, limit=0)
+
+        # Test limit=1 with different states
+        single_all_states = dc.transactions(
+            read_as=DatasetType.PANDAS,
+            limit=1,
+            status_in=[
+                TransactionStatus.SUCCESS,
+                TransactionStatus.RUNNING,
+                TransactionStatus.FAILED,
+                TransactionStatus.PAUSED,
+            ],
+        )
+        assert len(single_all_states) == 1
+
+        # Verify limit respects sorting (most recent first)
+        all_txns = dc.transactions(read_as=DatasetType.PANDAS)
+        limited_txns = dc.transactions(read_as=DatasetType.PANDAS, limit=2)
+
+        # Limited should be the first 2 from the full list
+        assert (
+            limited_txns["transaction_id"].tolist()
+            == all_txns["transaction_id"].head(2).tolist()
+        )
+
+    def test_commit_message_functionality(self, temp_catalog_properties):
+        """Test commit message storage and retrieval."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        # Create transactions with various commit message patterns
+        test_messages = [
+            "Simple commit message",
+            "Multi-word commit with special chars: @#$%",
+            "",  # Empty commit message
+            "Very long commit message that contains lots of text to test handling of lengthy descriptions and ensure they are properly stored and retrieved without truncation or corruption",
+            "Commit with\nmultiple\nlines",
+            "Unicode test: 🚀 ñ ü é",
+        ]
+
+        created_txn_ids = []
+        for msg in test_messages:
+            data = pd.DataFrame({"id": [1], "value": [f"data_{len(created_txn_ids)}"]})
+            with dc.transaction(commit_message=msg) as txn:
+                dc.write(data, f"table_{len(created_txn_ids)}")
+            created_txn_ids.append(txn.id)
+
+        # Query and verify all commit messages
+        result = dc.transactions(read_as=DatasetType.PANDAS)
+
+        # Create mapping of transaction_id to commit_message
+        result_messages = {
+            row["transaction_id"]: row["commit_message"] for _, row in result.iterrows()
+        }
+
+        # Verify each commit message is preserved correctly
+        for txn_id, expected_msg in zip(created_txn_ids, test_messages):
+            assert txn_id in result_messages
+            actual_msg = result_messages[txn_id]
+            # Handle empty commit messages
+            if expected_msg == "" and actual_msg is None:
+                continue
+            assert (
+                actual_msg == expected_msg
+            ), f"Commit message mismatch for {txn_id}: expected {expected_msg!r}, got {actual_msg!r}"
+
+    def test_transaction_metadata_accuracy(self, temp_catalog_properties):
+        """Test accuracy of operation counts and table counts."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        # Create transactions with known characteristics
+        test_cases = []
+
+        # Single table, single operation
+        data1 = pd.DataFrame({"id": [1], "name": ["test"]})
+        with dc.transaction(commit_message="Single table") as txn:
+            dc.write(data1, "single_table")
+        test_cases.append({"id": txn.id, "expected_tables": 1})
+
+        # Multi-table, multiple operations
+        data2a = pd.DataFrame({"id": [1], "name": ["table_a"]})
+        data2b = pd.DataFrame({"id": [1], "name": ["table_b"]})
+        data2c = pd.DataFrame({"id": [1], "name": ["table_c"]})
+
+        with dc.transaction(commit_message="Multi table") as txn:
+            dc.write(data2a, "multi_a", namespace="test_ns")
+            dc.write(data2b, "multi_b", namespace="test_ns")
+            dc.write(data2c, "multi_c", namespace="another_ns")
+        test_cases.append({"id": txn.id, "expected_tables": 3})
+
+        # Query results
+        result = dc.transactions(read_as=DatasetType.PANDAS)
+
+        # Create mapping of transaction_id to metadata
+        result_metadata = {
+            row["transaction_id"]: {
+                "table_count": row["table_count"],
+                "operation_count": row["operation_count"],
+            }
+            for _, row in result.iterrows()
+        }
+
+        # Verify metadata for each test case
+        for test_case in test_cases:
+            txn_id = test_case["id"]
+            assert txn_id in result_metadata
+
+            metadata = result_metadata[txn_id]
+
+            # Verify table count
+            expected_table_count = test_case["expected_tables"]
+            actual_table_count = metadata["table_count"]
+            assert (
+                actual_table_count == expected_table_count
+            ), f"Table count mismatch for {txn_id}: expected {expected_table_count}, got {actual_table_count}"
+
+            # Verify operation count is reasonable (should be >= table count due to internal operations)
+            assert (
+                metadata["operation_count"] > 0
+            ), f"Operation count should be > 0 for {txn_id}"
+            assert (
+                metadata["operation_count"] >= metadata["table_count"]
+            ), f"Operation count should be >= table count for {txn_id}"
+
+    def test_empty_catalog_graceful_handling(self, temp_catalog_properties):
+        """Test graceful handling of catalogs with no transactions."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        # Test all parameter combinations on empty catalog
+        test_cases = [
+            {},
+            {"limit": 5},
+            {"status_in": [TransactionStatus.SUCCESS, TransactionStatus.RUNNING]},
+            {"status_in": [TransactionStatus.SUCCESS, TransactionStatus.FAILED]},
+            {"status_in": [TransactionStatus.SUCCESS, TransactionStatus.PAUSED]},
+            {
+                "status_in": [
+                    TransactionStatus.SUCCESS,
+                    TransactionStatus.RUNNING,
+                    TransactionStatus.FAILED,
+                    TransactionStatus.PAUSED,
+                ]
+            },
+            {"start_time": time.time_ns() - 3600000000000},  # 1 hour ago
+            {"end_time": time.time_ns()},
+            {
+                "limit": 1,
+                "status_in": [
+                    TransactionStatus.SUCCESS,
+                    TransactionStatus.RUNNING,
+                    TransactionStatus.FAILED,
+                    TransactionStatus.PAUSED,
+                ],
+            },
+        ]
+
+        for params in test_cases:
+            result = dc.transactions(read_as=DatasetType.PANDAS, **params)
+
+            assert isinstance(result, pd.DataFrame), f"Failed for params {params}"
+            assert len(result) == 0, f"Expected empty result for params {params}"
+
+            # Verify schema is correct even for empty results
+            expected_columns = [
+                "transaction_id",
+                "commit_message",
+                "start_time",
+                "end_time",
+                "status",
+                "operation_count",
+                "operation_types",
+                "namespace_count",
+                "table_count",
+                "table_version_count",
+                "stream_count",
+                "partition_count",
+                "delta_count",
+            ]
+            assert (
+                list(result.columns) == expected_columns
+            ), f"Schema mismatch for params {params}"
+
+    def test_error_handling_and_edge_cases(self, temp_catalog_properties):
+        """Test error handling for various edge cases."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        self.create_test_transactions()
+
+        # Test invalid dataset type
+        with pytest.raises((ValueError, AttributeError)):
+            dc.transactions(read_as="INVALID_TYPE")
+
+        # Test negative limit (should raise ValueError for invalid limit)
+        with pytest.raises(ValueError):
+            dc.transactions(read_as=DatasetType.PANDAS, limit=-1)
+
+        # Test edge-case time values
+        # Very old start_time (should work, return all transactions)
+        old_time_result = dc.transactions(
+            read_as=DatasetType.PANDAS, start_time=1000000000  # Very old timestamp
+        )
+        assert len(old_time_result) == 4  # All transactions
+
+        # start_time > end_time (should return empty)
+        invalid_time_result = dc.transactions(
+            read_as=DatasetType.PANDAS,
+            start_time=time.time_ns(),
+            end_time=time.time_ns() - 1000000000,  # 1 second ago
+        )
+        assert len(invalid_time_result) == 0
+
+    def test_status_in_corner_cases(self, temp_catalog_properties):
+        """Test corner cases for status_in parameter."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        self.create_test_transactions()
+
+        # Get baseline count of SUCCESS transactions
+        baseline_result = dc.transactions(read_as=DatasetType.PANDAS)
+        baseline_count = len(baseline_result)
+        assert baseline_count == 4  # We created 4 transactions
+        assert all(baseline_result["status"] == "SUCCESS")
+
+        # Test status_in=None (should default to SUCCESS)
+        none_result = dc.transactions(read_as=DatasetType.PANDAS, status_in=None)
+        assert len(none_result) == baseline_count
+        assert all(none_result["status"] == "SUCCESS")
+
+        # Test status_in=[] (empty list - should default to SUCCESS)
+        empty_result = dc.transactions(read_as=DatasetType.PANDAS, status_in=[])
+        assert len(empty_result) == baseline_count
+        assert all(empty_result["status"] == "SUCCESS")
+
+        # Verify the results are identical
+        assert len(baseline_result) == len(none_result) == len(empty_result)
+
+        # Test that the transaction IDs are the same (same transactions returned)
+        baseline_ids = set(baseline_result["transaction_id"])
+        none_ids = set(none_result["transaction_id"])
+        empty_ids = set(empty_result["transaction_id"])
+
+        assert baseline_ids == none_ids == empty_ids
+
+    def test_concurrent_transaction_handling(self, temp_catalog_properties):
+        """Test behavior when transactions are created while querying."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        # Create initial transactions
+        initial_count = 2
+        for i in range(initial_count):
+            data = pd.DataFrame({"id": [i], "name": [f"user_{i}"]})
+            with dc.transaction(commit_message=f"Transaction {i}"):
+                dc.write(data, f"table_{i}")
+
+        # Query initial state
+        initial_result = dc.transactions(read_as=DatasetType.PANDAS)
+        assert len(initial_result) == initial_count
+
+        # Create another transaction
+        new_data = pd.DataFrame({"id": [999], "name": ["new_user"]})
+        with dc.transaction(commit_message="New transaction"):
+            dc.write(new_data, "new_table")
+
+        # Query updated state
+        updated_result = dc.transactions(read_as=DatasetType.PANDAS)
+        assert len(updated_result) == initial_count + 1
+
+        # Verify new transaction appears first (most recent)
+        assert updated_result.iloc[0]["commit_message"] == "New transaction"
+
+    def test_namespace_isolation_in_table_counting(self, temp_catalog_properties):
+        """Test that table counting correctly handles namespace isolation."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        # Create transaction with tables in different namespaces
+        data = pd.DataFrame({"id": [1], "value": ["test"]})
+
+        with dc.transaction(commit_message="Multi-namespace transaction") as txn:
+            # Same table name in different namespaces should count as different tables
+            dc.write(data, "shared_name", namespace="namespace_a")
+            dc.write(data, "shared_name", namespace="namespace_b")
+            dc.write(data, "unique_name", namespace="namespace_a")
+        expected_txn_id = txn.id
+
+        result = dc.transactions(read_as=DatasetType.PANDAS)
+
+        # Find our transaction
+        our_txn = result[result["transaction_id"] == expected_txn_id]
+        assert len(our_txn) == 1
+
+        # Should count distinct tables
+        table_count = our_txn.iloc[0]["table_count"]
+        assert table_count == 3, f"Expected 3 tables, got {table_count}"
+
+        table_version_count = our_txn.iloc[0]["table_version_count"]
+        assert table_version_count == 3, f"Expected 3 table versions, got {table_version_count}"
+
+        stream_count = our_txn.iloc[0]["stream_count"]
+        assert stream_count == 3, f"Expected 3 streams, got {stream_count}"
+
+        partition_count = our_txn.iloc[0]["partition_count"]
+        assert partition_count == 3, f"Expected 3 partitions, got {partition_count}"
+
+        delta_count = our_txn.iloc[0]["delta_count"]
+        assert delta_count == 3, f"Expected 3 deltas, got {delta_count}"
+
+    def test_parameter_combinations(self, temp_catalog_properties):
+        """Test various parameter combinations work correctly."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        self.create_test_transactions()
+
+        # Complex parameter combinations
+        test_combinations = [
+            # Time + limit
+            {"start_time": time.time_ns() - 3600000000000, "limit": 2},
+            # States + limit
+            {
+                "status_in": [
+                    TransactionStatus.SUCCESS,
+                    TransactionStatus.RUNNING,
+                    TransactionStatus.FAILED,
+                    TransactionStatus.PAUSED,
+                ],
+                "limit": 1,
+            },
+            # Time + states
+            {
+                "start_time": time.time_ns() - 3600000000000,
+                "status_in": [TransactionStatus.SUCCESS, TransactionStatus.RUNNING],
+            },
+            # Everything combined
+            {
+                "start_time": time.time_ns() - 3600000000000,
+                "end_time": time.time_ns(),
+                "limit": 3,
+                "status_in": [
+                    TransactionStatus.SUCCESS,
+                    TransactionStatus.RUNNING,
+                    TransactionStatus.FAILED,
+                    TransactionStatus.PAUSED,
+                ],
+            },
+        ]
+
+        for params in test_combinations:
+            result = dc.transactions(read_as=DatasetType.PANDAS, **params)
+
+            # Should not crash and should return valid DataFrame
+            assert isinstance(result, pd.DataFrame)
+            assert len(result) >= 0
+
+            # If limit is specified, result should not exceed it
+            if "limit" in params and params["limit"] > 0:
+                assert len(result) <= params["limit"]
+
+
+class TestTransactionHistoryRegression:
+    """Regression tests to ensure consistent behavior over time."""
+
+    def setup_method(self):
+        """Set up fresh catalog for each test."""
+        dc.clear_catalogs()
+
+    def teardown_method(self):
+        """Clean up after each test."""
+        dc.clear_catalogs()
+
+    def test_schema_consistency(self, temp_catalog_properties):
+        """Ensure the output schema remains consistent."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        # Create a simple transaction
+        data = pd.DataFrame({"id": [1], "name": ["test"]})
+        with dc.transaction(commit_message="Schema test"):
+            dc.write(data, "test_table")
+
+        result = dc.transactions(read_as=DatasetType.PANDAS)
+
+        # Verify exact schema
+        expected_columns = [
+            "transaction_id",
+            "commit_message",
+            "start_time",
+            "end_time",
+            "status",
+            "operation_count",
+            "operation_types",
+            "namespace_count",
+            "table_count",
+            "table_version_count",
+            "stream_count",
+            "partition_count",
+            "delta_count",
+        ]
+        assert list(result.columns) == expected_columns
+
+        # Verify data types
+        assert result["transaction_id"].dtype == "object"  # String
+        assert result["commit_message"].dtype == "object"  # String
+        assert result["start_time"].dtype == "int64"  # Integer timestamp
+        assert result["end_time"].dtype == "int64"  # Integer timestamp
+        assert result["status"].dtype == "object"  # String
+        assert result["operation_count"].dtype == "int64"  # Integer
+        assert result["table_count"].dtype == "int64"  # Integer
+
+    def test_sorting_consistency(self, temp_catalog_properties):
+        """Ensure transactions are consistently sorted by start_time descending."""
+        # Initialize catalog using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        # Create transactions with deliberate timing
+        transaction_times = []
+        for i in range(5):
+            data = pd.DataFrame({"id": [i], "name": [f"user_{i}"]})
+            with dc.transaction(commit_message=f"Transaction {i}") as txn:
+                dc.write(data, f"table_{i}")
+            transaction_times.append(txn.start_time)
+            time.sleep(0.01)  # Small delay
+
+        result = dc.transactions(read_as=DatasetType.PANDAS)
+
+        # Verify descending order by start_time
+        result_times = result["start_time"].tolist()
+        expected_times = sorted(transaction_times, reverse=True)
+
+        assert (
+            result_times == expected_times
+        ), "Transactions not properly sorted by start_time descending"
+
+    def test_function_signature_stability(self, temp_catalog_properties):
+        """Ensure function signature remains stable."""
+        # Initialize catalog using the fixture (though not needed for signature test)
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        sig = inspect.signature(dc.transactions)
+        expected_params = [
+            "catalog_name",
+            "read_as",
+            "start_time",
+            "end_time",
+            "limit",
+            "status_in",
+        ]
+
+        actual_params = list(sig.parameters.keys())
+        assert (
+            actual_params == expected_params
+        ), f"Function signature changed: {actual_params}"
+
+        # Verify default values
+        assert sig.parameters["catalog_name"].default is None
+        assert sig.parameters["read_as"].default is None
+        assert sig.parameters["start_time"].default is None
+        assert sig.parameters["end_time"].default is None
+        assert sig.parameters["limit"].default is None
+        assert sig.parameters["status_in"].default == [TransactionStatus.SUCCESS]
+
+    def test_read_transaction(self, temp_catalog_properties):
+        """Test the read_transaction() function for loading transactions returned by transactions()."""
+
+        # Initialize a clean catalog for testing using the fixture
+        dc.init()
+        dc.put_catalog("test", Catalog(temp_catalog_properties))
+
+        # Create multiple transactions with data
+        commit_msg_1 = "First test transaction"
+        commit_msg_2 = "Second test transaction"
+
+        # Create first transaction
+        data1 = pd.DataFrame({"id": [1, 2], "name": ["Alice", "Bob"]})
+        with dc.transaction(commit_message=commit_msg_1):
+            dc.write(data1, "users")
+
+        # Create second transaction
+        data2 = pd.DataFrame({"id": [3, 4], "name": ["Charlie", "Diana"]})
+        with dc.transaction(commit_message=commit_msg_2):
+            dc.write(data2, "customers")
+
+        # Test transactions() query functionality
+        result = dc.transactions(read_as=DatasetType.PANDAS)
+
+        # Verify we have the right number of transactions
+        assert len(result) == 2
+
+        # Verify column structure
+        expected_columns = [
+            "transaction_id",
+            "commit_message",
+            "start_time",
+            "end_time",
+            "status",
+            "operation_count",
+            "operation_types",
+            "namespace_count",
+            "table_count",
+            "table_version_count",
+            "stream_count",
+            "partition_count",
+            "delta_count",
+        ]
+        assert list(result.columns) == expected_columns
+
+        # Verify commit messages are preserved
+        commit_messages = set(result["commit_message"])
+        assert commit_msg_1 in commit_messages
+        assert commit_msg_2 in commit_messages
+
+        # Verify transaction metadata
+        assert all(result["status"] == "SUCCESS")
+        assert all(result["operation_count"] > 0)
+        assert all(result["namespace_count"] == 1)
+        assert all(result["table_count"] == 1)
+        assert all(result["table_version_count"] == 1)
+        assert all(result["stream_count"] == 1)
+        assert all(result["partition_count"] == 1)
+        assert all(result["delta_count"] == 1)
+
+        # Read and validate the transactions
+        transaction_id = result.iloc[0]["transaction_id"]
+        transaction_obj = dc.read_transaction(transaction_id)
+        assert transaction_obj.id == transaction_id
+        assert transaction_obj.commit_message == commit_msg_2
+        assert transaction_obj.start_time == result.iloc[0]["start_time"]
+        assert transaction_obj.end_time == result.iloc[0]["end_time"]
+        assert (
+            transaction_obj.state(temp_catalog_properties.root)
+            == TransactionState.SUCCESS
+        )
+        assert (
+            len(transaction_obj.operations) == 24 == result.iloc[0]["operation_count"]
+        )
+
+        transaction_id = result.iloc[1]["transaction_id"]
+        transaction_obj = dc.read_transaction(transaction_id)
+        assert transaction_obj.id == transaction_id
+        assert transaction_obj.commit_message == commit_msg_1
+        assert transaction_obj.start_time == result.iloc[1]["start_time"]
+        assert transaction_obj.end_time == result.iloc[1]["end_time"]
+        assert (
+            transaction_obj.state(temp_catalog_properties.root)
+            == TransactionState.SUCCESS
+        )
+        # The 1st transaction contains more operations than the 2nd since only it needed to create the namespace
+        assert (
+            len(transaction_obj.operations) == 26 == result.iloc[1]["operation_count"]
+        )
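
For readers skimming this diff, here is a minimal usage sketch of the transaction history API that the new test file exercises. It is assembled only from calls that appear in the tests above, not from documented API beyond them, and it assumes a catalog has already been registered the same way the tests do it: dc.init() followed by dc.put_catalog("test", Catalog(temp_catalog_properties)), where temp_catalog_properties is a pytest fixture supplying catalog properties rooted in a temporary directory.

import pandas as pd
import deltacat as dc
from deltacat.types.tables import DatasetType
from deltacat.storage.model.types import TransactionStatus

# Write a table inside a transaction that carries a commit message.
with dc.transaction(commit_message="Initial user data") as txn:
    dc.write(pd.DataFrame({"id": [1, 2], "name": ["Alice", "Bob"]}), "users")

# Query the transaction history as a pandas DataFrame (most recent first).
history = dc.transactions(
    read_as=DatasetType.PANDAS,
    limit=10,
    status_in=[TransactionStatus.SUCCESS],  # the default status filter
)

# Load the full transaction object behind the most recent history row.
txn_obj = dc.read_transaction(history.iloc[0]["transaction_id"])
print(txn_obj.commit_message, len(txn_obj.operations))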