@lix-js/sdk 0.6.0-preview.0 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. package/README.md +9 -0
  2. package/SKILL.md +468 -0
  3. package/dist/engine-wasm/index.d.ts +15 -11
  4. package/dist/engine-wasm/index.js +105 -38
  5. package/dist/engine-wasm/wasm/lix_engine.d.ts +14 -2
  6. package/dist/engine-wasm/wasm/lix_engine.js +18 -17
  7. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  8. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +2 -1
  9. package/dist/generated/builtin-schemas.d.ts +31 -41
  10. package/dist/generated/builtin-schemas.js +52 -56
  11. package/dist/open-lix.d.ts +141 -24
  12. package/dist/open-lix.js +199 -35
  13. package/dist/sqlite/index.js +99 -22
  14. package/dist-engine-src/README.md +18 -0
  15. package/dist-engine-src/src/backend/kv.rs +358 -0
  16. package/dist-engine-src/src/backend/mod.rs +12 -0
  17. package/dist-engine-src/src/backend/testing.rs +658 -0
  18. package/dist-engine-src/src/backend/types.rs +96 -0
  19. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  20. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  21. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  22. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  23. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  24. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  25. package/dist-engine-src/src/cel/context.rs +86 -0
  26. package/dist-engine-src/src/cel/error.rs +19 -0
  27. package/dist-engine-src/src/cel/mod.rs +8 -0
  28. package/dist-engine-src/src/cel/provider.rs +9 -0
  29. package/dist-engine-src/src/cel/runtime.rs +167 -0
  30. package/dist-engine-src/src/cel/value.rs +50 -0
  31. package/dist-engine-src/src/changelog/codec.rs +321 -0
  32. package/dist-engine-src/src/changelog/context.rs +92 -0
  33. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  34. package/dist-engine-src/src/changelog/mod.rs +13 -0
  35. package/dist-engine-src/src/changelog/reader.rs +20 -0
  36. package/dist-engine-src/src/changelog/storage.rs +220 -0
  37. package/dist-engine-src/src/changelog/types.rs +38 -0
  38. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  39. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  40. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  41. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +135 -0
  46. package/dist-engine-src/src/common/metadata.rs +35 -0
  47. package/dist-engine-src/src/common/mod.rs +23 -0
  48. package/dist-engine-src/src/common/types.rs +105 -0
  49. package/dist-engine-src/src/common/wire.rs +222 -0
  50. package/dist-engine-src/src/engine.rs +239 -0
  51. package/dist-engine-src/src/entity_identity.rs +285 -0
  52. package/dist-engine-src/src/functions/context.rs +327 -0
  53. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  54. package/dist-engine-src/src/functions/mod.rs +18 -0
  55. package/dist-engine-src/src/functions/provider.rs +130 -0
  56. package/dist-engine-src/src/functions/state.rs +363 -0
  57. package/dist-engine-src/src/functions/types.rs +37 -0
  58. package/dist-engine-src/src/init.rs +505 -0
  59. package/dist-engine-src/src/json_store/compression.rs +77 -0
  60. package/dist-engine-src/src/json_store/context.rs +129 -0
  61. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  62. package/dist-engine-src/src/json_store/mod.rs +9 -0
  63. package/dist-engine-src/src/json_store/store.rs +236 -0
  64. package/dist-engine-src/src/json_store/types.rs +52 -0
  65. package/dist-engine-src/src/lib.rs +61 -0
  66. package/dist-engine-src/src/live_state/context.rs +2241 -0
  67. package/dist-engine-src/src/live_state/mod.rs +15 -0
  68. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  69. package/dist-engine-src/src/live_state/reader.rs +23 -0
  70. package/dist-engine-src/src/live_state/types.rs +239 -0
  71. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  72. package/dist-engine-src/src/plugin/archive.rs +441 -0
  73. package/dist-engine-src/src/plugin/component.rs +183 -0
  74. package/dist-engine-src/src/plugin/install.rs +637 -0
  75. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  76. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  77. package/dist-engine-src/src/plugin/mod.rs +33 -0
  78. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  79. package/dist-engine-src/src/plugin/storage.rs +74 -0
  80. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  81. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  82. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  83. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  84. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  85. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  86. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  92. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  93. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  94. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  100. package/dist-engine-src/src/schema/definition.json +157 -0
  101. package/dist-engine-src/src/schema/definition.rs +636 -0
  102. package/dist-engine-src/src/schema/key.rs +206 -0
  103. package/dist-engine-src/src/schema/mod.rs +20 -0
  104. package/dist-engine-src/src/schema/seed.rs +14 -0
  105. package/dist-engine-src/src/schema/tests.rs +739 -0
  106. package/dist-engine-src/src/schema_registry.rs +294 -0
  107. package/dist-engine-src/src/session/context.rs +366 -0
  108. package/dist-engine-src/src/session/create_version.rs +80 -0
  109. package/dist-engine-src/src/session/execute.rs +447 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +437 -0
  116. package/dist-engine-src/src/session/mod.rs +25 -0
  117. package/dist-engine-src/src/session/switch_version.rs +121 -0
  118. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  119. package/dist-engine-src/src/sql2/classify.rs +147 -0
  120. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  121. package/dist-engine-src/src/sql2/context.rs +307 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  127. package/dist-engine-src/src/sql2/error.rs +196 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  138. package/dist-engine-src/src/sql2/mod.rs +43 -0
  139. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  140. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  141. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  142. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  143. package/dist-engine-src/src/sql2/session.rs +135 -0
  144. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  148. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  149. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  150. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  153. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  154. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  155. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  156. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  157. package/dist-engine-src/src/storage/context.rs +356 -0
  158. package/dist-engine-src/src/storage/mod.rs +14 -0
  159. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  160. package/dist-engine-src/src/storage/types.rs +501 -0
  161. package/dist-engine-src/src/storage_bench.rs +3406 -0
  162. package/dist-engine-src/src/test_support.rs +81 -0
  163. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  164. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  165. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  166. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  167. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  168. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  169. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  170. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  171. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  172. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  173. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  174. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  175. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  176. package/dist-engine-src/src/transaction/context.rs +1307 -0
  177. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  178. package/dist-engine-src/src/transaction/mod.rs +11 -0
  179. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  180. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  181. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  182. package/dist-engine-src/src/transaction/types.rs +351 -0
  183. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  184. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  185. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  186. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  187. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  188. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  189. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  190. package/dist-engine-src/src/version/context.rs +52 -0
  191. package/dist-engine-src/src/version/mod.rs +12 -0
  192. package/dist-engine-src/src/version/refs.rs +421 -0
  193. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  194. package/dist-engine-src/src/version/types.rs +21 -0
  195. package/dist-engine-src/src/wasm/mod.rs +60 -0
  196. package/package.json +68 -63
@@ -0,0 +1,1187 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use async_trait::async_trait;
5
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray, UInt64Array};
6
+ use datafusion::arrow::compute::{and, filter_record_batch};
7
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
8
+ use datafusion::arrow::record_batch::RecordBatch;
9
+ use datafusion::catalog::{Session, TableProvider};
10
+ use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue};
11
+ use datafusion::datasource::TableType;
12
+ use datafusion::execution::TaskContext;
13
+ use datafusion::logical_expr::dml::InsertOp;
14
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
15
+ use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
16
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
17
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
18
+ use datafusion::physical_plan::{
19
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
20
+ };
21
+ use futures_util::{stream, TryStreamExt};
22
+ use serde_json::Value as JsonValue;
23
+
24
+ use crate::live_state::{LiveStateFilter, LiveStateReader, LiveStateRow, LiveStateScanRequest};
25
+ use crate::sql2::dml::{InsertExec, InsertSink};
26
+ use crate::sql2::record_batch::record_batch_with_row_count;
27
+ use crate::sql2::write_normalization::{InsertCell, SqlCell, UpdateAssignmentValues};
28
+ use crate::sql2::{
29
+ SqlWriteContext, WriteAccess, WriteContextLiveStateReader, WriteContextVersionRefReader,
30
+ };
31
+ use crate::transaction::types::{
32
+ LogicalPrimaryKey, StageRow, StageRowOrigin, StageWrite, StageWriteMode, StageWriteOperation,
33
+ };
34
+ use crate::version::{
35
+ version_descriptor_stage_row, version_descriptor_tombstone_row, version_ref_stage_row,
36
+ version_ref_tombstone_row, VersionRefReader,
37
+ };
38
+ use crate::LixError;
39
+ use crate::GLOBAL_VERSION_ID;
40
+
41
+ pub(crate) async fn register_lix_version_provider(
42
+ session: &datafusion::prelude::SessionContext,
43
+ live_state: Arc<dyn LiveStateReader>,
44
+ version_ref: Arc<dyn VersionRefReader>,
45
+ ) -> Result<(), LixError> {
46
+ session
47
+ .register_table(
48
+ "lix_version",
49
+ Arc::new(LixVersionProvider::new(live_state, version_ref)),
50
+ )
51
+ .map_err(datafusion_error_to_lix_error)?;
52
+ Ok(())
53
+ }
54
+
55
+ pub(crate) async fn register_lix_version_write_provider(
56
+ session: &datafusion::prelude::SessionContext,
57
+ write_ctx: SqlWriteContext,
58
+ ) -> Result<(), LixError> {
59
+ session
60
+ .register_table(
61
+ "lix_version",
62
+ Arc::new(LixVersionProvider::with_write(write_ctx)),
63
+ )
64
+ .map_err(datafusion_error_to_lix_error)?;
65
+ Ok(())
66
+ }
67
+
68
+ struct LixVersionProvider {
69
+ schema: SchemaRef,
70
+ live_state: Arc<dyn LiveStateReader>,
71
+ version_ref: Arc<dyn VersionRefReader>,
72
+ write_access: WriteAccess,
73
+ }
74
+
75
+ impl std::fmt::Debug for LixVersionProvider {
76
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77
+ f.debug_struct("LixVersionProvider").finish()
78
+ }
79
+ }
80
+
81
+ impl LixVersionProvider {
82
+ fn new(live_state: Arc<dyn LiveStateReader>, version_ref: Arc<dyn VersionRefReader>) -> Self {
83
+ Self {
84
+ schema: lix_version_schema(),
85
+ live_state,
86
+ version_ref,
87
+ write_access: WriteAccess::read_only(),
88
+ }
89
+ }
90
+
91
+ fn with_write(write_ctx: SqlWriteContext) -> Self {
92
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
93
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
94
+ Self {
95
+ schema: lix_version_schema(),
96
+ live_state,
97
+ version_ref,
98
+ write_access: WriteAccess::write(write_ctx),
99
+ }
100
+ }
101
+ }
102
+
103
+ #[async_trait]
104
+ impl TableProvider for LixVersionProvider {
105
+ fn as_any(&self) -> &dyn Any {
106
+ self
107
+ }
108
+
109
+ fn schema(&self) -> SchemaRef {
110
+ Arc::clone(&self.schema)
111
+ }
112
+
113
+ fn table_type(&self) -> TableType {
114
+ TableType::Base
115
+ }
116
+
117
+ fn supports_filters_pushdown(
118
+ &self,
119
+ filters: &[&Expr],
120
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
121
+ Ok(filters
122
+ .iter()
123
+ .map(|_| TableProviderFilterPushDown::Unsupported)
124
+ .collect())
125
+ }
126
+
127
+ async fn scan(
128
+ &self,
129
+ _state: &dyn Session,
130
+ projection: Option<&Vec<usize>>,
131
+ _filters: &[Expr],
132
+ _limit: Option<usize>,
133
+ ) -> Result<Arc<dyn ExecutionPlan>> {
134
+ Ok(Arc::new(LixVersionScanExec::new(
135
+ Arc::clone(&self.live_state),
136
+ Arc::clone(&self.version_ref),
137
+ projected_schema(&self.schema, projection),
138
+ projection.cloned(),
139
+ )))
140
+ }
141
+
142
+ async fn insert_into(
143
+ &self,
144
+ _state: &dyn Session,
145
+ input: Arc<dyn ExecutionPlan>,
146
+ insert_op: InsertOp,
147
+ ) -> Result<Arc<dyn ExecutionPlan>> {
148
+ if insert_op != InsertOp::Append {
149
+ return not_impl_err!("{insert_op} not implemented for lix_version yet");
150
+ }
151
+
152
+ let write_ctx = self.write_access.require_write("INSERT into lix_version")?;
153
+ let sink = LixVersionInsertSink::new(input.schema(), write_ctx);
154
+ Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
155
+ }
156
+
157
+ async fn delete_from(
158
+ &self,
159
+ state: &dyn Session,
160
+ filters: Vec<Expr>,
161
+ ) -> Result<Arc<dyn ExecutionPlan>> {
162
+ let write_ctx = self.write_access.require_write("DELETE FROM lix_version")?;
163
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
164
+ let physical_filters = filters
165
+ .iter()
166
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
167
+ .collect::<Result<Vec<_>>>()?;
168
+
169
+ Ok(Arc::new(LixVersionDeleteExec::new(
170
+ write_ctx,
171
+ Arc::clone(&self.live_state),
172
+ Arc::clone(&self.version_ref),
173
+ Arc::clone(&self.schema),
174
+ physical_filters,
175
+ )))
176
+ }
177
+
178
+ async fn update(
179
+ &self,
180
+ state: &dyn Session,
181
+ assignments: Vec<(String, Expr)>,
182
+ filters: Vec<Expr>,
183
+ ) -> Result<Arc<dyn ExecutionPlan>> {
184
+ let write_ctx = self.write_access.require_write("UPDATE lix_version")?;
185
+ validate_lix_version_update_assignments(&assignments)?;
186
+
187
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
188
+ let physical_assignments = assignments
189
+ .iter()
190
+ .map(|(column_name, expr)| {
191
+ Ok((
192
+ column_name.clone(),
193
+ create_physical_expr(expr, &df_schema, state.execution_props())?,
194
+ ))
195
+ })
196
+ .collect::<Result<Vec<_>>>()?;
197
+ let physical_filters = filters
198
+ .iter()
199
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
200
+ .collect::<Result<Vec<_>>>()?;
201
+
202
+ Ok(Arc::new(LixVersionUpdateExec::new(
203
+ write_ctx,
204
+ Arc::clone(&self.live_state),
205
+ Arc::clone(&self.version_ref),
206
+ Arc::clone(&self.schema),
207
+ physical_assignments,
208
+ physical_filters,
209
+ )))
210
+ }
211
+ }
212
+
213
+ struct LixVersionInsertSink {
214
+ write_ctx: SqlWriteContext,
215
+ }
216
+
217
+ impl std::fmt::Debug for LixVersionInsertSink {
218
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
219
+ f.debug_struct("LixVersionInsertSink").finish()
220
+ }
221
+ }
222
+
223
+ impl LixVersionInsertSink {
224
+ fn new(_schema: SchemaRef, write_ctx: SqlWriteContext) -> Self {
225
+ Self { write_ctx }
226
+ }
227
+ }
228
+
229
+ impl DisplayAs for LixVersionInsertSink {
230
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
231
+ match t {
232
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
233
+ write!(f, "LixVersionInsertSink")
234
+ }
235
+ DisplayFormatType::TreeRender => write!(f, "LixVersionInsertSink"),
236
+ }
237
+ }
238
+ }
239
+
240
+ #[async_trait]
241
+ impl InsertSink for LixVersionInsertSink {
242
+ async fn write_batches(
243
+ &self,
244
+ batches: Vec<RecordBatch>,
245
+ _context: &Arc<TaskContext>,
246
+ ) -> Result<u64> {
247
+ let default_commit_id = self
248
+ .write_ctx
249
+ .load_version_head(&self.write_ctx.active_version_id())
250
+ .await
251
+ .map_err(lix_error_to_datafusion_error)?
252
+ .ok_or_else(|| {
253
+ DataFusionError::Execution(
254
+ "INSERT into lix_version could not resolve active version head".to_string(),
255
+ )
256
+ })?;
257
+ let mut rows = Vec::new();
258
+ let mut count = 0u64;
259
+ for batch in batches {
260
+ let version_rows = version_insert_rows_from_batch(&batch, &default_commit_id)?;
261
+ count = count
262
+ .checked_add(u64::try_from(version_rows.len()).map_err(|_| {
263
+ DataFusionError::Execution("INSERT row count overflow".to_string())
264
+ })?)
265
+ .ok_or_else(|| DataFusionError::Execution("INSERT row count overflow".into()))?;
266
+ rows.extend(version_rows.into_iter().flat_map(version_insert_stage_rows));
267
+ }
268
+
269
+ if !rows.is_empty() {
270
+ self.write_ctx
271
+ .stage_write(StageWrite::Rows {
272
+ mode: StageWriteMode::Insert,
273
+ rows,
274
+ })
275
+ .await
276
+ .map_err(lix_error_to_datafusion_error)?;
277
+ }
278
+
279
+ Ok(count)
280
+ }
281
+ }
282
+
283
+ struct LixVersionDeleteExec {
284
+ write_ctx: SqlWriteContext,
285
+ active_version_id: String,
286
+ live_state: Arc<dyn LiveStateReader>,
287
+ version_ref: Arc<dyn VersionRefReader>,
288
+ table_schema: SchemaRef,
289
+ filters: Vec<Arc<dyn PhysicalExpr>>,
290
+ result_schema: SchemaRef,
291
+ properties: Arc<PlanProperties>,
292
+ }
293
+
294
+ impl std::fmt::Debug for LixVersionDeleteExec {
295
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
296
+ f.debug_struct("LixVersionDeleteExec").finish()
297
+ }
298
+ }
299
+
300
+ impl LixVersionDeleteExec {
301
+ fn new(
302
+ write_ctx: SqlWriteContext,
303
+ live_state: Arc<dyn LiveStateReader>,
304
+ version_ref: Arc<dyn VersionRefReader>,
305
+ table_schema: SchemaRef,
306
+ filters: Vec<Arc<dyn PhysicalExpr>>,
307
+ ) -> Self {
308
+ let result_schema = dml_count_schema();
309
+ let properties = dml_plan_properties(Arc::clone(&result_schema));
310
+ let active_version_id = write_ctx.active_version_id();
311
+ Self {
312
+ write_ctx,
313
+ active_version_id,
314
+ live_state,
315
+ version_ref,
316
+ table_schema,
317
+ filters,
318
+ result_schema,
319
+ properties: Arc::new(properties),
320
+ }
321
+ }
322
+ }
323
+
324
+ impl DisplayAs for LixVersionDeleteExec {
325
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
326
+ match t {
327
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
328
+ write!(f, "LixVersionDeleteExec(filters={})", self.filters.len())
329
+ }
330
+ DisplayFormatType::TreeRender => write!(f, "LixVersionDeleteExec"),
331
+ }
332
+ }
333
+ }
334
+
335
+ impl ExecutionPlan for LixVersionDeleteExec {
336
+ fn name(&self) -> &str {
337
+ "LixVersionDeleteExec"
338
+ }
339
+
340
+ fn as_any(&self) -> &dyn Any {
341
+ self
342
+ }
343
+
344
+ fn properties(&self) -> &Arc<PlanProperties> {
345
+ &self.properties
346
+ }
347
+
348
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
349
+ Vec::new()
350
+ }
351
+
352
+ fn with_new_children(
353
+ self: Arc<Self>,
354
+ children: Vec<Arc<dyn ExecutionPlan>>,
355
+ ) -> Result<Arc<dyn ExecutionPlan>> {
356
+ if !children.is_empty() {
357
+ return Err(DataFusionError::Execution(
358
+ "LixVersionDeleteExec does not accept children".to_string(),
359
+ ));
360
+ }
361
+ Ok(self)
362
+ }
363
+
364
+ fn execute(
365
+ &self,
366
+ partition: usize,
367
+ _context: Arc<TaskContext>,
368
+ ) -> Result<SendableRecordBatchStream> {
369
+ if partition != 0 {
370
+ return Err(DataFusionError::Execution(format!(
371
+ "LixVersionDeleteExec only exposes one partition, got {partition}"
372
+ )));
373
+ }
374
+ let write_ctx = self.write_ctx.clone();
375
+ let active_version_id = self.active_version_id.clone();
376
+ let live_state = Arc::clone(&self.live_state);
377
+ let version_ref = Arc::clone(&self.version_ref);
378
+ let filters = self.filters.clone();
379
+ let table_schema = Arc::clone(&self.table_schema);
380
+ let result_schema = Arc::clone(&self.result_schema);
381
+ let stream_schema = Arc::clone(&result_schema);
382
+
383
+ let stream = stream::once(async move {
384
+ let rows = load_version_rows(live_state, version_ref)
385
+ .await
386
+ .map_err(lix_error_to_datafusion_error)?;
387
+ let source_batch = version_record_batch(&version_projection_for_scan(None), &rows)?;
388
+ let matched_batch = filter_version_batch(source_batch, &filters)?;
389
+ let version_rows = version_rows_from_batch(&matched_batch)?;
390
+ reject_protected_version_deletes(&version_rows, &active_version_id)?;
391
+ let count = u64::try_from(version_rows.len())
392
+ .map_err(|_| DataFusionError::Execution("DELETE row count overflow".to_string()))?;
393
+ let rows = version_rows
394
+ .into_iter()
395
+ .flat_map(version_tombstone_rows)
396
+ .collect::<Vec<_>>();
397
+
398
+ if !rows.is_empty() {
399
+ write_ctx
400
+ .stage_write(StageWrite::Rows {
401
+ mode: StageWriteMode::Replace,
402
+ rows,
403
+ })
404
+ .await
405
+ .map_err(lix_error_to_datafusion_error)?;
406
+ }
407
+
408
+ let _ = table_schema;
409
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
410
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
411
+ )]))
412
+ })
413
+ .try_flatten();
414
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
415
+ result_schema,
416
+ stream,
417
+ )))
418
+ }
419
+ }
420
+
421
+ struct LixVersionUpdateExec {
422
+ write_ctx: SqlWriteContext,
423
+ live_state: Arc<dyn LiveStateReader>,
424
+ version_ref: Arc<dyn VersionRefReader>,
425
+ table_schema: SchemaRef,
426
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
427
+ filters: Vec<Arc<dyn PhysicalExpr>>,
428
+ result_schema: SchemaRef,
429
+ properties: Arc<PlanProperties>,
430
+ }
431
+
432
+ impl std::fmt::Debug for LixVersionUpdateExec {
433
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
434
+ f.debug_struct("LixVersionUpdateExec").finish()
435
+ }
436
+ }
437
+
438
+ impl LixVersionUpdateExec {
439
+ fn new(
440
+ write_ctx: SqlWriteContext,
441
+ live_state: Arc<dyn LiveStateReader>,
442
+ version_ref: Arc<dyn VersionRefReader>,
443
+ table_schema: SchemaRef,
444
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
445
+ filters: Vec<Arc<dyn PhysicalExpr>>,
446
+ ) -> Self {
447
+ let result_schema = dml_count_schema();
448
+ let properties = dml_plan_properties(Arc::clone(&result_schema));
449
+ Self {
450
+ write_ctx,
451
+ live_state,
452
+ version_ref,
453
+ table_schema,
454
+ assignments,
455
+ filters,
456
+ result_schema,
457
+ properties: Arc::new(properties),
458
+ }
459
+ }
460
+ }
461
+
462
+ impl DisplayAs for LixVersionUpdateExec {
463
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
464
+ match t {
465
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
466
+ write!(
467
+ f,
468
+ "LixVersionUpdateExec(assignments={}, filters={})",
469
+ self.assignments.len(),
470
+ self.filters.len()
471
+ )
472
+ }
473
+ DisplayFormatType::TreeRender => write!(f, "LixVersionUpdateExec"),
474
+ }
475
+ }
476
+ }
477
+
478
+ impl ExecutionPlan for LixVersionUpdateExec {
479
+ fn name(&self) -> &str {
480
+ "LixVersionUpdateExec"
481
+ }
482
+
483
+ fn as_any(&self) -> &dyn Any {
484
+ self
485
+ }
486
+
487
+ fn properties(&self) -> &Arc<PlanProperties> {
488
+ &self.properties
489
+ }
490
+
491
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
492
+ Vec::new()
493
+ }
494
+
495
+ fn with_new_children(
496
+ self: Arc<Self>,
497
+ children: Vec<Arc<dyn ExecutionPlan>>,
498
+ ) -> Result<Arc<dyn ExecutionPlan>> {
499
+ if !children.is_empty() {
500
+ return Err(DataFusionError::Execution(
501
+ "LixVersionUpdateExec does not accept children".to_string(),
502
+ ));
503
+ }
504
+ Ok(self)
505
+ }
506
+
507
+ fn execute(
508
+ &self,
509
+ partition: usize,
510
+ _context: Arc<TaskContext>,
511
+ ) -> Result<SendableRecordBatchStream> {
512
+ if partition != 0 {
513
+ return Err(DataFusionError::Execution(format!(
514
+ "LixVersionUpdateExec only exposes one partition, got {partition}"
515
+ )));
516
+ }
517
+ let write_ctx = self.write_ctx.clone();
518
+ let live_state = Arc::clone(&self.live_state);
519
+ let version_ref = Arc::clone(&self.version_ref);
520
+ let table_schema = Arc::clone(&self.table_schema);
521
+ let assignments = self.assignments.clone();
522
+ let filters = self.filters.clone();
523
+ let result_schema = Arc::clone(&self.result_schema);
524
+ let stream_schema = Arc::clone(&result_schema);
525
+
526
+ let stream = stream::once(async move {
527
+ let rows = load_version_rows(live_state, version_ref)
528
+ .await
529
+ .map_err(lix_error_to_datafusion_error)?;
530
+ let source_batch = version_record_batch(&version_projection_for_scan(None), &rows)?;
531
+ let matched_batch = filter_version_batch(source_batch, &filters)?;
532
+ let version_rows =
533
+ version_update_rows_from_batch(&matched_batch, &assignments, &table_schema)?;
534
+ let count = u64::try_from(version_rows.len())
535
+ .map_err(|_| DataFusionError::Execution("UPDATE row count overflow".to_string()))?;
536
+ let rows = version_rows
537
+ .into_iter()
538
+ .flat_map(version_update_stage_rows)
539
+ .collect::<Vec<_>>();
540
+
541
+ if !rows.is_empty() {
542
+ write_ctx
543
+ .stage_write(StageWrite::Rows {
544
+ mode: StageWriteMode::Replace,
545
+ rows,
546
+ })
547
+ .await
548
+ .map_err(lix_error_to_datafusion_error)?;
549
+ }
550
+
551
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
552
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
553
+ )]))
554
+ })
555
+ .try_flatten();
556
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
557
+ result_schema,
558
+ stream,
559
+ )))
560
+ }
561
+ }
562
+
563
+ struct LixVersionScanExec {
564
+ live_state: Arc<dyn LiveStateReader>,
565
+ version_ref: Arc<dyn VersionRefReader>,
566
+ schema: SchemaRef,
567
+ projection: Option<Vec<usize>>,
568
+ properties: Arc<PlanProperties>,
569
+ }
570
+
571
+ impl std::fmt::Debug for LixVersionScanExec {
572
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
573
+ f.debug_struct("LixVersionScanExec").finish()
574
+ }
575
+ }
576
+
577
+ impl LixVersionScanExec {
578
+ fn new(
579
+ live_state: Arc<dyn LiveStateReader>,
580
+ version_ref: Arc<dyn VersionRefReader>,
581
+ schema: SchemaRef,
582
+ projection: Option<Vec<usize>>,
583
+ ) -> Self {
584
+ let properties = PlanProperties::new(
585
+ EquivalenceProperties::new(schema.clone()),
586
+ Partitioning::UnknownPartitioning(1),
587
+ EmissionType::Incremental,
588
+ Boundedness::Bounded,
589
+ );
590
+ Self {
591
+ live_state,
592
+ version_ref,
593
+ schema,
594
+ projection,
595
+ properties: Arc::new(properties),
596
+ }
597
+ }
598
+ }
599
+
600
+ impl DisplayAs for LixVersionScanExec {
601
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
602
+ match t {
603
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
604
+ write!(f, "LixVersionScanExec")
605
+ }
606
+ DisplayFormatType::TreeRender => write!(f, "LixVersionScanExec"),
607
+ }
608
+ }
609
+ }
610
+
611
+ impl ExecutionPlan for LixVersionScanExec {
612
+ fn name(&self) -> &str {
613
+ "LixVersionScanExec"
614
+ }
615
+
616
+ fn as_any(&self) -> &dyn Any {
617
+ self
618
+ }
619
+
620
+ fn properties(&self) -> &Arc<PlanProperties> {
621
+ &self.properties
622
+ }
623
+
624
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
625
+ Vec::new()
626
+ }
627
+
628
+ fn with_new_children(
629
+ self: Arc<Self>,
630
+ children: Vec<Arc<dyn ExecutionPlan>>,
631
+ ) -> Result<Arc<dyn ExecutionPlan>> {
632
+ if !children.is_empty() {
633
+ return Err(DataFusionError::Execution(
634
+ "LixVersionScanExec does not accept children".to_string(),
635
+ ));
636
+ }
637
+ Ok(self)
638
+ }
639
+
640
+ fn execute(
641
+ &self,
642
+ partition: usize,
643
+ _context: Arc<TaskContext>,
644
+ ) -> Result<SendableRecordBatchStream> {
645
+ if partition != 0 {
646
+ return Err(DataFusionError::Execution(format!(
647
+ "LixVersionScanExec only exposes one partition, got {partition}"
648
+ )));
649
+ }
650
+
651
+ let live_state = Arc::clone(&self.live_state);
652
+ let version_ref = Arc::clone(&self.version_ref);
653
+ let projection = version_projection_for_scan(self.projection.as_ref());
654
+ let schema = Arc::clone(&self.schema);
655
+ let stream = stream::once(async move {
656
+ let rows = load_version_rows(live_state, version_ref)
657
+ .await
658
+ .map_err(lix_error_to_datafusion_error)?;
659
+ version_record_batch(&projection, &rows)
660
+ });
661
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
662
+ }
663
+ }
664
+
665
/// One row of the `lix_version` surface.
#[derive(Debug, Clone, PartialEq, Eq)]
struct VersionRow {
    /// Version identifier (the `id` column).
    id: String,
    /// Version name (the `name` column).
    name: String,
    /// Value of the `hidden` column.
    hidden: bool,
    /// Value of the `commit_id` column.
    commit_id: String,
}
672
+
673
/// Columns of the `lix_version` surface, used to describe a scan projection.
#[derive(Debug, Clone, Copy)]
enum VersionColumn {
    /// The `id` column.
    Id,
    /// The `name` column.
    Name,
    /// The `hidden` column.
    Hidden,
    /// The `commit_id` column.
    CommitId,
}
680
+
681
+ async fn load_version_rows(
682
+ live_state: Arc<dyn LiveStateReader>,
683
+ version_ref: Arc<dyn VersionRefReader>,
684
+ ) -> Result<Vec<VersionRow>, LixError> {
685
+ let descriptor_rows = live_state
686
+ .scan_rows(&LiveStateScanRequest {
687
+ filter: LiveStateFilter {
688
+ schema_keys: vec!["lix_version_descriptor".to_string()],
689
+ version_ids: vec![GLOBAL_VERSION_ID.to_string()],
690
+ ..LiveStateFilter::default()
691
+ },
692
+ projection: Default::default(),
693
+ limit: None,
694
+ })
695
+ .await?;
696
+
697
+ let mut out = Vec::new();
698
+ for descriptor_row in descriptor_rows {
699
+ let descriptor = parse_descriptor(&descriptor_row)?;
700
+ let Some(commit_id) = version_ref.load_head_commit_id(&descriptor.id).await? else {
701
+ continue;
702
+ };
703
+ out.push(VersionRow {
704
+ commit_id,
705
+ id: descriptor.id,
706
+ name: descriptor.name,
707
+ hidden: descriptor.hidden,
708
+ });
709
+ }
710
+ Ok(out)
711
+ }
712
+
713
/// Fields read from a `lix_version_descriptor` snapshot.
#[derive(Debug, Clone, PartialEq, Eq)]
struct VersionDescriptor {
    /// The snapshot's `id` string.
    id: String,
    /// The snapshot's `name` string.
    name: String,
    /// The snapshot's `hidden` flag; `false` when absent or not a boolean.
    hidden: bool,
}
719
+
720
+ fn parse_descriptor(row: &LiveStateRow) -> Result<VersionDescriptor, LixError> {
721
+ let snapshot = parse_snapshot(row, "lix_version_descriptor")?;
722
+ let id = snapshot
723
+ .get("id")
724
+ .and_then(JsonValue::as_str)
725
+ .ok_or_else(|| LixError::new("LIX_ERROR_UNKNOWN", "lix_version_descriptor is missing id"))?
726
+ .to_string();
727
+ let name = snapshot
728
+ .get("name")
729
+ .and_then(JsonValue::as_str)
730
+ .ok_or_else(|| {
731
+ LixError::new(
732
+ "LIX_ERROR_UNKNOWN",
733
+ "lix_version_descriptor is missing name",
734
+ )
735
+ })?
736
+ .to_string();
737
+ let hidden = snapshot
738
+ .get("hidden")
739
+ .and_then(JsonValue::as_bool)
740
+ .unwrap_or(false);
741
+ Ok(VersionDescriptor { id, name, hidden })
742
+ }
743
+
744
+ fn parse_snapshot(row: &LiveStateRow, schema_key: &str) -> Result<JsonValue, LixError> {
745
+ let snapshot_content = row.snapshot_content.as_deref().ok_or_else(|| {
746
+ LixError::new(
747
+ "LIX_ERROR_UNKNOWN",
748
+ format!("{schema_key} row is missing snapshot_content"),
749
+ )
750
+ })?;
751
+ serde_json::from_str(snapshot_content).map_err(|error| {
752
+ LixError::new(
753
+ "LIX_ERROR_UNKNOWN",
754
+ format!("{schema_key} snapshot_content is invalid JSON: {error}"),
755
+ )
756
+ })
757
+ }
758
+
759
+ fn validate_lix_version_update_assignments(assignments: &[(String, Expr)]) -> Result<()> {
760
+ for (column_name, _) in assignments {
761
+ match column_name.as_str() {
762
+ "name" | "hidden" | "commit_id" => {}
763
+ "id" => {
764
+ return Err(DataFusionError::Execution(
765
+ "UPDATE lix_version cannot change immutable column 'id'".to_string(),
766
+ ));
767
+ }
768
+ other => {
769
+ return Err(DataFusionError::Plan(format!(
770
+ "UPDATE lix_version failed: column '{other}' does not exist"
771
+ )));
772
+ }
773
+ }
774
+ }
775
+ Ok(())
776
+ }
777
+
778
+ fn filter_version_batch(
779
+ batch: RecordBatch,
780
+ filters: &[Arc<dyn PhysicalExpr>],
781
+ ) -> Result<RecordBatch> {
782
+ let Some(mask) = evaluate_version_filters(&batch, filters)? else {
783
+ return Ok(batch);
784
+ };
785
+ Ok(filter_record_batch(&batch, &mask)?)
786
+ }
787
+
788
+ fn evaluate_version_filters(
789
+ batch: &RecordBatch,
790
+ filters: &[Arc<dyn PhysicalExpr>],
791
+ ) -> Result<Option<BooleanArray>> {
792
+ if filters.is_empty() {
793
+ return Ok(None);
794
+ }
795
+
796
+ let mut combined_mask: Option<BooleanArray> = None;
797
+ for filter in filters {
798
+ let result = filter.evaluate(batch)?;
799
+ let array = result.into_array(batch.num_rows())?;
800
+ let bool_array = array
801
+ .as_any()
802
+ .downcast_ref::<BooleanArray>()
803
+ .ok_or_else(|| {
804
+ DataFusionError::Execution("lix_version filter was not boolean".to_string())
805
+ })?;
806
+ let normalized = bool_array
807
+ .iter()
808
+ .map(|value| Some(value == Some(true)))
809
+ .collect::<BooleanArray>();
810
+ combined_mask = Some(match combined_mask {
811
+ Some(existing) => and(&existing, &normalized)?,
812
+ None => normalized,
813
+ });
814
+ }
815
+ Ok(combined_mask)
816
+ }
817
+
818
+ fn version_insert_rows_from_batch(
819
+ batch: &RecordBatch,
820
+ default_commit_id: &str,
821
+ ) -> Result<Vec<VersionRow>> {
822
+ (0..batch.num_rows())
823
+ .map(|row_index| {
824
+ let id = required_string_value(batch, row_index, "id", "INSERT")?;
825
+ let name = required_string_value(batch, row_index, "name", "INSERT")?;
826
+ let hidden =
827
+ optional_bool_value(batch, row_index, "hidden", "INSERT")?.unwrap_or(false);
828
+ let commit_id = optional_string_value(batch, row_index, "commit_id", "INSERT")?
829
+ .unwrap_or_else(|| default_commit_id.to_string());
830
+ Ok(VersionRow {
831
+ id,
832
+ name,
833
+ hidden,
834
+ commit_id,
835
+ })
836
+ })
837
+ .collect()
838
+ }
839
+
840
+ fn version_rows_from_batch(batch: &RecordBatch) -> Result<Vec<VersionRow>> {
841
+ (0..batch.num_rows())
842
+ .map(|row_index| {
843
+ Ok(VersionRow {
844
+ id: required_string_value(batch, row_index, "id", "DELETE")?,
845
+ name: required_string_value(batch, row_index, "name", "DELETE")?,
846
+ hidden: required_bool_value(batch, row_index, "hidden", "DELETE")?,
847
+ commit_id: required_string_value(batch, row_index, "commit_id", "DELETE")?,
848
+ })
849
+ })
850
+ .collect()
851
+ }
852
+
853
+ fn reject_protected_version_deletes(rows: &[VersionRow], active_version_id: &str) -> Result<()> {
854
+ for row in rows {
855
+ if row.id == GLOBAL_VERSION_ID {
856
+ return Err(DataFusionError::Execution(
857
+ "DELETE FROM lix_version cannot delete the global version".to_string(),
858
+ ));
859
+ }
860
+ if row.id == active_version_id {
861
+ return Err(DataFusionError::Execution(format!(
862
+ "DELETE FROM lix_version cannot delete active version '{}'",
863
+ row.id
864
+ )));
865
+ }
866
+ }
867
+ Ok(())
868
+ }
869
+
870
+ fn version_update_rows_from_batch(
871
+ batch: &RecordBatch,
872
+ assignments: &[(String, Arc<dyn PhysicalExpr>)],
873
+ table_schema: &SchemaRef,
874
+ ) -> Result<Vec<VersionRow>> {
875
+ let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
876
+ (0..batch.num_rows())
877
+ .map(|row_index| {
878
+ Ok(VersionRow {
879
+ id: required_string_value(batch, row_index, "id", "UPDATE")?,
880
+ name: update_string_value(
881
+ batch,
882
+ &assignment_values,
883
+ table_schema,
884
+ row_index,
885
+ "name",
886
+ )?,
887
+ hidden: update_bool_value(
888
+ batch,
889
+ &assignment_values,
890
+ table_schema,
891
+ row_index,
892
+ "hidden",
893
+ )?,
894
+ commit_id: update_string_value(
895
+ batch,
896
+ &assignment_values,
897
+ table_schema,
898
+ row_index,
899
+ "commit_id",
900
+ )?,
901
+ })
902
+ })
903
+ .collect()
904
+ }
905
+
906
+ fn version_stage_rows(row: VersionRow, origin: Option<StageRowOrigin>) -> Vec<StageRow> {
907
+ vec![
908
+ with_origin(
909
+ version_descriptor_stage_row(&row.id, &row.name, row.hidden),
910
+ origin.clone(),
911
+ ),
912
+ with_origin(version_ref_stage_row(&row.id, &row.commit_id), origin),
913
+ ]
914
+ }
915
+
916
+ fn version_tombstone_rows(row: VersionRow) -> Vec<StageRow> {
917
+ let origin = Some(lix_version_origin(StageWriteOperation::Delete, &row.id));
918
+ vec![
919
+ with_origin(version_descriptor_tombstone_row(&row.id), origin.clone()),
920
+ with_origin(version_ref_tombstone_row(&row.id), origin),
921
+ ]
922
+ }
923
+
924
+ fn version_insert_stage_rows(row: VersionRow) -> Vec<StageRow> {
925
+ let origin = lix_version_origin(StageWriteOperation::Insert, &row.id);
926
+ version_stage_rows(row, Some(origin))
927
+ }
928
+
929
+ fn version_update_stage_rows(row: VersionRow) -> Vec<StageRow> {
930
+ let origin = lix_version_origin(StageWriteOperation::Update, &row.id);
931
+ version_stage_rows(row, Some(origin))
932
+ }
933
+
934
+ fn with_origin(mut row: StageRow, origin: Option<StageRowOrigin>) -> StageRow {
935
+ row.origin = origin;
936
+ row
937
+ }
938
+
939
+ fn lix_version_origin(operation: StageWriteOperation, version_id: &str) -> StageRowOrigin {
940
+ StageRowOrigin {
941
+ surface: "lix_version".to_string(),
942
+ operation,
943
+ primary_key: Some(LogicalPrimaryKey {
944
+ columns: vec!["id".to_string()],
945
+ values: vec![version_id.to_string()],
946
+ }),
947
+ }
948
+ }
949
+
950
+ fn update_string_value(
951
+ batch: &RecordBatch,
952
+ assignment_values: &UpdateAssignmentValues,
953
+ table_schema: &SchemaRef,
954
+ row_index: usize,
955
+ column_name: &str,
956
+ ) -> Result<String> {
957
+ let column_index = table_schema.index_of(column_name)?;
958
+ match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
959
+ InsertCell::Omitted => required_string_value(batch, row_index, column_name, "UPDATE"),
960
+ InsertCell::Provided(SqlCell::Value(
961
+ ScalarValue::Utf8(Some(value))
962
+ | ScalarValue::Utf8View(Some(value))
963
+ | ScalarValue::LargeUtf8(Some(value)),
964
+ )) => Ok(value),
965
+ InsertCell::Provided(SqlCell::Null) => Err(DataFusionError::Execution(format!(
966
+ "UPDATE lix_version requires non-null text column '{column_name}'"
967
+ ))),
968
+ InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
969
+ "UPDATE lix_version expected text-compatible column '{column_name}', got {other:?}"
970
+ ))),
971
+ }
972
+ .or_else(|error| {
973
+ if batch.column(column_index).is_null(row_index) {
974
+ Err(DataFusionError::Execution(format!(
975
+ "UPDATE lix_version requires non-null text column '{column_name}'"
976
+ )))
977
+ } else {
978
+ Err(error)
979
+ }
980
+ })
981
+ }
982
+
983
+ fn update_bool_value(
984
+ batch: &RecordBatch,
985
+ assignment_values: &UpdateAssignmentValues,
986
+ table_schema: &SchemaRef,
987
+ row_index: usize,
988
+ column_name: &str,
989
+ ) -> Result<bool> {
990
+ let column_index = table_schema.index_of(column_name)?;
991
+ match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
992
+ InsertCell::Omitted => required_bool_value(batch, row_index, column_name, "UPDATE"),
993
+ InsertCell::Provided(SqlCell::Value(ScalarValue::Boolean(Some(value)))) => Ok(value),
994
+ InsertCell::Provided(SqlCell::Null) => Err(DataFusionError::Execution(format!(
995
+ "UPDATE lix_version requires non-null boolean column '{column_name}'"
996
+ ))),
997
+ InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
998
+ "UPDATE lix_version expected boolean column '{column_name}', got {other:?}"
999
+ ))),
1000
+ }
1001
+ .or_else(|error| {
1002
+ if batch.column(column_index).is_null(row_index) {
1003
+ Err(DataFusionError::Execution(format!(
1004
+ "UPDATE lix_version requires non-null boolean column '{column_name}'"
1005
+ )))
1006
+ } else {
1007
+ Err(error)
1008
+ }
1009
+ })
1010
+ }
1011
+
1012
+ fn required_string_value(
1013
+ batch: &RecordBatch,
1014
+ row_index: usize,
1015
+ column_name: &str,
1016
+ operation: &str,
1017
+ ) -> Result<String> {
1018
+ optional_string_value(batch, row_index, column_name, operation)?.ok_or_else(|| {
1019
+ DataFusionError::Execution(format!(
1020
+ "{operation} lix_version requires non-null text column '{column_name}'"
1021
+ ))
1022
+ })
1023
+ }
1024
+
1025
+ fn optional_string_value(
1026
+ batch: &RecordBatch,
1027
+ row_index: usize,
1028
+ column_name: &str,
1029
+ operation: &str,
1030
+ ) -> Result<Option<String>> {
1031
+ match optional_scalar_value(batch, row_index, column_name)? {
1032
+ None
1033
+ | Some(ScalarValue::Null)
1034
+ | Some(ScalarValue::Utf8(None))
1035
+ | Some(ScalarValue::Utf8View(None))
1036
+ | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
1037
+ Some(ScalarValue::Utf8(Some(value)))
1038
+ | Some(ScalarValue::Utf8View(Some(value)))
1039
+ | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
1040
+ Some(other) => Err(DataFusionError::Execution(format!(
1041
+ "{operation} lix_version expected text-compatible column '{column_name}', got {other:?}"
1042
+ ))),
1043
+ }
1044
+ }
1045
+
1046
+ fn required_bool_value(
1047
+ batch: &RecordBatch,
1048
+ row_index: usize,
1049
+ column_name: &str,
1050
+ operation: &str,
1051
+ ) -> Result<bool> {
1052
+ optional_bool_value(batch, row_index, column_name, operation)?.ok_or_else(|| {
1053
+ DataFusionError::Execution(format!(
1054
+ "{operation} lix_version requires non-null boolean column '{column_name}'"
1055
+ ))
1056
+ })
1057
+ }
1058
+
1059
+ fn optional_bool_value(
1060
+ batch: &RecordBatch,
1061
+ row_index: usize,
1062
+ column_name: &str,
1063
+ operation: &str,
1064
+ ) -> Result<Option<bool>> {
1065
+ match optional_scalar_value(batch, row_index, column_name)? {
1066
+ None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
1067
+ Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
1068
+ Some(other) => Err(DataFusionError::Execution(format!(
1069
+ "{operation} lix_version expected boolean column '{column_name}', got {other:?}"
1070
+ ))),
1071
+ }
1072
+ }
1073
+
1074
+ fn optional_scalar_value(
1075
+ batch: &RecordBatch,
1076
+ row_index: usize,
1077
+ column_name: &str,
1078
+ ) -> Result<Option<ScalarValue>> {
1079
+ let Ok(column_index) = batch.schema().index_of(column_name) else {
1080
+ return Ok(None);
1081
+ };
1082
+ Ok(Some(ScalarValue::try_from_array(
1083
+ batch.column(column_index).as_ref(),
1084
+ row_index,
1085
+ )?))
1086
+ }
1087
+
1088
+ fn dml_count_schema() -> SchemaRef {
1089
+ Arc::new(Schema::new(vec![Field::new(
1090
+ "count",
1091
+ DataType::UInt64,
1092
+ false,
1093
+ )]))
1094
+ }
1095
+
1096
+ fn dml_plan_properties(schema: SchemaRef) -> PlanProperties {
1097
+ PlanProperties::new(
1098
+ EquivalenceProperties::new(schema),
1099
+ Partitioning::UnknownPartitioning(1),
1100
+ EmissionType::Final,
1101
+ Boundedness::Bounded,
1102
+ )
1103
+ }
1104
+
1105
+ fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
1106
+ RecordBatch::try_new(
1107
+ schema,
1108
+ vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
1109
+ )
1110
+ .map_err(DataFusionError::from)
1111
+ }
1112
+
1113
+ fn lix_version_schema() -> SchemaRef {
1114
+ Arc::new(Schema::new(vec![
1115
+ Field::new("id", DataType::Utf8, false),
1116
+ Field::new("name", DataType::Utf8, false),
1117
+ Field::new("hidden", DataType::Boolean, false),
1118
+ Field::new("commit_id", DataType::Utf8, false),
1119
+ ]))
1120
+ }
1121
+
1122
+ fn version_projection_for_scan(projection: Option<&Vec<usize>>) -> Vec<VersionColumn> {
1123
+ let all_columns = vec![
1124
+ VersionColumn::Id,
1125
+ VersionColumn::Name,
1126
+ VersionColumn::Hidden,
1127
+ VersionColumn::CommitId,
1128
+ ];
1129
+ projection.map_or(all_columns.clone(), |indices| {
1130
+ indices
1131
+ .iter()
1132
+ .filter_map(|index| all_columns.get(*index).copied())
1133
+ .collect()
1134
+ })
1135
+ }
1136
+
1137
+ fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> SchemaRef {
1138
+ match projection {
1139
+ Some(projection) => Arc::new(schema.project(projection).expect("projection is valid")),
1140
+ None => Arc::clone(schema),
1141
+ }
1142
+ }
1143
+
1144
+ fn version_record_batch(projection: &[VersionColumn], rows: &[VersionRow]) -> Result<RecordBatch> {
1145
+ let arrays = projection
1146
+ .iter()
1147
+ .map(|column| match column {
1148
+ VersionColumn::Id => string_array(rows.iter().map(|row| Some(row.id.as_str()))),
1149
+ VersionColumn::Name => string_array(rows.iter().map(|row| Some(row.name.as_str()))),
1150
+ VersionColumn::Hidden => Arc::new(BooleanArray::from(
1151
+ rows.iter().map(|row| row.hidden).collect::<Vec<_>>(),
1152
+ )) as ArrayRef,
1153
+ VersionColumn::CommitId => {
1154
+ string_array(rows.iter().map(|row| Some(row.commit_id.as_str())))
1155
+ }
1156
+ })
1157
+ .collect::<Vec<_>>();
1158
+ record_batch_with_row_count(version_schema(projection), arrays, rows.len()).map_err(|error| {
1159
+ DataFusionError::Execution(format!("failed to build lix_version batch: {error}"))
1160
+ })
1161
+ }
1162
+
1163
+ fn version_schema(projection: &[VersionColumn]) -> SchemaRef {
1164
+ Arc::new(Schema::new(
1165
+ projection
1166
+ .iter()
1167
+ .map(|column| match column {
1168
+ VersionColumn::Id => Field::new("id", DataType::Utf8, false),
1169
+ VersionColumn::Name => Field::new("name", DataType::Utf8, false),
1170
+ VersionColumn::Hidden => Field::new("hidden", DataType::Boolean, false),
1171
+ VersionColumn::CommitId => Field::new("commit_id", DataType::Utf8, false),
1172
+ })
1173
+ .collect::<Vec<_>>(),
1174
+ ))
1175
+ }
1176
+
1177
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
1178
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
1179
+ }
1180
+
1181
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
1182
+ super::error::datafusion_error_to_lix_error(error)
1183
+ }
1184
+
1185
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
1186
+ super::error::lix_error_to_datafusion_error(error)
1187
+ }