@lix-js/sdk 0.6.0-preview.0 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/README.md +9 -0
  2. package/SKILL.md +468 -0
  3. package/dist/engine-wasm/index.d.ts +15 -11
  4. package/dist/engine-wasm/index.js +105 -38
  5. package/dist/engine-wasm/wasm/lix_engine.d.ts +14 -2
  6. package/dist/engine-wasm/wasm/lix_engine.js +18 -17
  7. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  8. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +2 -1
  9. package/dist/generated/builtin-schemas.d.ts +31 -41
  10. package/dist/generated/builtin-schemas.js +52 -56
  11. package/dist/open-lix.d.ts +141 -24
  12. package/dist/open-lix.js +199 -35
  13. package/dist/sqlite/index.js +99 -22
  14. package/dist-engine-src/README.md +18 -0
  15. package/dist-engine-src/src/backend/kv.rs +358 -0
  16. package/dist-engine-src/src/backend/mod.rs +12 -0
  17. package/dist-engine-src/src/backend/testing.rs +658 -0
  18. package/dist-engine-src/src/backend/types.rs +96 -0
  19. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  20. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  21. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  22. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  23. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  24. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  25. package/dist-engine-src/src/cel/context.rs +86 -0
  26. package/dist-engine-src/src/cel/error.rs +19 -0
  27. package/dist-engine-src/src/cel/mod.rs +8 -0
  28. package/dist-engine-src/src/cel/provider.rs +9 -0
  29. package/dist-engine-src/src/cel/runtime.rs +167 -0
  30. package/dist-engine-src/src/cel/value.rs +50 -0
  31. package/dist-engine-src/src/changelog/codec.rs +321 -0
  32. package/dist-engine-src/src/changelog/context.rs +92 -0
  33. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  34. package/dist-engine-src/src/changelog/mod.rs +13 -0
  35. package/dist-engine-src/src/changelog/reader.rs +20 -0
  36. package/dist-engine-src/src/changelog/storage.rs +220 -0
  37. package/dist-engine-src/src/changelog/types.rs +38 -0
  38. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  39. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  40. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  41. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +135 -0
  46. package/dist-engine-src/src/common/metadata.rs +35 -0
  47. package/dist-engine-src/src/common/mod.rs +23 -0
  48. package/dist-engine-src/src/common/types.rs +105 -0
  49. package/dist-engine-src/src/common/wire.rs +222 -0
  50. package/dist-engine-src/src/engine.rs +239 -0
  51. package/dist-engine-src/src/entity_identity.rs +285 -0
  52. package/dist-engine-src/src/functions/context.rs +327 -0
  53. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  54. package/dist-engine-src/src/functions/mod.rs +18 -0
  55. package/dist-engine-src/src/functions/provider.rs +130 -0
  56. package/dist-engine-src/src/functions/state.rs +363 -0
  57. package/dist-engine-src/src/functions/types.rs +37 -0
  58. package/dist-engine-src/src/init.rs +505 -0
  59. package/dist-engine-src/src/json_store/compression.rs +77 -0
  60. package/dist-engine-src/src/json_store/context.rs +129 -0
  61. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  62. package/dist-engine-src/src/json_store/mod.rs +9 -0
  63. package/dist-engine-src/src/json_store/store.rs +236 -0
  64. package/dist-engine-src/src/json_store/types.rs +52 -0
  65. package/dist-engine-src/src/lib.rs +61 -0
  66. package/dist-engine-src/src/live_state/context.rs +2241 -0
  67. package/dist-engine-src/src/live_state/mod.rs +15 -0
  68. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  69. package/dist-engine-src/src/live_state/reader.rs +23 -0
  70. package/dist-engine-src/src/live_state/types.rs +239 -0
  71. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  72. package/dist-engine-src/src/plugin/archive.rs +441 -0
  73. package/dist-engine-src/src/plugin/component.rs +183 -0
  74. package/dist-engine-src/src/plugin/install.rs +637 -0
  75. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  76. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  77. package/dist-engine-src/src/plugin/mod.rs +33 -0
  78. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  79. package/dist-engine-src/src/plugin/storage.rs +74 -0
  80. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  81. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  82. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  83. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  84. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  85. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  86. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  92. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  93. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  94. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  100. package/dist-engine-src/src/schema/definition.json +157 -0
  101. package/dist-engine-src/src/schema/definition.rs +636 -0
  102. package/dist-engine-src/src/schema/key.rs +206 -0
  103. package/dist-engine-src/src/schema/mod.rs +20 -0
  104. package/dist-engine-src/src/schema/seed.rs +14 -0
  105. package/dist-engine-src/src/schema/tests.rs +739 -0
  106. package/dist-engine-src/src/schema_registry.rs +294 -0
  107. package/dist-engine-src/src/session/context.rs +366 -0
  108. package/dist-engine-src/src/session/create_version.rs +80 -0
  109. package/dist-engine-src/src/session/execute.rs +447 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +437 -0
  116. package/dist-engine-src/src/session/mod.rs +25 -0
  117. package/dist-engine-src/src/session/switch_version.rs +121 -0
  118. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  119. package/dist-engine-src/src/sql2/classify.rs +147 -0
  120. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  121. package/dist-engine-src/src/sql2/context.rs +307 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  127. package/dist-engine-src/src/sql2/error.rs +196 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  138. package/dist-engine-src/src/sql2/mod.rs +43 -0
  139. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  140. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  141. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  142. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  143. package/dist-engine-src/src/sql2/session.rs +135 -0
  144. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  148. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  149. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  150. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  153. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  154. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  155. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  156. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  157. package/dist-engine-src/src/storage/context.rs +356 -0
  158. package/dist-engine-src/src/storage/mod.rs +14 -0
  159. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  160. package/dist-engine-src/src/storage/types.rs +501 -0
  161. package/dist-engine-src/src/storage_bench.rs +3406 -0
  162. package/dist-engine-src/src/test_support.rs +81 -0
  163. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  164. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  165. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  166. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  167. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  168. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  169. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  170. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  171. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  172. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  173. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  174. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  175. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  176. package/dist-engine-src/src/transaction/context.rs +1307 -0
  177. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  178. package/dist-engine-src/src/transaction/mod.rs +11 -0
  179. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  180. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  181. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  182. package/dist-engine-src/src/transaction/types.rs +351 -0
  183. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  184. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  185. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  186. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  187. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  188. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  189. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  190. package/dist-engine-src/src/version/context.rs +52 -0
  191. package/dist-engine-src/src/version/mod.rs +12 -0
  192. package/dist-engine-src/src/version/refs.rs +421 -0
  193. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  194. package/dist-engine-src/src/version/types.rs +21 -0
  195. package/dist-engine-src/src/wasm/mod.rs +60 -0
  196. package/package.json +68 -63
@@ -0,0 +1,2430 @@
1
+ use std::any::Any;
2
+ use std::collections::BTreeSet;
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray, UInt64Array};
7
+ use datafusion::arrow::compute::{and, filter_record_batch};
8
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
9
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
10
+ use datafusion::catalog::{Session, TableProvider};
11
+ use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, SchemaExt};
12
+ use datafusion::datasource::TableType;
13
+ use datafusion::execution::TaskContext;
14
+ use datafusion::logical_expr::dml::InsertOp;
15
+ use datafusion::logical_expr::expr::InList;
16
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
17
+ use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
18
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
19
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
20
+ use datafusion::physical_plan::{
21
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
22
+ };
23
+ use datafusion::prelude::SessionContext;
24
+ use datafusion::scalar::ScalarValue;
25
+ use futures_util::{stream, TryStreamExt};
26
+
27
+ use crate::entity_identity::EntityIdentity;
28
+ use crate::live_state::LiveStateRow;
29
+ use crate::live_state::{
30
+ LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
31
+ };
32
+ use crate::sql2::dml::{InsertExec, InsertSink};
33
+ use crate::sql2::read_only::reject_read_only_stage_rows;
34
+ use crate::sql2::version_scope::{resolve_provider_version_ids, VersionBinding};
35
+ use crate::sql2::write_normalization::{InsertCell, SqlCell, UpdateAssignmentValues};
36
+ use crate::transaction::types::StageRow;
37
+ use crate::version::VersionRefReader;
38
+ use crate::GLOBAL_VERSION_ID;
39
+ use crate::{parse_row_metadata, serialize_row_metadata, LixError, NullableKeyFilter, RowMetadata};
40
+
41
+ use crate::sql2::{
42
+ SqlWriteContext, WriteAccess, WriteContextLiveStateReader, WriteContextVersionRefReader,
43
+ };
44
+ use crate::transaction::types::{StageWrite, StageWriteMode};
45
+
46
+ use super::result_metadata::json_field;
47
+
48
+ pub(crate) async fn register_lix_state_providers(
49
+ session: &SessionContext,
50
+ active_version_id: &str,
51
+ live_state: Arc<dyn LiveStateReader>,
52
+ version_ref: Arc<dyn VersionRefReader>,
53
+ ) -> Result<(), LixError> {
54
+ session
55
+ .register_table(
56
+ "lix_state_by_version",
57
+ Arc::new(LixStateProvider::by_version(
58
+ Arc::clone(&live_state),
59
+ Arc::clone(&version_ref),
60
+ )),
61
+ )
62
+ .map_err(datafusion_error_to_lix_error)?;
63
+ session
64
+ .register_table(
65
+ "lix_state",
66
+ Arc::new(LixStateProvider::active_version(
67
+ active_version_id,
68
+ live_state,
69
+ version_ref,
70
+ )),
71
+ )
72
+ .map_err(datafusion_error_to_lix_error)?;
73
+ Ok(())
74
+ }
75
+
76
+ pub(crate) async fn register_lix_state_write_providers(
77
+ session: &SessionContext,
78
+ write_ctx: SqlWriteContext,
79
+ ) -> Result<(), LixError> {
80
+ session
81
+ .register_table(
82
+ "lix_state_by_version",
83
+ Arc::new(LixStateProvider::by_version_with_write(write_ctx.clone())),
84
+ )
85
+ .map_err(datafusion_error_to_lix_error)?;
86
+ session
87
+ .register_table(
88
+ "lix_state",
89
+ Arc::new(LixStateProvider::active_version_with_write(write_ctx)),
90
+ )
91
+ .map_err(datafusion_error_to_lix_error)?;
92
+ Ok(())
93
+ }
94
+
95
+ pub(crate) struct LixStateProvider {
96
+ schema: SchemaRef,
97
+ live_state: Arc<dyn LiveStateReader>,
98
+ version_ref: Arc<dyn VersionRefReader>,
99
+ write_access: WriteAccess,
100
+ version_binding: VersionBinding,
101
+ }
102
+
103
+ impl std::fmt::Debug for LixStateProvider {
104
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105
+ f.debug_struct("LixStateProvider")
106
+ .field("write_access", &self.write_access.is_write())
107
+ .finish()
108
+ }
109
+ }
110
+
111
+ impl LixStateProvider {
112
+ pub(crate) fn active_version(
113
+ active_version_id: impl Into<String>,
114
+ live_state: Arc<dyn LiveStateReader>,
115
+ version_ref: Arc<dyn VersionRefReader>,
116
+ ) -> Self {
117
+ Self {
118
+ schema: lix_state_schema(),
119
+ live_state,
120
+ version_ref,
121
+ write_access: WriteAccess::read_only(),
122
+ version_binding: VersionBinding::active(active_version_id),
123
+ }
124
+ }
125
+
126
+ pub(crate) fn active_version_with_write(write_ctx: SqlWriteContext) -> Self {
127
+ let active_version_id = write_ctx.active_version_id();
128
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
129
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
130
+ Self {
131
+ schema: lix_state_schema(),
132
+ live_state,
133
+ version_ref,
134
+ write_access: WriteAccess::write(write_ctx),
135
+ version_binding: VersionBinding::active(active_version_id),
136
+ }
137
+ }
138
+
139
+ pub(crate) fn by_version(
140
+ live_state: Arc<dyn LiveStateReader>,
141
+ version_ref: Arc<dyn VersionRefReader>,
142
+ ) -> Self {
143
+ Self {
144
+ schema: lix_state_by_version_schema(),
145
+ live_state,
146
+ version_ref,
147
+ write_access: WriteAccess::read_only(),
148
+ version_binding: VersionBinding::explicit(),
149
+ }
150
+ }
151
+
152
+ pub(crate) fn by_version_with_write(write_ctx: SqlWriteContext) -> Self {
153
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
154
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
155
+ Self {
156
+ schema: lix_state_by_version_schema(),
157
+ live_state,
158
+ version_ref,
159
+ write_access: WriteAccess::write(write_ctx),
160
+ version_binding: VersionBinding::explicit(),
161
+ }
162
+ }
163
+ }
164
+
165
+ #[async_trait]
166
+ impl TableProvider for LixStateProvider {
167
+ fn as_any(&self) -> &dyn Any {
168
+ self
169
+ }
170
+
171
+ fn schema(&self) -> SchemaRef {
172
+ Arc::clone(&self.schema)
173
+ }
174
+
175
+ fn table_type(&self) -> TableType {
176
+ TableType::Base
177
+ }
178
+
179
+ fn supports_filters_pushdown(
180
+ &self,
181
+ filters: &[&Expr],
182
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
183
+ Ok(filters
184
+ .iter()
185
+ .map(|filter| {
186
+ if parse_lix_state_filter(filter).is_some() {
187
+ TableProviderFilterPushDown::Exact
188
+ } else {
189
+ TableProviderFilterPushDown::Unsupported
190
+ }
191
+ })
192
+ .collect())
193
+ }
194
+
195
+ async fn scan(
196
+ &self,
197
+ _state: &dyn Session,
198
+ projection: Option<&Vec<usize>>,
199
+ filters: &[Expr],
200
+ limit: Option<usize>,
201
+ ) -> Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>> {
202
+ let route = LixStateByVersionRoute::from_filters(filters);
203
+ let projected_schema = projected_schema(&self.schema, projection)?;
204
+ let mut request = lix_state_scan_request(
205
+ &self.schema,
206
+ self.version_binding.active_version_id(),
207
+ projection,
208
+ &route,
209
+ limit,
210
+ );
211
+ if !route.contradictory {
212
+ request.filter.version_ids = resolve_provider_version_ids(
213
+ self.version_ref.as_ref(),
214
+ &self.version_binding,
215
+ request.filter.version_ids,
216
+ )
217
+ .await
218
+ .map_err(lix_error_to_datafusion_error)?;
219
+ }
220
+ Ok(Arc::new(LixStateScanExec::new(
221
+ Arc::clone(&self.live_state),
222
+ projected_schema,
223
+ request,
224
+ )))
225
+ }
226
+
227
+ async fn insert_into(
228
+ &self,
229
+ _state: &dyn Session,
230
+ input: Arc<dyn ExecutionPlan>,
231
+ insert_op: InsertOp,
232
+ ) -> Result<Arc<dyn ExecutionPlan>> {
233
+ if insert_op != InsertOp::Append {
234
+ return not_impl_err!("{insert_op} not implemented for lix_state yet");
235
+ }
236
+
237
+ let active_version_id = self
238
+ .version_binding
239
+ .require_active_version_id("INSERT")
240
+ .map_err(lix_error_to_datafusion_error)?;
241
+
242
+ let write_ctx = self.write_access.require_write("INSERT into lix_state")?;
243
+
244
+ self.schema
245
+ .logically_equivalent_names_and_types(&input.schema())?;
246
+
247
+ let sink = LixStateInsertSink::new(
248
+ Arc::clone(&self.schema),
249
+ write_ctx.clone(),
250
+ active_version_id,
251
+ );
252
+ Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
253
+ }
254
+
255
+ async fn delete_from(
256
+ &self,
257
+ state: &dyn Session,
258
+ filters: Vec<Expr>,
259
+ ) -> Result<Arc<dyn ExecutionPlan>> {
260
+ let active_version_id = self
261
+ .version_binding
262
+ .require_active_version_id("DELETE")
263
+ .map_err(lix_error_to_datafusion_error)?;
264
+
265
+ let write_ctx = self.write_access.require_write("DELETE FROM lix_state")?;
266
+
267
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
268
+ let physical_filters = filters
269
+ .iter()
270
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
271
+ .collect::<Result<Vec<_>>>()?;
272
+
273
+ let route = LixStateByVersionRoute::from_filters(&filters);
274
+ let request =
275
+ lix_state_scan_request(&self.schema, Some(&active_version_id), None, &route, None);
276
+
277
+ Ok(Arc::new(LixStateDeleteExec::new(
278
+ write_ctx.clone(),
279
+ Arc::clone(&self.schema),
280
+ active_version_id,
281
+ request,
282
+ physical_filters,
283
+ )))
284
+ }
285
+
286
+ async fn update(
287
+ &self,
288
+ state: &dyn Session,
289
+ assignments: Vec<(String, Expr)>,
290
+ filters: Vec<Expr>,
291
+ ) -> Result<Arc<dyn ExecutionPlan>> {
292
+ let active_version_id = self
293
+ .version_binding
294
+ .require_active_version_id("UPDATE")
295
+ .map_err(lix_error_to_datafusion_error)?;
296
+
297
+ let write_ctx = self.write_access.require_write("UPDATE lix_state")?;
298
+
299
+ validate_lix_state_update_assignments(&self.schema, &assignments)?;
300
+
301
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
302
+ let physical_assignments = assignments
303
+ .iter()
304
+ .map(|(column_name, expr)| {
305
+ Ok((
306
+ column_name.clone(),
307
+ create_physical_expr(expr, &df_schema, state.execution_props())?,
308
+ ))
309
+ })
310
+ .collect::<Result<Vec<_>>>()?;
311
+ let physical_filters = filters
312
+ .iter()
313
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
314
+ .collect::<Result<Vec<_>>>()?;
315
+
316
+ let route = LixStateByVersionRoute::from_filters(&filters);
317
+ let request =
318
+ lix_state_scan_request(&self.schema, Some(&active_version_id), None, &route, None);
319
+
320
+ Ok(Arc::new(LixStateUpdateExec::new(
321
+ write_ctx.clone(),
322
+ Arc::clone(&self.schema),
323
+ active_version_id,
324
+ request,
325
+ physical_assignments,
326
+ physical_filters,
327
+ )))
328
+ }
329
+ }
330
+
331
+ struct LixStateInsertSink {
332
+ write_ctx: SqlWriteContext,
333
+ version_binding: String,
334
+ }
335
+
336
+ impl std::fmt::Debug for LixStateInsertSink {
337
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
338
+ f.debug_struct("LixStateInsertSink").finish()
339
+ }
340
+ }
341
+
342
+ impl LixStateInsertSink {
343
+ fn new(_schema: SchemaRef, write_ctx: SqlWriteContext, version_binding: String) -> Self {
344
+ Self {
345
+ write_ctx,
346
+ version_binding,
347
+ }
348
+ }
349
+ }
350
+
351
+ impl DisplayAs for LixStateInsertSink {
352
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
353
+ match t {
354
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
355
+ write!(f, "LixStateInsertSink")
356
+ }
357
+ DisplayFormatType::TreeRender => write!(f, "LixStateInsertSink"),
358
+ }
359
+ }
360
+ }
361
+
362
+ #[async_trait]
363
+ impl InsertSink for LixStateInsertSink {
364
+ async fn write_batches(
365
+ &self,
366
+ batches: Vec<RecordBatch>,
367
+ _context: &Arc<TaskContext>,
368
+ ) -> Result<u64> {
369
+ let mut rows = Vec::new();
370
+ for batch in batches {
371
+ rows.extend(lix_state_write_rows_from_batch(
372
+ &batch,
373
+ &self.version_binding,
374
+ )?);
375
+ }
376
+ reject_read_only_stage_rows(&rows, "INSERT into lix_state")?;
377
+ let count = u64::try_from(rows.len())
378
+ .map_err(|_| DataFusionError::Execution("INSERT row count overflow".into()))?;
379
+
380
+ self.write_ctx
381
+ .stage_write(StageWrite::Rows {
382
+ mode: StageWriteMode::Insert,
383
+ rows,
384
+ })
385
+ .await
386
+ .map_err(lix_error_to_datafusion_error)?;
387
+
388
+ Ok(count)
389
+ }
390
+ }
391
+
392
+ #[allow(dead_code)]
393
+ struct LixStateDeleteExec {
394
+ write_ctx: SqlWriteContext,
395
+ table_schema: SchemaRef,
396
+ version_binding: String,
397
+ request: LiveStateScanRequest,
398
+ filters: Vec<Arc<dyn PhysicalExpr>>,
399
+ result_schema: SchemaRef,
400
+ properties: Arc<PlanProperties>,
401
+ }
402
+
403
+ impl std::fmt::Debug for LixStateDeleteExec {
404
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
405
+ f.debug_struct("LixStateDeleteExec").finish()
406
+ }
407
+ }
408
+
409
+ impl LixStateDeleteExec {
410
+ fn new(
411
+ write_ctx: SqlWriteContext,
412
+ table_schema: SchemaRef,
413
+ version_binding: String,
414
+ request: LiveStateScanRequest,
415
+ filters: Vec<Arc<dyn PhysicalExpr>>,
416
+ ) -> Self {
417
+ let result_schema = dml_count_schema();
418
+ let properties = PlanProperties::new(
419
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
420
+ Partitioning::UnknownPartitioning(1),
421
+ EmissionType::Final,
422
+ Boundedness::Bounded,
423
+ );
424
+ Self {
425
+ write_ctx,
426
+ table_schema,
427
+ version_binding,
428
+ request,
429
+ filters,
430
+ result_schema,
431
+ properties: Arc::new(properties),
432
+ }
433
+ }
434
+ }
435
+
436
+ impl DisplayAs for LixStateDeleteExec {
437
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
438
+ match t {
439
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
440
+ write!(f, "LixStateDeleteExec(filters={})", self.filters.len())
441
+ }
442
+ DisplayFormatType::TreeRender => write!(f, "LixStateDeleteExec"),
443
+ }
444
+ }
445
+ }
446
+
447
+ impl ExecutionPlan for LixStateDeleteExec {
448
+ fn name(&self) -> &str {
449
+ "LixStateDeleteExec"
450
+ }
451
+
452
+ fn as_any(&self) -> &dyn Any {
453
+ self
454
+ }
455
+
456
+ fn properties(&self) -> &Arc<PlanProperties> {
457
+ &self.properties
458
+ }
459
+
460
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
461
+ Vec::new()
462
+ }
463
+
464
+ fn with_new_children(
465
+ self: Arc<Self>,
466
+ children: Vec<Arc<dyn ExecutionPlan>>,
467
+ ) -> Result<Arc<dyn ExecutionPlan>> {
468
+ if !children.is_empty() {
469
+ return Err(DataFusionError::Execution(
470
+ "LixStateDeleteExec does not accept children".to_string(),
471
+ ));
472
+ }
473
+ Ok(self)
474
+ }
475
+
476
+ fn execute(
477
+ &self,
478
+ partition: usize,
479
+ _context: Arc<TaskContext>,
480
+ ) -> Result<SendableRecordBatchStream> {
481
+ if partition != 0 {
482
+ return Err(DataFusionError::Execution(format!(
483
+ "LixStateDeleteExec only exposes one partition, got {partition}"
484
+ )));
485
+ }
486
+ let write_ctx = self.write_ctx.clone();
487
+ let table_schema = Arc::clone(&self.table_schema);
488
+ let version_binding = self.version_binding.clone();
489
+ let request = self.request.clone();
490
+ let filters = self.filters.clone();
491
+ let result_schema = Arc::clone(&self.result_schema);
492
+ let stream_schema = Arc::clone(&result_schema);
493
+
494
+ let stream = stream::once(async move {
495
+ let rows = if request.limit == Some(0) {
496
+ Vec::new()
497
+ } else {
498
+ write_ctx
499
+ .scan_live_state(&request)
500
+ .await
501
+ .map_err(lix_error_to_datafusion_error)?
502
+ };
503
+ let source_batch = lix_state_record_batch(Arc::clone(&table_schema), &rows)
504
+ .map_err(lix_error_to_datafusion_error)?;
505
+ let matched_batch = filter_lix_state_batch(source_batch, &filters)?;
506
+ let write_rows =
507
+ lix_state_deletable_write_rows_from_batch(&matched_batch, &version_binding)?;
508
+ reject_read_only_stage_rows(&write_rows, "DELETE FROM lix_state")?;
509
+ let count = u64::try_from(write_rows.len())
510
+ .map_err(|_| DataFusionError::Execution("DELETE row count overflow".to_string()))?;
511
+
512
+ if count > 0 {
513
+ write_ctx
514
+ .stage_write(StageWrite::Rows {
515
+ mode: StageWriteMode::Replace,
516
+ rows: write_rows,
517
+ })
518
+ .await
519
+ .map_err(lix_error_to_datafusion_error)?;
520
+ }
521
+
522
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
523
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
524
+ )]))
525
+ })
526
+ .try_flatten();
527
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
528
+ result_schema,
529
+ stream,
530
+ )))
531
+ }
532
+ }
533
+
534
+ #[allow(dead_code)]
535
+ struct LixStateUpdateExec {
536
+ write_ctx: SqlWriteContext,
537
+ table_schema: SchemaRef,
538
+ version_binding: String,
539
+ request: LiveStateScanRequest,
540
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
541
+ filters: Vec<Arc<dyn PhysicalExpr>>,
542
+ result_schema: SchemaRef,
543
+ properties: Arc<PlanProperties>,
544
+ }
545
+
546
+ impl std::fmt::Debug for LixStateUpdateExec {
547
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
548
+ f.debug_struct("LixStateUpdateExec").finish()
549
+ }
550
+ }
551
+
552
+ impl LixStateUpdateExec {
553
+ fn new(
554
+ write_ctx: SqlWriteContext,
555
+ table_schema: SchemaRef,
556
+ version_binding: String,
557
+ request: LiveStateScanRequest,
558
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
559
+ filters: Vec<Arc<dyn PhysicalExpr>>,
560
+ ) -> Self {
561
+ let result_schema = dml_count_schema();
562
+ let properties = PlanProperties::new(
563
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
564
+ Partitioning::UnknownPartitioning(1),
565
+ EmissionType::Final,
566
+ Boundedness::Bounded,
567
+ );
568
+ Self {
569
+ write_ctx,
570
+ table_schema,
571
+ version_binding,
572
+ request,
573
+ assignments,
574
+ filters,
575
+ result_schema,
576
+ properties: Arc::new(properties),
577
+ }
578
+ }
579
+ }
580
+
581
+ impl DisplayAs for LixStateUpdateExec {
582
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
583
+ match t {
584
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
585
+ write!(
586
+ f,
587
+ "LixStateUpdateExec(assignments={}, filters={})",
588
+ self.assignments.len(),
589
+ self.filters.len()
590
+ )
591
+ }
592
+ DisplayFormatType::TreeRender => write!(f, "LixStateUpdateExec"),
593
+ }
594
+ }
595
+ }
596
+
597
+ impl ExecutionPlan for LixStateUpdateExec {
598
+ fn name(&self) -> &str {
599
+ "LixStateUpdateExec"
600
+ }
601
+
602
+ fn as_any(&self) -> &dyn Any {
603
+ self
604
+ }
605
+
606
+ fn properties(&self) -> &Arc<PlanProperties> {
607
+ &self.properties
608
+ }
609
+
610
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
611
+ Vec::new()
612
+ }
613
+
614
+ fn with_new_children(
615
+ self: Arc<Self>,
616
+ children: Vec<Arc<dyn ExecutionPlan>>,
617
+ ) -> Result<Arc<dyn ExecutionPlan>> {
618
+ if !children.is_empty() {
619
+ return Err(DataFusionError::Execution(
620
+ "LixStateUpdateExec does not accept children".to_string(),
621
+ ));
622
+ }
623
+ Ok(self)
624
+ }
625
+
626
+ fn execute(
627
+ &self,
628
+ partition: usize,
629
+ _context: Arc<TaskContext>,
630
+ ) -> Result<SendableRecordBatchStream> {
631
+ if partition != 0 {
632
+ return Err(DataFusionError::Execution(format!(
633
+ "LixStateUpdateExec only exposes one partition, got {partition}"
634
+ )));
635
+ }
636
+ let write_ctx = self.write_ctx.clone();
637
+ let table_schema = Arc::clone(&self.table_schema);
638
+ let version_binding = self.version_binding.clone();
639
+ let request = self.request.clone();
640
+ let assignments = self.assignments.clone();
641
+ let filters = self.filters.clone();
642
+ let result_schema = Arc::clone(&self.result_schema);
643
+ let stream_schema = Arc::clone(&result_schema);
644
+
645
+ let stream = stream::once(async move {
646
+ let rows = if request.limit == Some(0) {
647
+ Vec::new()
648
+ } else {
649
+ write_ctx
650
+ .scan_live_state(&request)
651
+ .await
652
+ .map_err(lix_error_to_datafusion_error)?
653
+ };
654
+ let source_batch = lix_state_record_batch(Arc::clone(&table_schema), &rows)
655
+ .map_err(lix_error_to_datafusion_error)?;
656
+ let matched_batch = filter_lix_state_batch(source_batch, &filters)?;
657
+ let write_rows = lix_state_update_write_rows_from_batch(
658
+ &matched_batch,
659
+ &assignments,
660
+ &version_binding,
661
+ )?;
662
+ reject_read_only_stage_rows(&write_rows, "UPDATE lix_state")?;
663
+ let count = u64::try_from(write_rows.len())
664
+ .map_err(|_| DataFusionError::Execution("UPDATE row count overflow".to_string()))?;
665
+
666
+ if count > 0 {
667
+ write_ctx
668
+ .stage_write(StageWrite::Rows {
669
+ mode: StageWriteMode::Replace,
670
+ rows: write_rows,
671
+ })
672
+ .await
673
+ .map_err(lix_error_to_datafusion_error)?;
674
+ }
675
+
676
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
677
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
678
+ )]))
679
+ })
680
+ .try_flatten();
681
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
682
+ result_schema,
683
+ stream,
684
+ )))
685
+ }
686
+ }
687
+
688
+ fn validate_lix_state_update_assignments(
689
+ schema: &SchemaRef,
690
+ assignments: &[(String, Expr)],
691
+ ) -> Result<()> {
692
+ for (column_name, _) in assignments {
693
+ schema.field_with_name(column_name).map_err(|_| {
694
+ DataFusionError::Plan(format!(
695
+ "UPDATE lix_state failed: column '{column_name}' does not exist"
696
+ ))
697
+ })?;
698
+ if !matches!(column_name.as_str(), "snapshot_content" | "metadata") {
699
+ return Err(DataFusionError::Execution(format!(
700
+ "UPDATE lix_state cannot stage read-only column '{column_name}'"
701
+ )));
702
+ }
703
+ }
704
+ Ok(())
705
+ }
706
+
707
+ fn filter_lix_state_batch(
708
+ batch: RecordBatch,
709
+ filters: &[Arc<dyn PhysicalExpr>],
710
+ ) -> Result<RecordBatch> {
711
+ let Some(mask) = evaluate_lix_state_filters(&batch, filters)? else {
712
+ return Ok(batch);
713
+ };
714
+ Ok(filter_record_batch(&batch, &mask)?)
715
+ }
716
+
717
+ fn evaluate_lix_state_filters(
718
+ batch: &RecordBatch,
719
+ filters: &[Arc<dyn PhysicalExpr>],
720
+ ) -> Result<Option<BooleanArray>> {
721
+ if filters.is_empty() {
722
+ return Ok(None);
723
+ }
724
+
725
+ let mut combined_mask: Option<BooleanArray> = None;
726
+ for filter in filters {
727
+ let result = filter.evaluate(batch)?;
728
+ let array = result.into_array(batch.num_rows())?;
729
+ let bool_array = array
730
+ .as_any()
731
+ .downcast_ref::<BooleanArray>()
732
+ .ok_or_else(|| {
733
+ DataFusionError::Execution("UPDATE lix_state filter was not boolean".to_string())
734
+ })?;
735
+ let normalized = bool_array
736
+ .iter()
737
+ .map(|value| Some(value == Some(true)))
738
+ .collect::<BooleanArray>();
739
+ combined_mask = Some(match combined_mask {
740
+ Some(existing) => and(&existing, &normalized)?,
741
+ None => normalized,
742
+ });
743
+ }
744
+ Ok(combined_mask)
745
+ }
746
+
747
+ fn lix_state_stageable_write_rows_from_batch(
748
+ batch: &RecordBatch,
749
+ version_binding: &str,
750
+ ) -> Result<Vec<StageRow>> {
751
+ let mut rows = lix_state_write_rows_from_batch(batch, version_binding)?;
752
+ for row in &mut rows {
753
+ row.created_at = None;
754
+ row.updated_at = None;
755
+ row.change_id = None;
756
+ row.commit_id = None;
757
+ }
758
+ Ok(rows)
759
+ }
760
+
761
+ fn lix_state_update_write_rows_from_batch(
762
+ batch: &RecordBatch,
763
+ assignments: &[(String, Arc<dyn PhysicalExpr>)],
764
+ version_binding: &str,
765
+ ) -> Result<Vec<StageRow>> {
766
+ let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
767
+ (0..batch.num_rows())
768
+ .map(|row_index| {
769
+ let global = optional_bool_value(batch, row_index, "global")?.unwrap_or(false);
770
+ let version_id =
771
+ optional_string_value(batch, row_index, "version_id")?.unwrap_or_else(|| {
772
+ if global {
773
+ GLOBAL_VERSION_ID.to_string()
774
+ } else {
775
+ version_binding.to_string()
776
+ }
777
+ });
778
+
779
+ Ok(StageRow {
780
+ entity_id: Some(
781
+ EntityIdentity::from_string(&required_string_value(
782
+ batch,
783
+ row_index,
784
+ "entity_id",
785
+ )?)
786
+ .map_err(|error| {
787
+ DataFusionError::Execution(format!(
788
+ "lix_state UPDATE has invalid entity_id: {error}"
789
+ ))
790
+ })?,
791
+ ),
792
+ schema_key: required_string_value(batch, row_index, "schema_key")?,
793
+ file_id: optional_string_value(batch, row_index, "file_id")?,
794
+ snapshot_content: update_optional_string_value(
795
+ batch,
796
+ &assignment_values,
797
+ row_index,
798
+ "snapshot_content",
799
+ )?,
800
+ metadata: update_optional_metadata_value(
801
+ batch,
802
+ &assignment_values,
803
+ row_index,
804
+ "metadata",
805
+ "lix_state",
806
+ )?,
807
+ origin: None,
808
+ schema_version: required_string_value(batch, row_index, "schema_version")?,
809
+ created_at: None,
810
+ updated_at: None,
811
+ global,
812
+ change_id: None,
813
+ commit_id: None,
814
+ untracked: optional_bool_value(batch, row_index, "untracked")?.unwrap_or(false),
815
+ version_id,
816
+ })
817
+ })
818
+ .collect()
819
+ }
820
+
821
+ fn lix_state_deletable_write_rows_from_batch(
822
+ batch: &RecordBatch,
823
+ version_binding: &str,
824
+ ) -> Result<Vec<StageRow>> {
825
+ let mut rows = lix_state_stageable_write_rows_from_batch(batch, version_binding)?;
826
+ for row in &mut rows {
827
+ row.snapshot_content = None;
828
+ }
829
+ Ok(rows)
830
+ }
831
+
832
+ fn update_optional_string_value(
833
+ batch: &RecordBatch,
834
+ assignment_values: &UpdateAssignmentValues,
835
+ row_index: usize,
836
+ column_name: &str,
837
+ ) -> Result<Option<String>> {
838
+ match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
839
+ InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
840
+ InsertCell::Provided(SqlCell::Value(
841
+ ScalarValue::Utf8(Some(value))
842
+ | ScalarValue::Utf8View(Some(value))
843
+ | ScalarValue::LargeUtf8(Some(value)),
844
+ )) => Ok(Some(value)),
845
+ InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
846
+ "UPDATE lix_state expected text-compatible column '{column_name}', got {other:?}"
847
+ ))),
848
+ }
849
+ }
850
+
851
+ fn update_optional_metadata_value(
852
+ batch: &RecordBatch,
853
+ assignment_values: &UpdateAssignmentValues,
854
+ row_index: usize,
855
+ column_name: &str,
856
+ context: &str,
857
+ ) -> Result<Option<RowMetadata>> {
858
+ update_optional_string_value(batch, assignment_values, row_index, column_name)?
859
+ .map(|value| {
860
+ parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
861
+ })
862
+ .transpose()
863
+ }
864
+
865
+ fn dml_count_schema() -> SchemaRef {
866
+ Arc::new(Schema::new(vec![Field::new(
867
+ "count",
868
+ DataType::UInt64,
869
+ false,
870
+ )]))
871
+ }
872
+
873
+ fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
874
+ RecordBatch::try_new(
875
+ schema,
876
+ vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
877
+ )
878
+ .map_err(DataFusionError::from)
879
+ }
880
+
881
+ fn lix_state_write_rows_from_batch(
882
+ batch: &RecordBatch,
883
+ version_binding: &str,
884
+ ) -> Result<Vec<StageRow>> {
885
+ (0..batch.num_rows())
886
+ .map(|row_index| {
887
+ let global = optional_bool_value(batch, row_index, "global")?.unwrap_or(false);
888
+ let version_id =
889
+ optional_string_value(batch, row_index, "version_id")?.unwrap_or_else(|| {
890
+ if global {
891
+ GLOBAL_VERSION_ID.to_string()
892
+ } else {
893
+ version_binding.to_string()
894
+ }
895
+ });
896
+
897
+ Ok(StageRow {
898
+ entity_id: Some(
899
+ EntityIdentity::from_string(&required_string_value(
900
+ batch,
901
+ row_index,
902
+ "entity_id",
903
+ )?)
904
+ .map_err(|error| {
905
+ DataFusionError::Execution(format!(
906
+ "lix_state INSERT has invalid entity_id: {error}"
907
+ ))
908
+ })?,
909
+ ),
910
+ schema_key: required_string_value(batch, row_index, "schema_key")?,
911
+ file_id: optional_string_value(batch, row_index, "file_id")?,
912
+ snapshot_content: optional_string_value(batch, row_index, "snapshot_content")?,
913
+ metadata: optional_metadata_value(batch, row_index, "metadata", "lix_state")?,
914
+ origin: None,
915
+ schema_version: required_string_value(batch, row_index, "schema_version")?,
916
+ created_at: optional_string_value(batch, row_index, "created_at")?,
917
+ updated_at: optional_string_value(batch, row_index, "updated_at")?,
918
+ global,
919
+ change_id: optional_string_value(batch, row_index, "change_id")?,
920
+ commit_id: optional_string_value(batch, row_index, "commit_id")?,
921
+ untracked: optional_bool_value(batch, row_index, "untracked")?.unwrap_or(false),
922
+ version_id,
923
+ })
924
+ })
925
+ .collect()
926
+ }
927
+
928
+ fn required_string_value(
929
+ batch: &RecordBatch,
930
+ row_index: usize,
931
+ column_name: &str,
932
+ ) -> Result<String> {
933
+ optional_string_value(batch, row_index, column_name)?.ok_or_else(|| {
934
+ DataFusionError::Execution(format!(
935
+ "INSERT into lix_state requires non-null text column '{column_name}'"
936
+ ))
937
+ })
938
+ }
939
+
940
+ fn optional_string_value(
941
+ batch: &RecordBatch,
942
+ row_index: usize,
943
+ column_name: &str,
944
+ ) -> Result<Option<String>> {
945
+ match optional_scalar_value(batch, row_index, column_name)? {
946
+ None
947
+ | Some(ScalarValue::Null)
948
+ | Some(ScalarValue::Utf8(None))
949
+ | Some(ScalarValue::Utf8View(None))
950
+ | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
951
+ Some(ScalarValue::Utf8(Some(value)))
952
+ | Some(ScalarValue::Utf8View(Some(value)))
953
+ | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
954
+ Some(other) => Err(DataFusionError::Execution(format!(
955
+ "INSERT into lix_state expected text-compatible column '{column_name}', got {other:?}"
956
+ ))),
957
+ }
958
+ }
959
+
960
+ fn optional_metadata_value(
961
+ batch: &RecordBatch,
962
+ row_index: usize,
963
+ column_name: &str,
964
+ context: &str,
965
+ ) -> Result<Option<RowMetadata>> {
966
+ optional_string_value(batch, row_index, column_name)?
967
+ .map(|value| {
968
+ parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
969
+ })
970
+ .transpose()
971
+ }
972
+
973
+ fn optional_bool_value(
974
+ batch: &RecordBatch,
975
+ row_index: usize,
976
+ column_name: &str,
977
+ ) -> Result<Option<bool>> {
978
+ match optional_scalar_value(batch, row_index, column_name)? {
979
+ Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
980
+ None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
981
+ Some(other) => Err(DataFusionError::Execution(format!(
982
+ "INSERT into lix_state expected boolean column '{column_name}', got {other:?}"
983
+ ))),
984
+ }
985
+ }
986
+
987
+ fn optional_scalar_value(
988
+ batch: &RecordBatch,
989
+ row_index: usize,
990
+ column_name: &str,
991
+ ) -> Result<Option<ScalarValue>> {
992
+ let schema = batch.schema();
993
+ let column_index = match schema.index_of(column_name) {
994
+ Ok(column_index) => column_index,
995
+ Err(_) => return Ok(None),
996
+ };
997
+
998
+ if row_index >= batch.num_rows() {
999
+ return Err(DataFusionError::Execution(format!(
1000
+ "row index {row_index} out of bounds for lix_state batch with {} rows",
1001
+ batch.num_rows()
1002
+ )));
1003
+ }
1004
+
1005
+ ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
1006
+ .map(Some)
1007
+ .map_err(|error| {
1008
+ DataFusionError::Execution(format!(
1009
+ "failed to decode lix_state column '{column_name}' at row {row_index}: {error}"
1010
+ ))
1011
+ })
1012
+ }
1013
+
1014
+ struct LixStateScanExec {
1015
+ live_state: Arc<dyn LiveStateReader>,
1016
+ schema: SchemaRef,
1017
+ request: LiveStateScanRequest,
1018
+ properties: Arc<PlanProperties>,
1019
+ }
1020
+
1021
+ impl std::fmt::Debug for LixStateScanExec {
1022
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1023
+ f.debug_struct("LixStateScanExec").finish()
1024
+ }
1025
+ }
1026
+
1027
+ impl LixStateScanExec {
1028
+ fn new(
1029
+ live_state: Arc<dyn LiveStateReader>,
1030
+ schema: SchemaRef,
1031
+ request: LiveStateScanRequest,
1032
+ ) -> Self {
1033
+ let properties = PlanProperties::new(
1034
+ EquivalenceProperties::new(schema.clone()),
1035
+ Partitioning::UnknownPartitioning(1),
1036
+ EmissionType::Incremental,
1037
+ Boundedness::Bounded,
1038
+ );
1039
+ Self {
1040
+ live_state,
1041
+ schema,
1042
+ request,
1043
+ properties: Arc::new(properties),
1044
+ }
1045
+ }
1046
+ }
1047
+
1048
+ impl DisplayAs for LixStateScanExec {
1049
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1050
+ match t {
1051
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
1052
+ write!(f, "LixStateScanExec(limit={:?})", self.request.limit)
1053
+ }
1054
+ DisplayFormatType::TreeRender => write!(f, "LixStateScanExec"),
1055
+ }
1056
+ }
1057
+ }
1058
+
1059
+ impl ExecutionPlan for LixStateScanExec {
1060
+ fn name(&self) -> &str {
1061
+ "LixStateScanExec"
1062
+ }
1063
+
1064
+ fn as_any(&self) -> &dyn Any {
1065
+ self
1066
+ }
1067
+
1068
+ fn properties(&self) -> &Arc<PlanProperties> {
1069
+ &self.properties
1070
+ }
1071
+
1072
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1073
+ Vec::new()
1074
+ }
1075
+
1076
+ fn with_new_children(
1077
+ self: Arc<Self>,
1078
+ children: Vec<Arc<dyn ExecutionPlan>>,
1079
+ ) -> Result<Arc<dyn ExecutionPlan>> {
1080
+ if !children.is_empty() {
1081
+ return Err(DataFusionError::Execution(
1082
+ "LixStateScanExec does not accept children".to_string(),
1083
+ ));
1084
+ }
1085
+ Ok(self)
1086
+ }
1087
+
1088
+ fn execute(
1089
+ &self,
1090
+ partition: usize,
1091
+ _context: Arc<TaskContext>,
1092
+ ) -> Result<SendableRecordBatchStream> {
1093
+ if partition != 0 {
1094
+ return Err(DataFusionError::Execution(format!(
1095
+ "LixStateScanExec only exposes one partition, got {partition}"
1096
+ )));
1097
+ }
1098
+
1099
+ let live_state = Arc::clone(&self.live_state);
1100
+ let schema = Arc::clone(&self.schema);
1101
+ let request = self.request.clone();
1102
+ let stream_schema = Arc::clone(&schema);
1103
+ let stream = stream::once(async move {
1104
+ let rows = if request.limit == Some(0) {
1105
+ Vec::new()
1106
+ } else {
1107
+ live_state
1108
+ .scan_rows(&request)
1109
+ .await
1110
+ .map_err(lix_error_to_datafusion_error)?
1111
+ };
1112
+ let batch = lix_state_record_batch(Arc::clone(&stream_schema), &rows)
1113
+ .map_err(lix_error_to_datafusion_error)?;
1114
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
1115
+ batch,
1116
+ )]))
1117
+ })
1118
+ .try_flatten();
1119
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
1120
+ }
1121
+ }
1122
+
1123
+ fn lix_state_schema() -> SchemaRef {
1124
+ Arc::new(Schema::new(vec![
1125
+ Field::new("entity_id", DataType::Utf8, false),
1126
+ Field::new("schema_key", DataType::Utf8, false),
1127
+ Field::new("file_id", DataType::Utf8, true),
1128
+ json_field("snapshot_content", true),
1129
+ json_field("metadata", true),
1130
+ Field::new("schema_version", DataType::Utf8, true),
1131
+ Field::new("created_at", DataType::Utf8, true),
1132
+ Field::new("updated_at", DataType::Utf8, true),
1133
+ Field::new("global", DataType::Boolean, true),
1134
+ Field::new("change_id", DataType::Utf8, true),
1135
+ Field::new("commit_id", DataType::Utf8, true),
1136
+ Field::new("untracked", DataType::Boolean, true),
1137
+ ]))
1138
+ }
1139
+
1140
+ fn lix_state_by_version_schema() -> SchemaRef {
1141
+ Arc::new(Schema::new(vec![
1142
+ Field::new("entity_id", DataType::Utf8, false),
1143
+ Field::new("schema_key", DataType::Utf8, false),
1144
+ Field::new("file_id", DataType::Utf8, true),
1145
+ json_field("snapshot_content", true),
1146
+ json_field("metadata", true),
1147
+ Field::new("schema_version", DataType::Utf8, true),
1148
+ Field::new("created_at", DataType::Utf8, true),
1149
+ Field::new("updated_at", DataType::Utf8, true),
1150
+ Field::new("global", DataType::Boolean, true),
1151
+ Field::new("change_id", DataType::Utf8, true),
1152
+ Field::new("commit_id", DataType::Utf8, true),
1153
+ Field::new("untracked", DataType::Boolean, true),
1154
+ Field::new("version_id", DataType::Utf8, false),
1155
+ ]))
1156
+ }
1157
+
1158
+ #[derive(Debug, Clone, PartialEq, Eq, Default)]
1159
+ struct LixStateByVersionRoute {
1160
+ schema_keys: Option<BTreeSet<String>>,
1161
+ version_ids: Option<BTreeSet<String>>,
1162
+ entity_ids: Option<BTreeSet<String>>,
1163
+ file_id: Option<NullableKeyFilter<String>>,
1164
+ contradictory: bool,
1165
+ }
1166
+
1167
+ impl LixStateByVersionRoute {
1168
+ fn from_filters(filters: &[Expr]) -> Self {
1169
+ let mut route = Self::default();
1170
+ for filter in filters {
1171
+ let Some(predicates) = parse_lix_state_filters(filter) else {
1172
+ continue;
1173
+ };
1174
+ for predicate in predicates {
1175
+ match predicate {
1176
+ LixStateFilterPredicate::SchemaKeys(values) => {
1177
+ merge_string_route_slot(
1178
+ &mut route.schema_keys,
1179
+ values,
1180
+ &mut route.contradictory,
1181
+ );
1182
+ }
1183
+ LixStateFilterPredicate::VersionIds(values) => {
1184
+ merge_string_route_slot(
1185
+ &mut route.version_ids,
1186
+ values,
1187
+ &mut route.contradictory,
1188
+ );
1189
+ }
1190
+ LixStateFilterPredicate::EntityIds(values) => {
1191
+ merge_string_route_slot(
1192
+ &mut route.entity_ids,
1193
+ values,
1194
+ &mut route.contradictory,
1195
+ );
1196
+ }
1197
+ LixStateFilterPredicate::FileId(filter) => {
1198
+ merge_nullable_key_route_slot(
1199
+ &mut route.file_id,
1200
+ filter,
1201
+ &mut route.contradictory,
1202
+ );
1203
+ }
1204
+ }
1205
+ }
1206
+ }
1207
+ route
1208
+ }
1209
+ }
1210
+
1211
+ #[derive(Debug, Clone, PartialEq, Eq)]
1212
+ enum LixStateFilterPredicate {
1213
+ SchemaKeys(BTreeSet<String>),
1214
+ VersionIds(BTreeSet<String>),
1215
+ EntityIds(BTreeSet<String>),
1216
+ FileId(NullableKeyFilter<String>),
1217
+ }
1218
+
1219
+ fn lix_state_scan_request(
1220
+ schema: &SchemaRef,
1221
+ version_binding: Option<&str>,
1222
+ projection: Option<&Vec<usize>>,
1223
+ route: &LixStateByVersionRoute,
1224
+ limit: Option<usize>,
1225
+ ) -> LiveStateScanRequest {
1226
+ let projection = LiveStateProjection {
1227
+ columns: projection_column_names(schema, projection),
1228
+ };
1229
+ let mut filter = LiveStateFilter {
1230
+ schema_keys: route
1231
+ .schema_keys
1232
+ .as_ref()
1233
+ .map(|values| values.iter().cloned().collect())
1234
+ .unwrap_or_default(),
1235
+ entity_ids: route
1236
+ .entity_ids
1237
+ .as_ref()
1238
+ .map(|values| {
1239
+ values
1240
+ .iter()
1241
+ .map(|value| EntityIdentity::single(value))
1242
+ .collect()
1243
+ })
1244
+ .unwrap_or_default(),
1245
+ version_ids: version_binding
1246
+ .map(|value| vec![value.to_string()])
1247
+ .or_else(|| {
1248
+ route
1249
+ .version_ids
1250
+ .as_ref()
1251
+ .map(|values| values.iter().cloned().collect())
1252
+ })
1253
+ .unwrap_or_default(),
1254
+ ..LiveStateFilter::default()
1255
+ };
1256
+ if let Some(file_id) = route.file_id.clone() {
1257
+ filter.file_ids.push(file_id);
1258
+ }
1259
+
1260
+ LiveStateScanRequest {
1261
+ filter,
1262
+ projection,
1263
+ limit: route.contradictory.then_some(0).or(limit),
1264
+ }
1265
+ }
1266
+
1267
+ fn projection_column_names(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Vec<String> {
1268
+ projection
1269
+ .map(|indices| {
1270
+ indices
1271
+ .iter()
1272
+ .filter_map(|index| schema.fields().get(*index))
1273
+ .map(|field| field.name().to_string())
1274
+ .collect::<Vec<_>>()
1275
+ })
1276
+ .unwrap_or_default()
1277
+ }
1278
+
1279
/// Narrows `slot` by another set of allowed values.
///
/// * An empty `values` set is ignored.
/// * An unset slot simply takes `values`.
/// * Otherwise the slot keeps only the values present in both sets; if nothing
///   is left, `contradictory` is set to `true`.
fn merge_string_route_slot(
    slot: &mut Option<BTreeSet<String>>,
    values: BTreeSet<String>,
    contradictory: &mut bool,
) {
    if values.is_empty() {
        return;
    }

    let Some(current) = slot.as_mut() else {
        *slot = Some(values);
        return;
    };

    current.retain(|candidate| values.contains(candidate));
    if current.is_empty() {
        *contradictory = true;
    }
}
1298
+
1299
+ fn merge_nullable_key_route_slot(
1300
+ slot: &mut Option<NullableKeyFilter<String>>,
1301
+ value: NullableKeyFilter<String>,
1302
+ contradictory: &mut bool,
1303
+ ) {
1304
+ match slot {
1305
+ Some(existing) if *existing != value => *contradictory = true,
1306
+ Some(_) => {}
1307
+ None => *slot = Some(value),
1308
+ }
1309
+ }
1310
+
1311
+ fn parse_lix_state_filter(expr: &Expr) -> Option<LixStateFilterPredicate> {
1312
+ parse_lix_state_filters(expr)?.into_iter().next()
1313
+ }
1314
+
1315
+ fn parse_lix_state_filters(expr: &Expr) -> Option<Vec<LixStateFilterPredicate>> {
1316
+ match expr {
1317
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
1318
+ let mut predicates = parse_lix_state_filters(&binary_expr.left)?;
1319
+ predicates.extend(parse_lix_state_filters(&binary_expr.right)?);
1320
+ Some(predicates)
1321
+ }
1322
+ Expr::BinaryExpr(binary_expr) => {
1323
+ parse_lix_state_binary_filter(binary_expr).map(|predicate| vec![predicate])
1324
+ }
1325
+ Expr::InList(in_list) => {
1326
+ parse_lix_state_in_list_filter(in_list).map(|predicate| vec![predicate])
1327
+ }
1328
+ Expr::IsNull(expr) => parse_lix_state_null_filter(expr).map(|predicate| vec![predicate]),
1329
+ _ => None,
1330
+ }
1331
+ }
1332
+
1333
+ fn parse_lix_state_binary_filter(binary_expr: &BinaryExpr) -> Option<LixStateFilterPredicate> {
1334
+ if binary_expr.op != Operator::Eq {
1335
+ return None;
1336
+ }
1337
+
1338
+ parse_lix_state_column_literal_filter(&binary_expr.left, &binary_expr.right)
1339
+ .or_else(|| parse_lix_state_column_literal_filter(&binary_expr.right, &binary_expr.left))
1340
+ }
1341
+
1342
+ fn parse_lix_state_in_list_filter(in_list: &InList) -> Option<LixStateFilterPredicate> {
1343
+ if in_list.negated {
1344
+ return None;
1345
+ }
1346
+ let Expr::Column(column) = in_list.expr.as_ref() else {
1347
+ return None;
1348
+ };
1349
+
1350
+ let values = in_list
1351
+ .list
1352
+ .iter()
1353
+ .map(string_expr_literal)
1354
+ .collect::<Option<Vec<_>>>()?;
1355
+ if values.is_empty() {
1356
+ return None;
1357
+ }
1358
+
1359
+ let values = values.into_iter().collect::<BTreeSet<_>>();
1360
+ match column.name.as_str() {
1361
+ "schema_key" => Some(LixStateFilterPredicate::SchemaKeys(values)),
1362
+ "version_id" => Some(LixStateFilterPredicate::VersionIds(values)),
1363
+ "entity_id" => Some(LixStateFilterPredicate::EntityIds(values)),
1364
+ _ => None,
1365
+ }
1366
+ }
1367
+
1368
+ fn parse_lix_state_null_filter(expr: &Expr) -> Option<LixStateFilterPredicate> {
1369
+ let Expr::Column(column) = expr else {
1370
+ return None;
1371
+ };
1372
+
1373
+ match column.name.as_str() {
1374
+ "file_id" => Some(LixStateFilterPredicate::FileId(NullableKeyFilter::Null)),
1375
+ _ => None,
1376
+ }
1377
+ }
1378
+
1379
+ fn parse_lix_state_column_literal_filter(
1380
+ column_expr: &Expr,
1381
+ literal_expr: &Expr,
1382
+ ) -> Option<LixStateFilterPredicate> {
1383
+ let Expr::Column(column) = column_expr else {
1384
+ return None;
1385
+ };
1386
+
1387
+ match column.name.as_str() {
1388
+ "schema_key" => string_expr_literal(literal_expr)
1389
+ .map(|value| LixStateFilterPredicate::SchemaKeys(BTreeSet::from([value]))),
1390
+ "version_id" => string_expr_literal(literal_expr)
1391
+ .map(|value| LixStateFilterPredicate::VersionIds(BTreeSet::from([value]))),
1392
+ "entity_id" => string_expr_literal(literal_expr)
1393
+ .map(|value| LixStateFilterPredicate::EntityIds(BTreeSet::from([value]))),
1394
+ "file_id" => nullable_key_literal(literal_expr).map(LixStateFilterPredicate::FileId),
1395
+ _ => None,
1396
+ }
1397
+ }
1398
+
1399
+ fn nullable_key_literal(expr: &Expr) -> Option<NullableKeyFilter<String>> {
1400
+ if is_null_literal(expr) {
1401
+ return Some(NullableKeyFilter::Null);
1402
+ }
1403
+ string_expr_literal(expr).map(NullableKeyFilter::Value)
1404
+ }
1405
+
1406
+ fn string_expr_literal(expr: &Expr) -> Option<String> {
1407
+ let Expr::Literal(literal, _) = expr else {
1408
+ return None;
1409
+ };
1410
+ match literal {
1411
+ ScalarValue::Utf8(Some(value))
1412
+ | ScalarValue::Utf8View(Some(value))
1413
+ | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
1414
+ _ => None,
1415
+ }
1416
+ }
1417
+
1418
+ fn is_null_literal(expr: &Expr) -> bool {
1419
+ matches!(expr, Expr::Literal(ScalarValue::Null, _))
1420
+ }
1421
+
1422
+ fn lix_state_record_batch(
1423
+ schema: SchemaRef,
1424
+ rows: &[LiveStateRow],
1425
+ ) -> Result<RecordBatch, LixError> {
1426
+ if schema.fields().is_empty() {
1427
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
1428
+ return RecordBatch::try_new_with_options(schema, vec![], &options).map_err(|error| {
1429
+ LixError::new(
1430
+ "LIX_ERROR_UNKNOWN",
1431
+ format!("sql2 failed to build zero-column lix_state batch: {error}"),
1432
+ )
1433
+ });
1434
+ }
1435
+
1436
+ let columns = schema
1437
+ .fields()
1438
+ .iter()
1439
+ .map(|field| {
1440
+ Ok(match field.name().as_str() {
1441
+ "entity_id" => Arc::new(StringArray::from(
1442
+ rows.iter()
1443
+ .map(|row| row.entity_id.as_string().map(Some))
1444
+ .collect::<std::result::Result<Vec<_>, LixError>>()?,
1445
+ )) as ArrayRef,
1446
+ "schema_key" => string_array(rows.iter().map(|row| Some(row.schema_key.as_str()))),
1447
+ "file_id" => string_array(rows.iter().map(|row| row.file_id.as_deref())),
1448
+ "snapshot_content" => {
1449
+ string_array(rows.iter().map(|row| row.snapshot_content.as_deref()))
1450
+ }
1451
+ "metadata" => Arc::new(StringArray::from(
1452
+ rows.iter()
1453
+ .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
1454
+ .collect::<Vec<_>>(),
1455
+ )),
1456
+ "schema_version" => {
1457
+ string_array(rows.iter().map(|row| Some(row.schema_version.as_str())))
1458
+ }
1459
+ "created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
1460
+ "updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
1461
+ "global" => Arc::new(BooleanArray::from(
1462
+ rows.iter().map(|row| row.global).collect::<Vec<_>>(),
1463
+ )) as ArrayRef,
1464
+ "change_id" => string_array(rows.iter().map(|row| row.change_id.as_deref())),
1465
+ "commit_id" => string_array(rows.iter().map(|row| row.commit_id.as_deref())),
1466
+ "untracked" => Arc::new(BooleanArray::from(
1467
+ rows.iter().map(|row| row.untracked).collect::<Vec<_>>(),
1468
+ )) as ArrayRef,
1469
+ "version_id" => string_array(rows.iter().map(|row| Some(row.version_id.as_str()))),
1470
+ other => {
1471
+ return Err(LixError::new(
1472
+ "LIX_ERROR_UNKNOWN",
1473
+ format!("sql2 does not support lix_state column '{other}'"),
1474
+ ))
1475
+ }
1476
+ })
1477
+ })
1478
+ .collect::<Result<Vec<_>, _>>()?;
1479
+
1480
+ RecordBatch::try_new(schema, columns).map_err(|error| {
1481
+ LixError::new(
1482
+ "LIX_ERROR_UNKNOWN",
1483
+ format!("sql2 failed to build lix_state_by_version batch: {error}"),
1484
+ )
1485
+ })
1486
+ }
1487
+
1488
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
1489
+ let values = values
1490
+ .map(|value| value.map(ToOwned::to_owned))
1491
+ .collect::<Vec<_>>();
1492
+ Arc::new(StringArray::from(values)) as ArrayRef
1493
+ }
1494
+
1495
+ fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1496
+ let Some(projection) = projection else {
1497
+ return Ok(Arc::clone(schema));
1498
+ };
1499
+
1500
+ let projected = schema.project(projection).map_err(|error| {
1501
+ DataFusionError::Execution(format!("sql2 failed to project lix_state schema: {error}"))
1502
+ })?;
1503
+ Ok(Arc::new(projected))
1504
+ }
1505
+
1506
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
1507
+ super::error::datafusion_error_to_lix_error(error)
1508
+ }
1509
+
1510
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
1511
+ super::error::lix_error_to_datafusion_error(error)
1512
+ }
1513
+
1514
+ #[cfg(test)]
1515
+ mod tests {
1516
+ use super::{
1517
+ lix_state_scan_request, lix_state_schema, lix_state_write_rows_from_batch,
1518
+ parse_lix_state_filter, register_lix_state_write_providers, LixStateByVersionRoute,
1519
+ LixStateDeleteExec, LixStateFilterPredicate, LixStateInsertSink, LixStateProvider,
1520
+ LixStateUpdateExec,
1521
+ };
1522
+ use crate::binary_cas::BlobDataReader;
1523
+ use crate::functions::{
1524
+ FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
1525
+ };
1526
+ use crate::sql2::dml::{InsertExec, InsertSink};
1527
+ use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
1528
+ use crate::transaction::types::{StageRow, StageWrite, StageWriteMode, StageWriteOutcome};
1529
+ use crate::version::{VersionHead, VersionRefReader};
1530
+ use crate::{
1531
+ entity_identity::EntityIdentity,
1532
+ live_state::{LiveStateReader, LiveStateRow, LiveStateRowRequest, LiveStateScanRequest},
1533
+ };
1534
+ use crate::{LixError, NullableKeyFilter};
1535
+ use async_trait::async_trait;
1536
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray, UInt64Array};
1537
+ use datafusion::arrow::datatypes::DataType;
1538
+ use datafusion::arrow::record_batch::RecordBatch;
1539
+ use datafusion::catalog::TableProvider;
1540
+ use datafusion::common::{Column, DataFusionError};
1541
+ use datafusion::execution::TaskContext;
1542
+ use datafusion::logical_expr::dml::InsertOp;
1543
+ use datafusion::logical_expr::expr::InList;
1544
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
1545
+ use datafusion::physical_expr::EquivalenceProperties;
1546
+ use datafusion::physical_plan::empty::EmptyExec;
1547
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
1548
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
1549
+ use datafusion::physical_plan::{
1550
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
1551
+ };
1552
+ use datafusion::prelude::SessionContext;
1553
+ use datafusion::scalar::ScalarValue;
1554
+ use futures_util::stream;
1555
+ use serde_json::json;
1556
+ use std::collections::BTreeSet;
1557
+ use std::sync::Arc;
1558
+
1559
+ struct EmptyLiveStateReader;
1560
+ struct EmptyVersionRefReader;
1561
+ #[allow(dead_code)]
1562
+ struct RowsLiveStateReader {
1563
+ rows: Vec<LiveStateRow>,
1564
+ }
1565
+ struct DummyBlobReader;
1566
+
1567
+ #[derive(Default)]
1568
+ struct DummyWriteContext {
1569
+ rows: Vec<LiveStateRow>,
1570
+ }
1571
+
1572
+ #[derive(Default)]
1573
+ struct CapturingWriteContext {
1574
+ rows: Vec<LiveStateRow>,
1575
+ writes: Vec<StageWrite>,
1576
+ }
1577
+
1578
+ struct SingleBatchExec {
1579
+ batch: RecordBatch,
1580
+ properties: Arc<PlanProperties>,
1581
+ }
1582
+
1583
+ impl std::fmt::Debug for SingleBatchExec {
1584
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1585
+ f.debug_struct("SingleBatchExec").finish()
1586
+ }
1587
+ }
1588
+
1589
+ impl SingleBatchExec {
1590
+ fn new(batch: RecordBatch) -> Self {
1591
+ let properties = PlanProperties::new(
1592
+ EquivalenceProperties::new(batch.schema()),
1593
+ Partitioning::UnknownPartitioning(1),
1594
+ EmissionType::Incremental,
1595
+ Boundedness::Bounded,
1596
+ );
1597
+ Self {
1598
+ batch,
1599
+ properties: Arc::new(properties),
1600
+ }
1601
+ }
1602
+ }
1603
+
1604
+ impl DisplayAs for SingleBatchExec {
1605
+ fn fmt_as(
1606
+ &self,
1607
+ _t: DisplayFormatType,
1608
+ f: &mut std::fmt::Formatter<'_>,
1609
+ ) -> std::fmt::Result {
1610
+ write!(f, "SingleBatchExec")
1611
+ }
1612
+ }
1613
+
1614
+ impl ExecutionPlan for SingleBatchExec {
1615
+ fn name(&self) -> &str {
1616
+ "SingleBatchExec"
1617
+ }
1618
+
1619
+ fn as_any(&self) -> &dyn std::any::Any {
1620
+ self
1621
+ }
1622
+
1623
+ fn properties(&self) -> &Arc<PlanProperties> {
1624
+ &self.properties
1625
+ }
1626
+
1627
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1628
+ Vec::new()
1629
+ }
1630
+
1631
+ fn with_new_children(
1632
+ self: Arc<Self>,
1633
+ children: Vec<Arc<dyn ExecutionPlan>>,
1634
+ ) -> datafusion::common::Result<Arc<dyn ExecutionPlan>> {
1635
+ if !children.is_empty() {
1636
+ return Err(DataFusionError::Execution(
1637
+ "SingleBatchExec does not accept children".to_string(),
1638
+ ));
1639
+ }
1640
+ Ok(self)
1641
+ }
1642
+
1643
+ fn execute(
1644
+ &self,
1645
+ partition: usize,
1646
+ _context: Arc<TaskContext>,
1647
+ ) -> datafusion::common::Result<SendableRecordBatchStream> {
1648
+ if partition != 0 {
1649
+ return Err(DataFusionError::Execution(format!(
1650
+ "SingleBatchExec only exposes one partition, got {partition}"
1651
+ )));
1652
+ }
1653
+
1654
+ let batch = self.batch.clone();
1655
+ let schema = batch.schema();
1656
+ let stream = stream::iter(vec![Ok(batch)]);
1657
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
1658
+ }
1659
+ }
1660
+
1661
+ #[async_trait]
1662
+ impl LiveStateReader for EmptyLiveStateReader {
1663
+ async fn scan_rows(
1664
+ &self,
1665
+ _request: &LiveStateScanRequest,
1666
+ ) -> Result<Vec<LiveStateRow>, LixError> {
1667
+ Ok(vec![])
1668
+ }
1669
+
1670
+ async fn load_row(
1671
+ &self,
1672
+ _request: &LiveStateRowRequest,
1673
+ ) -> Result<Option<LiveStateRow>, LixError> {
1674
+ Ok(None)
1675
+ }
1676
+ }
1677
+
1678
+ #[async_trait]
1679
+ impl VersionRefReader for EmptyVersionRefReader {
1680
+ async fn load_head(&self, _version_id: &str) -> Result<Option<VersionHead>, LixError> {
1681
+ Ok(None)
1682
+ }
1683
+
1684
+ async fn scan_heads(&self) -> Result<Vec<VersionHead>, LixError> {
1685
+ Ok(Vec::new())
1686
+ }
1687
+ }
1688
+
1689
+ fn empty_version_ref() -> Arc<dyn VersionRefReader> {
1690
+ Arc::new(EmptyVersionRefReader)
1691
+ }
1692
+
1693
+ #[async_trait]
1694
+ impl LiveStateReader for RowsLiveStateReader {
1695
+ async fn scan_rows(
1696
+ &self,
1697
+ _request: &LiveStateScanRequest,
1698
+ ) -> Result<Vec<LiveStateRow>, LixError> {
1699
+ Ok(self.rows.clone())
1700
+ }
1701
+
1702
+ async fn load_row(
1703
+ &self,
1704
+ _request: &LiveStateRowRequest,
1705
+ ) -> Result<Option<LiveStateRow>, LixError> {
1706
+ Ok(None)
1707
+ }
1708
+ }
1709
+
1710
+ fn test_functions() -> FunctionProviderHandle {
1711
+ SharedFunctionProvider::new(
1712
+ Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
1713
+ )
1714
+ }
1715
+
1716
+ #[async_trait]
1717
+ impl BlobDataReader for DummyBlobReader {
1718
+ async fn load_bytes_many(
1719
+ &self,
1720
+ hashes: &[crate::binary_cas::BlobHash],
1721
+ ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1722
+ Ok(crate::binary_cas::BlobBytesBatch::missing(hashes.len()))
1723
+ }
1724
+ }
1725
+
1726
+ #[async_trait]
1727
+ impl SqlWriteExecutionContext for DummyWriteContext {
1728
+ fn active_version_id(&self) -> &str {
1729
+ "version-a"
1730
+ }
1731
+
1732
+ fn functions(&self) -> FunctionProviderHandle {
1733
+ test_functions()
1734
+ }
1735
+
1736
+ fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
1737
+ Ok(Vec::new())
1738
+ }
1739
+
1740
+ async fn load_bytes_many(
1741
+ &mut self,
1742
+ hashes: &[crate::binary_cas::BlobHash],
1743
+ ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1744
+ DummyBlobReader.load_bytes_many(hashes).await
1745
+ }
1746
+
1747
+ async fn scan_live_state(
1748
+ &mut self,
1749
+ _request: &LiveStateScanRequest,
1750
+ ) -> Result<Vec<LiveStateRow>, LixError> {
1751
+ Ok(self.rows.clone())
1752
+ }
1753
+
1754
+ async fn load_version_head(
1755
+ &mut self,
1756
+ version_id: &str,
1757
+ ) -> Result<Option<String>, LixError> {
1758
+ if version_id == "ghost-version" {
1759
+ return Ok(None);
1760
+ }
1761
+ Ok(Some(format!("commit-{version_id}")))
1762
+ }
1763
+
1764
+ async fn stage_write(&mut self, _write: StageWrite) -> Result<StageWriteOutcome, LixError> {
1765
+ Ok(StageWriteOutcome { count: 0 })
1766
+ }
1767
+ }
1768
+
1769
+ #[async_trait]
1770
+ impl SqlWriteExecutionContext for CapturingWriteContext {
1771
+ fn active_version_id(&self) -> &str {
1772
+ "version-a"
1773
+ }
1774
+
1775
+ fn functions(&self) -> FunctionProviderHandle {
1776
+ test_functions()
1777
+ }
1778
+
1779
+ fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
1780
+ Ok(Vec::new())
1781
+ }
1782
+
1783
+ async fn load_bytes_many(
1784
+ &mut self,
1785
+ hashes: &[crate::binary_cas::BlobHash],
1786
+ ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1787
+ DummyBlobReader.load_bytes_many(hashes).await
1788
+ }
1789
+
1790
+ async fn scan_live_state(
1791
+ &mut self,
1792
+ _request: &LiveStateScanRequest,
1793
+ ) -> Result<Vec<LiveStateRow>, LixError> {
1794
+ Ok(self.rows.clone())
1795
+ }
1796
+
1797
+ async fn load_version_head(
1798
+ &mut self,
1799
+ version_id: &str,
1800
+ ) -> Result<Option<String>, LixError> {
1801
+ if version_id == "ghost-version" {
1802
+ return Ok(None);
1803
+ }
1804
+ Ok(Some(format!("commit-{version_id}")))
1805
+ }
1806
+
1807
+ async fn stage_write(&mut self, write: StageWrite) -> Result<StageWriteOutcome, LixError> {
1808
+ self.writes.push(write);
1809
+ Ok(StageWriteOutcome { count: 0 })
1810
+ }
1811
+ }
1812
+
1813
+ fn col(name: &str) -> Expr {
1814
+ Expr::Column(Column::from_name(name))
1815
+ }
1816
+
1817
+ fn str_lit(value: &str) -> Expr {
1818
+ Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
1819
+ }
1820
+
1821
+ fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
1822
+ Arc::new(StringArray::from(values)) as ArrayRef
1823
+ }
1824
+
1825
+ fn one_row_lix_state_batch(global: bool) -> RecordBatch {
1826
+ RecordBatch::try_new(
1827
+ lix_state_schema(),
1828
+ vec![
1829
+ string_column(vec![Some("entity-1")]),
1830
+ string_column(vec![Some("lix_key_value")]),
1831
+ string_column(vec![None]),
1832
+ string_column(vec![Some("{\"key\":\"hello\",\"value\":\"world\"}")]),
1833
+ string_column(vec![Some("{\"source\":\"test\"}")]),
1834
+ string_column(vec![Some("1")]),
1835
+ string_column(vec![Some("2026-04-23T00:00:00Z")]),
1836
+ string_column(vec![Some("2026-04-23T01:00:00Z")]),
1837
+ Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
1838
+ string_column(vec![Some("change-a")]),
1839
+ string_column(vec![None]),
1840
+ Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1841
+ ],
1842
+ )
1843
+ .expect("valid lix_state batch")
1844
+ }
1845
+
1846
+ fn one_row_stageable_lix_state_batch() -> RecordBatch {
1847
+ RecordBatch::try_new(
1848
+ lix_state_schema(),
1849
+ vec![
1850
+ string_column(vec![Some("entity-1")]),
1851
+ string_column(vec![Some("lix_key_value")]),
1852
+ string_column(vec![None]),
1853
+ string_column(vec![Some("{\"key\":\"hello\",\"value\":\"world\"}")]),
1854
+ string_column(vec![None]),
1855
+ string_column(vec![Some("1")]),
1856
+ string_column(vec![None]),
1857
+ string_column(vec![None]),
1858
+ Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1859
+ string_column(vec![None]),
1860
+ string_column(vec![None]),
1861
+ Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1862
+ ],
1863
+ )
1864
+ .expect("valid stageable lix_state batch")
1865
+ }
1866
+
1867
+ fn live_row(entity_id: &str, metadata: Option<&str>) -> LiveStateRow {
1868
+ LiveStateRow {
1869
+ entity_id: EntityIdentity::from_string(entity_id).expect("entity id should decode"),
1870
+ schema_key: "lix_key_value".to_string(),
1871
+ file_id: None,
1872
+ snapshot_content: Some("{\"key\":\"hello\",\"value\":\"world\"}".to_string()),
1873
+ metadata: metadata.map(|value| {
1874
+ serde_json::from_str(value).expect("test metadata should be valid JSON")
1875
+ }),
1876
+ schema_version: "1".to_string(),
1877
+ version_id: "version-a".to_string(),
1878
+ change_id: Some(format!("change-{entity_id}")),
1879
+ commit_id: Some(format!("commit-{entity_id}")),
1880
+ global: false,
1881
+ untracked: false,
1882
+ created_at: "2026-04-23T00:00:00Z".to_string(),
1883
+ updated_at: "2026-04-23T01:00:00Z".to_string(),
1884
+ }
1885
+ }
1886
+
1887
/// `schema_key = 'profile'` parses into a `SchemaKeys` predicate holding that single key.
#[test]
fn parses_eq_filter_for_schema_key() {
    let filter = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(col("schema_key")),
        Operator::Eq,
        Box::new(str_lit("profile")),
    ));

    let expected_keys: BTreeSet<String> = std::iter::once(String::from("profile")).collect();
    let expected = Some(LixStateFilterPredicate::SchemaKeys(expected_keys));

    assert_eq!(parse_lix_state_filter(&filter), expected);
}
1902
+
1903
/// `version_id IN ('a', 'b')` parses into a `VersionIds` predicate holding both ids.
#[test]
fn parses_in_list_filter_for_version_id() {
    let candidates = vec![str_lit("a"), str_lit("b")];
    let filter = Expr::InList(InList::new(Box::new(col("version_id")), candidates, false));

    let expected_ids: BTreeSet<String> = ["a", "b"].into_iter().map(String::from).collect();
    let expected = Some(LixStateFilterPredicate::VersionIds(expected_ids));

    assert_eq!(parse_lix_state_filter(&filter), expected);
}
1919
+
1920
/// Filters, projection indices and limit all end up in the scan request.
///
/// Three filters (schema key, version id, `file_id IS NULL`) feed the route, and the
/// projection `[0, 1, 12]` is expected to resolve to entity id, schema key and version id.
#[test]
fn builds_scan_request_from_route_and_projection() {
    let schema_key_is_profile = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(col("schema_key")),
        Operator::Eq,
        Box::new(str_lit("profile")),
    ));
    let version_is_v1 = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(col("version_id")),
        Operator::Eq,
        Box::new(str_lit("v1")),
    ));
    let file_id_is_null = Expr::IsNull(Box::new(col("file_id")));

    let schema = super::lix_state_by_version_schema();
    let route = LixStateByVersionRoute::from_filters(&[
        schema_key_is_profile,
        version_is_v1,
        file_id_is_null,
    ]);
    let projection = vec![0, 1, 12];

    let request = lix_state_scan_request(&schema, None, Some(&projection), &route, Some(10));

    assert_eq!(request.filter.schema_keys, vec![String::from("profile")]);
    assert_eq!(request.filter.version_ids, vec![String::from("v1")]);
    assert_eq!(request.filter.file_ids, vec![NullableKeyFilter::Null]);

    let expected_columns: Vec<String> = ["entity_id", "schema_key", "version_id"]
        .into_iter()
        .map(String::from)
        .collect();
    assert_eq!(request.projection.columns, expected_columns);
    assert_eq!(request.limit, Some(10));
}
1953
+
1954
/// A single `AND` expression contributes both of its operands to the route.
#[test]
fn builds_route_from_and_filter_tree() {
    let entity_matches = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(col("entity_id")),
        Operator::Eq,
        Box::new(str_lit("entity-a")),
    ));
    let version_in_list = Expr::InList(InList::new(
        Box::new(col("version_id")),
        vec![str_lit("version-a"), str_lit("global")],
        false,
    ));
    let conjunction = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(entity_matches),
        Operator::And,
        Box::new(version_in_list),
    ));

    let route = LixStateByVersionRoute::from_filters(&[conjunction]);

    let expected_entities: BTreeSet<String> = std::iter::once(String::from("entity-a")).collect();
    assert_eq!(route.entity_ids, Some(expected_entities));

    let expected_versions: BTreeSet<String> = ["global", "version-a"]
        .into_iter()
        .map(String::from)
        .collect();
    assert_eq!(route.version_ids, Some(expected_versions));
}
1982
+
1983
/// Two different equality filters on `schema_key` yield a zero limit and no schema keys.
#[test]
fn contradictory_filters_turn_into_zero_limit_request() {
    let schema_key_equals = |value: &str| {
        Expr::BinaryExpr(BinaryExpr::new(
            Box::new(col("schema_key")),
            Operator::Eq,
            Box::new(str_lit(value)),
        ))
    };
    let filters = [schema_key_equals("a"), schema_key_equals("b")];

    let schema = super::lix_state_by_version_schema();
    let route = LixStateByVersionRoute::from_filters(&filters);
    let request = lix_state_scan_request(&schema, None, None, &route, None);

    assert_eq!(request.limit, Some(0));
    assert!(request.filter.schema_keys.is_empty());
}
2004
+
2005
/// Passing an active version id puts that id into the request's version filter.
#[test]
fn active_version_view_pins_version_filter() {
    let schema_key_is_profile = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(col("schema_key")),
        Operator::Eq,
        Box::new(str_lit("profile")),
    ));

    let schema = super::lix_state_schema();
    let route = LixStateByVersionRoute::from_filters(&[schema_key_is_profile]);
    let request = lix_state_scan_request(&schema, Some("version-a"), None, &route, None);

    assert_eq!(request.filter.schema_keys, vec![String::from("profile")]);
    assert_eq!(request.filter.version_ids, vec![String::from("version-a")]);
}
2019
+
2020
+ #[tokio::test]
2021
+ async fn registers_active_lix_state_with_write_context_only() {
2022
+ let session = SessionContext::new();
2023
+ let mut write_context = DummyWriteContext::default();
2024
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2025
+
2026
+ register_lix_state_write_providers(&session, write_ctx)
2027
+ .await
2028
+ .expect("lix_state providers should register");
2029
+
2030
+ let lix_state = session
2031
+ .table_provider("lix_state")
2032
+ .await
2033
+ .expect("lix_state provider should exist");
2034
+ let lix_state = lix_state
2035
+ .as_any()
2036
+ .downcast_ref::<LixStateProvider>()
2037
+ .expect("lix_state should be a LixStateProvider");
2038
+ assert!(lix_state.write_access.is_write());
2039
+
2040
+ let by_version = session
2041
+ .table_provider("lix_state_by_version")
2042
+ .await
2043
+ .expect("lix_state_by_version provider should exist");
2044
+ let by_version = by_version
2045
+ .as_any()
2046
+ .downcast_ref::<LixStateProvider>()
2047
+ .expect("lix_state_by_version should be a LixStateProvider");
2048
+ assert!(by_version.write_access.is_write());
2049
+ }
2050
+
2051
+ #[tokio::test]
2052
+ async fn insert_into_requires_write_transaction() {
2053
+ let session = SessionContext::new();
2054
+ let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2055
+ let provider =
2056
+ LixStateProvider::active_version("version-a", live_state, empty_version_ref());
2057
+ let input = Arc::new(EmptyExec::new(provider.schema())) as Arc<dyn ExecutionPlan>;
2058
+
2059
+ let error = provider
2060
+ .insert_into(&session.state(), input, InsertOp::Append)
2061
+ .await
2062
+ .expect_err("insert without a write context should fail");
2063
+
2064
+ assert!(
2065
+ error.to_string().contains("requires a write transaction"),
2066
+ "unexpected error: {error}"
2067
+ );
2068
+ }
2069
+
2070
+ #[tokio::test]
2071
+ async fn update_requires_write_transaction() {
2072
+ let session = SessionContext::new();
2073
+ let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2074
+ let provider =
2075
+ LixStateProvider::active_version("version-a", live_state, empty_version_ref());
2076
+
2077
+ let error = provider
2078
+ .update(
2079
+ &session.state(),
2080
+ vec![("metadata".to_string(), str_lit("{\"source\":\"update\"}"))],
2081
+ vec![],
2082
+ )
2083
+ .await
2084
+ .expect_err("update without a write context should fail");
2085
+
2086
+ assert!(
2087
+ error.to_string().contains("requires a write transaction"),
2088
+ "unexpected error: {error}"
2089
+ );
2090
+ }
2091
+
2092
+ #[tokio::test]
2093
+ async fn delete_requires_write_transaction() {
2094
+ let session = SessionContext::new();
2095
+ let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2096
+ let provider =
2097
+ LixStateProvider::active_version("version-a", live_state, empty_version_ref());
2098
+
2099
+ let error = provider
2100
+ .delete_from(&session.state(), vec![])
2101
+ .await
2102
+ .expect_err("delete without a write context should fail");
2103
+
2104
+ assert!(
2105
+ error.to_string().contains("requires a write transaction"),
2106
+ "unexpected error: {error}"
2107
+ );
2108
+ }
2109
+
2110
+ #[tokio::test]
2111
+ async fn delete_returns_lix_state_delete_exec_with_write_ctx() {
2112
+ let session = SessionContext::new();
2113
+ let mut write_context = DummyWriteContext::default();
2114
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2115
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2116
+
2117
+ let plan = provider
2118
+ .delete_from(&session.state(), vec![])
2119
+ .await
2120
+ .expect("delete should produce a write plan");
2121
+
2122
+ assert!(plan.as_any().is::<LixStateDeleteExec>());
2123
+ }
2124
+
2125
+ #[tokio::test]
2126
+ async fn update_rejects_read_only_lix_state_columns() {
2127
+ let session = SessionContext::new();
2128
+ let mut write_context = DummyWriteContext::default();
2129
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2130
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2131
+
2132
+ let error = provider
2133
+ .update(
2134
+ &session.state(),
2135
+ vec![("entity_id".to_string(), str_lit("entity-2"))],
2136
+ vec![],
2137
+ )
2138
+ .await
2139
+ .expect_err("updating a read-only field should fail");
2140
+
2141
+ assert!(
2142
+ error.to_string().contains("read-only column 'entity_id'"),
2143
+ "unexpected error: {error}"
2144
+ );
2145
+ }
2146
+
2147
+ #[tokio::test]
2148
+ async fn update_returns_lix_state_update_exec_with_write_ctx() {
2149
+ let session = SessionContext::new();
2150
+ let mut write_context = DummyWriteContext::default();
2151
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2152
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2153
+
2154
+ let plan = provider
2155
+ .update(
2156
+ &session.state(),
2157
+ vec![("metadata".to_string(), str_lit("{\"source\":\"update\"}"))],
2158
+ vec![],
2159
+ )
2160
+ .await
2161
+ .expect("update should produce a write plan");
2162
+
2163
+ assert!(plan.as_any().is::<LixStateUpdateExec>());
2164
+ }
2165
+
2166
+ #[tokio::test]
2167
+ async fn insert_into_returns_data_sink_exec_with_write_ctx() {
2168
+ let session = SessionContext::new();
2169
+ let mut write_context = DummyWriteContext::default();
2170
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2171
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2172
+ let input = Arc::new(EmptyExec::new(provider.schema())) as Arc<dyn ExecutionPlan>;
2173
+
2174
+ let plan = provider
2175
+ .insert_into(&session.state(), input, InsertOp::Append)
2176
+ .await
2177
+ .expect("insert should produce a write plan");
2178
+
2179
+ assert!(plan.as_any().is::<InsertExec>());
2180
+ }
2181
+
2182
/// A fully populated, non-global batch decodes into one `StageRow` in the given version.
#[test]
fn decodes_lix_state_batch_into_write_rows() {
    let batch = one_row_lix_state_batch(false);

    let decoded =
        lix_state_write_rows_from_batch(&batch, "version-a").expect("batch should decode");

    let expected = StageRow {
        entity_id: Some(EntityIdentity::single("entity-1")),
        schema_key: String::from("lix_key_value"),
        file_id: None,
        snapshot_content: Some(String::from("{\"key\":\"hello\",\"value\":\"world\"}")),
        metadata: Some(json!({"source": "test"})),
        origin: None,
        schema_version: String::from("1"),
        created_at: Some(String::from("2026-04-23T00:00:00Z")),
        updated_at: Some(String::from("2026-04-23T01:00:00Z")),
        global: false,
        change_id: Some(String::from("change-a")),
        commit_id: None,
        untracked: false,
        version_id: String::from("version-a"),
    };
    assert_eq!(decoded, vec![expected]);
}
2207
+
2208
/// A batch flagged global decodes into the `global` version, not the supplied one.
#[test]
fn decodes_global_lix_state_batch_into_global_version() {
    let batch = one_row_lix_state_batch(true);

    let decoded =
        lix_state_write_rows_from_batch(&batch, "version-a").expect("batch should decode");

    let first = &decoded[0];
    assert_eq!(first.version_id, "global");
    assert!(first.global);
}
2216
+
2217
+ #[tokio::test]
2218
+ async fn insert_sink_stages_decoded_lix_state_rows() {
2219
+ let mut write_context = CapturingWriteContext::default();
2220
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2221
+ let sink = LixStateInsertSink::new(lix_state_schema(), write_ctx, "version-a".to_string());
2222
+ let batch = one_row_lix_state_batch(false);
2223
+ let count = sink
2224
+ .write_batches(vec![batch], &Arc::new(TaskContext::default()))
2225
+ .await
2226
+ .expect("sink should stage write");
2227
+
2228
+ assert_eq!(count, 1);
2229
+ assert_eq!(
2230
+ write_context.writes.as_slice(),
2231
+ &[StageWrite::Rows {
2232
+ mode: StageWriteMode::Insert,
2233
+ rows: vec![StageRow {
2234
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2235
+ schema_key: "lix_key_value".to_string(),
2236
+ file_id: None,
2237
+ snapshot_content: Some("{\"key\":\"hello\",\"value\":\"world\"}".to_string()),
2238
+ metadata: Some(json!({"source": "test"})),
2239
+ origin: None,
2240
+ schema_version: "1".to_string(),
2241
+ created_at: Some("2026-04-23T00:00:00Z".to_string()),
2242
+ updated_at: Some("2026-04-23T01:00:00Z".to_string()),
2243
+ global: false,
2244
+ change_id: Some("change-a".to_string()),
2245
+ commit_id: None,
2246
+ untracked: false,
2247
+ version_id: "version-a".to_string(),
2248
+ }]
2249
+ }]
2250
+ );
2251
+ }
2252
+
2253
+ #[tokio::test]
2254
+ async fn insert_plan_returns_datafusion_count_uint64() {
2255
+ let session = SessionContext::new();
2256
+ let mut write_context = CapturingWriteContext::default();
2257
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2258
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2259
+ let input = Arc::new(SingleBatchExec::new(one_row_stageable_lix_state_batch()))
2260
+ as Arc<dyn ExecutionPlan>;
2261
+
2262
+ let plan = provider
2263
+ .insert_into(&session.state(), input, InsertOp::Append)
2264
+ .await
2265
+ .expect("insert should produce a write plan");
2266
+ let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2267
+ .await
2268
+ .expect("insert write plan should execute");
2269
+
2270
+ assert_eq!(batches.len(), 1);
2271
+ assert_eq!(batches[0].num_rows(), 1);
2272
+ assert_eq!(batches[0].num_columns(), 1);
2273
+ assert_eq!(batches[0].schema().field(0).name(), "count");
2274
+ assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2275
+ assert!(!batches[0].schema().field(0).is_nullable());
2276
+
2277
+ let count = batches[0]
2278
+ .column(0)
2279
+ .as_any()
2280
+ .downcast_ref::<UInt64Array>()
2281
+ .expect("count should be UInt64");
2282
+ assert_eq!(count.value(0), 1);
2283
+ assert_eq!(write_context.writes.len(), 1);
2284
+ }
2285
+
2286
+ #[tokio::test]
2287
+ async fn update_plan_evaluates_filters_assignments_and_stages_rows() {
2288
+ let session = SessionContext::new();
2289
+ let mut write_context = CapturingWriteContext {
2290
+ rows: vec![
2291
+ live_row("entity-1", Some("{\"source\":\"match\"}")),
2292
+ live_row("entity-2", Some("{\"source\":\"skip\"}")),
2293
+ ],
2294
+ writes: Vec::new(),
2295
+ };
2296
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2297
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2298
+
2299
+ let plan = provider
2300
+ .update(
2301
+ &session.state(),
2302
+ vec![
2303
+ (
2304
+ "snapshot_content".to_string(),
2305
+ str_lit("{\"key\":\"hello\",\"value\":\"updated\"}"),
2306
+ ),
2307
+ (
2308
+ "metadata".to_string(),
2309
+ str_lit("{\"schema_key\":\"lix_key_value\"}"),
2310
+ ),
2311
+ ],
2312
+ vec![Expr::BinaryExpr(BinaryExpr::new(
2313
+ Box::new(col("metadata")),
2314
+ Operator::Eq,
2315
+ Box::new(str_lit("{\"source\":\"match\"}")),
2316
+ ))],
2317
+ )
2318
+ .await
2319
+ .expect("update should produce a write plan");
2320
+ let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2321
+ .await
2322
+ .expect("update write plan should execute");
2323
+
2324
+ assert_eq!(batches.len(), 1);
2325
+ assert_eq!(batches[0].schema().field(0).name(), "count");
2326
+ assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2327
+ let count = batches[0]
2328
+ .column(0)
2329
+ .as_any()
2330
+ .downcast_ref::<UInt64Array>()
2331
+ .expect("count should be UInt64");
2332
+ assert_eq!(count.value(0), 1);
2333
+
2334
+ assert_eq!(
2335
+ write_context.writes.as_slice(),
2336
+ &[StageWrite::Rows {
2337
+ mode: StageWriteMode::Replace,
2338
+ rows: vec![StageRow {
2339
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2340
+ schema_key: "lix_key_value".to_string(),
2341
+ file_id: None,
2342
+ snapshot_content: Some("{\"key\":\"hello\",\"value\":\"updated\"}".to_string()),
2343
+ metadata: Some(json!({"schema_key": "lix_key_value"})),
2344
+ origin: None,
2345
+ schema_version: "1".to_string(),
2346
+ created_at: None,
2347
+ updated_at: None,
2348
+ global: false,
2349
+ change_id: None,
2350
+ commit_id: None,
2351
+ untracked: false,
2352
+ version_id: "version-a".to_string(),
2353
+ }]
2354
+ }]
2355
+ );
2356
+ }
2357
+
2358
+ #[tokio::test]
2359
+ async fn delete_plan_with_empty_filters_stages_all_visible_rows() {
2360
+ let session = SessionContext::new();
2361
+ let mut write_context = CapturingWriteContext {
2362
+ rows: vec![
2363
+ live_row("entity-1", Some("{\"source\":\"one\"}")),
2364
+ live_row("entity-2", Some("{\"source\":\"two\"}")),
2365
+ ],
2366
+ writes: Vec::new(),
2367
+ };
2368
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2369
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2370
+
2371
+ let plan = provider
2372
+ .delete_from(&session.state(), vec![])
2373
+ .await
2374
+ .expect("delete should produce a write plan");
2375
+ let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2376
+ .await
2377
+ .expect("delete write plan should execute");
2378
+
2379
+ assert_eq!(batches.len(), 1);
2380
+ assert_eq!(batches[0].schema().field(0).name(), "count");
2381
+ assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2382
+ let count = batches[0]
2383
+ .column(0)
2384
+ .as_any()
2385
+ .downcast_ref::<UInt64Array>()
2386
+ .expect("count should be UInt64");
2387
+ assert_eq!(count.value(0), 2);
2388
+
2389
+ assert_eq!(
2390
+ write_context.writes.as_slice(),
2391
+ &[StageWrite::Rows {
2392
+ mode: StageWriteMode::Replace,
2393
+ rows: vec![
2394
+ StageRow {
2395
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2396
+ schema_key: "lix_key_value".to_string(),
2397
+ file_id: None,
2398
+ snapshot_content: None,
2399
+ metadata: Some(json!({"source": "one"})),
2400
+ origin: None,
2401
+ schema_version: "1".to_string(),
2402
+ created_at: None,
2403
+ updated_at: None,
2404
+ global: false,
2405
+ change_id: None,
2406
+ commit_id: None,
2407
+ untracked: false,
2408
+ version_id: "version-a".to_string(),
2409
+ },
2410
+ StageRow {
2411
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-2")),
2412
+ schema_key: "lix_key_value".to_string(),
2413
+ file_id: None,
2414
+ snapshot_content: None,
2415
+ metadata: Some(json!({"source": "two"})),
2416
+ origin: None,
2417
+ schema_version: "1".to_string(),
2418
+ created_at: None,
2419
+ updated_at: None,
2420
+ global: false,
2421
+ change_id: None,
2422
+ commit_id: None,
2423
+ untracked: false,
2424
+ version_id: "version-a".to_string(),
2425
+ },
2426
+ ]
2427
+ }]
2428
+ );
2429
+ }
2430
+ }