@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,2512 @@
1
+ use std::any::Any;
2
+ use std::collections::BTreeSet;
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray, UInt64Array};
7
+ use datafusion::arrow::compute::{and, filter_record_batch};
8
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
9
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
10
+ use datafusion::catalog::{Session, TableProvider};
11
+ use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, SchemaExt};
12
+ use datafusion::datasource::TableType;
13
+ use datafusion::execution::TaskContext;
14
+ use datafusion::logical_expr::dml::InsertOp;
15
+ use datafusion::logical_expr::expr::InList;
16
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
17
+ use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
18
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
19
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
20
+ use datafusion::physical_plan::{
21
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
22
+ };
23
+ use datafusion::prelude::SessionContext;
24
+ use datafusion::scalar::ScalarValue;
25
+ use futures_util::{stream, TryStreamExt};
26
+ use serde_json::Value as JsonValue;
27
+
28
+ use crate::entity_identity::EntityIdentity;
29
+ use crate::live_state::MaterializedLiveStateRow;
30
+ use crate::live_state::{
31
+ LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
32
+ };
33
+ use crate::sql2::dml::{InsertExec, InsertSink};
34
+ use crate::sql2::read_only::reject_read_only_stage_rows;
35
+ use crate::sql2::version_scope::{resolve_provider_version_ids, VersionBinding};
36
+ use crate::sql2::write_normalization::{InsertCell, SqlCell, UpdateAssignmentValues};
37
+ use crate::transaction::types::{TransactionJson, TransactionWriteRow};
38
+ use crate::version::VersionRefReader;
39
+ use crate::GLOBAL_VERSION_ID;
40
+ use crate::{parse_row_metadata_value, serialize_row_metadata, LixError, NullableKeyFilter};
41
+
42
+ use crate::sql2::{
43
+ SqlWriteContext, WriteAccess, WriteContextLiveStateReader, WriteContextVersionRefReader,
44
+ };
45
+ use crate::transaction::types::{TransactionWrite, TransactionWriteMode};
46
+
47
+ use super::predicate_typecheck::validate_json_predicate_filters;
48
+ use super::result_metadata::json_field;
49
+
50
+ pub(crate) async fn register_lix_state_providers(
51
+ session: &SessionContext,
52
+ active_version_id: &str,
53
+ live_state: Arc<dyn LiveStateReader>,
54
+ version_ref: Arc<dyn VersionRefReader>,
55
+ ) -> Result<(), LixError> {
56
+ session
57
+ .register_table(
58
+ "lix_state_by_version",
59
+ Arc::new(LixStateProvider::by_version(
60
+ Arc::clone(&live_state),
61
+ Arc::clone(&version_ref),
62
+ )),
63
+ )
64
+ .map_err(datafusion_error_to_lix_error)?;
65
+ session
66
+ .register_table(
67
+ "lix_state",
68
+ Arc::new(LixStateProvider::active_version(
69
+ active_version_id,
70
+ live_state,
71
+ version_ref,
72
+ )),
73
+ )
74
+ .map_err(datafusion_error_to_lix_error)?;
75
+ Ok(())
76
+ }
77
+
78
+ pub(crate) async fn register_lix_state_write_providers(
79
+ session: &SessionContext,
80
+ write_ctx: SqlWriteContext,
81
+ ) -> Result<(), LixError> {
82
+ session
83
+ .register_table(
84
+ "lix_state_by_version",
85
+ Arc::new(LixStateProvider::by_version_with_write(write_ctx.clone())),
86
+ )
87
+ .map_err(datafusion_error_to_lix_error)?;
88
+ session
89
+ .register_table(
90
+ "lix_state",
91
+ Arc::new(LixStateProvider::active_version_with_write(write_ctx)),
92
+ )
93
+ .map_err(datafusion_error_to_lix_error)?;
94
+ Ok(())
95
+ }
96
+
97
+ pub(crate) struct LixStateProvider {
98
+ schema: SchemaRef,
99
+ live_state: Arc<dyn LiveStateReader>,
100
+ version_ref: Arc<dyn VersionRefReader>,
101
+ write_access: WriteAccess,
102
+ version_binding: VersionBinding,
103
+ }
104
+
105
+ impl std::fmt::Debug for LixStateProvider {
106
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107
+ f.debug_struct("LixStateProvider")
108
+ .field("write_access", &self.write_access.is_write())
109
+ .finish()
110
+ }
111
+ }
112
+
113
+ impl LixStateProvider {
114
+ pub(crate) fn active_version(
115
+ active_version_id: impl Into<String>,
116
+ live_state: Arc<dyn LiveStateReader>,
117
+ version_ref: Arc<dyn VersionRefReader>,
118
+ ) -> Self {
119
+ Self {
120
+ schema: lix_state_schema(),
121
+ live_state,
122
+ version_ref,
123
+ write_access: WriteAccess::read_only(),
124
+ version_binding: VersionBinding::active(active_version_id),
125
+ }
126
+ }
127
+
128
+ pub(crate) fn active_version_with_write(write_ctx: SqlWriteContext) -> Self {
129
+ let active_version_id = write_ctx.active_version_id();
130
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
131
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
132
+ Self {
133
+ schema: lix_state_schema(),
134
+ live_state,
135
+ version_ref,
136
+ write_access: WriteAccess::write(write_ctx),
137
+ version_binding: VersionBinding::active(active_version_id),
138
+ }
139
+ }
140
+
141
+ pub(crate) fn by_version(
142
+ live_state: Arc<dyn LiveStateReader>,
143
+ version_ref: Arc<dyn VersionRefReader>,
144
+ ) -> Self {
145
+ Self {
146
+ schema: lix_state_by_version_schema(),
147
+ live_state,
148
+ version_ref,
149
+ write_access: WriteAccess::read_only(),
150
+ version_binding: VersionBinding::explicit(),
151
+ }
152
+ }
153
+
154
+ pub(crate) fn by_version_with_write(write_ctx: SqlWriteContext) -> Self {
155
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
156
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
157
+ Self {
158
+ schema: lix_state_by_version_schema(),
159
+ live_state,
160
+ version_ref,
161
+ write_access: WriteAccess::write(write_ctx),
162
+ version_binding: VersionBinding::explicit(),
163
+ }
164
+ }
165
+ }
166
+
167
+ #[async_trait]
168
+ impl TableProvider for LixStateProvider {
169
+ fn as_any(&self) -> &dyn Any {
170
+ self
171
+ }
172
+
173
+ fn schema(&self) -> SchemaRef {
174
+ Arc::clone(&self.schema)
175
+ }
176
+
177
+ fn table_type(&self) -> TableType {
178
+ TableType::Base
179
+ }
180
+
181
+ fn supports_filters_pushdown(
182
+ &self,
183
+ filters: &[&Expr],
184
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
185
+ Ok(filters
186
+ .iter()
187
+ .map(|filter| {
188
+ if parse_lix_state_filter(filter).is_some() {
189
+ TableProviderFilterPushDown::Exact
190
+ } else {
191
+ TableProviderFilterPushDown::Unsupported
192
+ }
193
+ })
194
+ .collect())
195
+ }
196
+
197
+ async fn scan(
198
+ &self,
199
+ _state: &dyn Session,
200
+ projection: Option<&Vec<usize>>,
201
+ filters: &[Expr],
202
+ limit: Option<usize>,
203
+ ) -> Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>> {
204
+ let route = LixStateByVersionRoute::from_filters(filters);
205
+ let projected_schema = projected_schema(&self.schema, projection)?;
206
+ let mut request = lix_state_scan_request(
207
+ &self.schema,
208
+ self.version_binding.active_version_id(),
209
+ projection,
210
+ &route,
211
+ limit,
212
+ );
213
+ if !route.contradictory {
214
+ request.filter.version_ids = resolve_provider_version_ids(
215
+ self.version_ref.as_ref(),
216
+ &self.version_binding,
217
+ request.filter.version_ids,
218
+ )
219
+ .await
220
+ .map_err(lix_error_to_datafusion_error)?;
221
+ }
222
+ Ok(Arc::new(LixStateScanExec::new(
223
+ Arc::clone(&self.live_state),
224
+ projected_schema,
225
+ request,
226
+ )))
227
+ }
228
+
229
+ async fn insert_into(
230
+ &self,
231
+ _state: &dyn Session,
232
+ input: Arc<dyn ExecutionPlan>,
233
+ insert_op: InsertOp,
234
+ ) -> Result<Arc<dyn ExecutionPlan>> {
235
+ if insert_op != InsertOp::Append {
236
+ return not_impl_err!("{insert_op} not implemented for lix_state yet");
237
+ }
238
+
239
+ let active_version_id = self
240
+ .version_binding
241
+ .require_active_version_id("INSERT")
242
+ .map_err(lix_error_to_datafusion_error)?;
243
+
244
+ let write_ctx = self.write_access.require_write("INSERT into lix_state")?;
245
+
246
+ self.schema
247
+ .logically_equivalent_names_and_types(&input.schema())?;
248
+
249
+ let sink = LixStateInsertSink::new(
250
+ Arc::clone(&self.schema),
251
+ write_ctx.clone(),
252
+ active_version_id,
253
+ );
254
+ Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
255
+ }
256
+
257
+ async fn delete_from(
258
+ &self,
259
+ state: &dyn Session,
260
+ filters: Vec<Expr>,
261
+ ) -> Result<Arc<dyn ExecutionPlan>> {
262
+ let active_version_id = self
263
+ .version_binding
264
+ .require_active_version_id("DELETE")
265
+ .map_err(lix_error_to_datafusion_error)?;
266
+
267
+ let write_ctx = self.write_access.require_write("DELETE FROM lix_state")?;
268
+
269
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
270
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
271
+ let physical_filters = filters
272
+ .iter()
273
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
274
+ .collect::<Result<Vec<_>>>()?;
275
+
276
+ let route = LixStateByVersionRoute::from_filters(&filters);
277
+ let request =
278
+ lix_state_scan_request(&self.schema, Some(&active_version_id), None, &route, None);
279
+
280
+ Ok(Arc::new(LixStateDeleteExec::new(
281
+ write_ctx.clone(),
282
+ Arc::clone(&self.schema),
283
+ active_version_id,
284
+ request,
285
+ physical_filters,
286
+ )))
287
+ }
288
+
289
+ async fn update(
290
+ &self,
291
+ state: &dyn Session,
292
+ assignments: Vec<(String, Expr)>,
293
+ filters: Vec<Expr>,
294
+ ) -> Result<Arc<dyn ExecutionPlan>> {
295
+ let active_version_id = self
296
+ .version_binding
297
+ .require_active_version_id("UPDATE")
298
+ .map_err(lix_error_to_datafusion_error)?;
299
+
300
+ let write_ctx = self.write_access.require_write("UPDATE lix_state")?;
301
+
302
+ validate_lix_state_update_assignments(&self.schema, &assignments)?;
303
+
304
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
305
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
306
+ let physical_assignments = assignments
307
+ .iter()
308
+ .map(|(column_name, expr)| {
309
+ Ok((
310
+ column_name.clone(),
311
+ create_physical_expr(expr, &df_schema, state.execution_props())?,
312
+ ))
313
+ })
314
+ .collect::<Result<Vec<_>>>()?;
315
+ let physical_filters = filters
316
+ .iter()
317
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
318
+ .collect::<Result<Vec<_>>>()?;
319
+
320
+ let route = LixStateByVersionRoute::from_filters(&filters);
321
+ let request =
322
+ lix_state_scan_request(&self.schema, Some(&active_version_id), None, &route, None);
323
+
324
+ Ok(Arc::new(LixStateUpdateExec::new(
325
+ write_ctx.clone(),
326
+ Arc::clone(&self.schema),
327
+ active_version_id,
328
+ request,
329
+ physical_assignments,
330
+ physical_filters,
331
+ )))
332
+ }
333
+ }
334
+
335
+ struct LixStateInsertSink {
336
+ write_ctx: SqlWriteContext,
337
+ version_binding: String,
338
+ }
339
+
340
+ impl std::fmt::Debug for LixStateInsertSink {
341
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
342
+ f.debug_struct("LixStateInsertSink").finish()
343
+ }
344
+ }
345
+
346
+ impl LixStateInsertSink {
347
+ fn new(_schema: SchemaRef, write_ctx: SqlWriteContext, version_binding: String) -> Self {
348
+ Self {
349
+ write_ctx,
350
+ version_binding,
351
+ }
352
+ }
353
+ }
354
+
355
+ impl DisplayAs for LixStateInsertSink {
356
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
357
+ match t {
358
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
359
+ write!(f, "LixStateInsertSink")
360
+ }
361
+ DisplayFormatType::TreeRender => write!(f, "LixStateInsertSink"),
362
+ }
363
+ }
364
+ }
365
+
366
+ #[async_trait]
367
+ impl InsertSink for LixStateInsertSink {
368
+ async fn write_batches(
369
+ &self,
370
+ batches: Vec<RecordBatch>,
371
+ _context: &Arc<TaskContext>,
372
+ ) -> Result<u64> {
373
+ let mut rows = Vec::new();
374
+ for batch in batches {
375
+ rows.extend(lix_state_write_rows_from_batch(
376
+ &batch,
377
+ &self.version_binding,
378
+ )?);
379
+ }
380
+ reject_read_only_stage_rows(&rows, "INSERT into lix_state")?;
381
+ let count = u64::try_from(rows.len())
382
+ .map_err(|_| DataFusionError::Execution("INSERT row count overflow".into()))?;
383
+
384
+ self.write_ctx
385
+ .stage_write(TransactionWrite::Rows {
386
+ mode: TransactionWriteMode::Insert,
387
+ rows,
388
+ })
389
+ .await
390
+ .map_err(lix_error_to_datafusion_error)?;
391
+
392
+ Ok(count)
393
+ }
394
+ }
395
+
396
+ #[allow(dead_code)]
397
+ struct LixStateDeleteExec {
398
+ write_ctx: SqlWriteContext,
399
+ table_schema: SchemaRef,
400
+ version_binding: String,
401
+ request: LiveStateScanRequest,
402
+ filters: Vec<Arc<dyn PhysicalExpr>>,
403
+ result_schema: SchemaRef,
404
+ properties: Arc<PlanProperties>,
405
+ }
406
+
407
+ impl std::fmt::Debug for LixStateDeleteExec {
408
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
409
+ f.debug_struct("LixStateDeleteExec").finish()
410
+ }
411
+ }
412
+
413
+ impl LixStateDeleteExec {
414
+ fn new(
415
+ write_ctx: SqlWriteContext,
416
+ table_schema: SchemaRef,
417
+ version_binding: String,
418
+ request: LiveStateScanRequest,
419
+ filters: Vec<Arc<dyn PhysicalExpr>>,
420
+ ) -> Self {
421
+ let result_schema = dml_count_schema();
422
+ let properties = PlanProperties::new(
423
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
424
+ Partitioning::UnknownPartitioning(1),
425
+ EmissionType::Final,
426
+ Boundedness::Bounded,
427
+ );
428
+ Self {
429
+ write_ctx,
430
+ table_schema,
431
+ version_binding,
432
+ request,
433
+ filters,
434
+ result_schema,
435
+ properties: Arc::new(properties),
436
+ }
437
+ }
438
+ }
439
+
440
+ impl DisplayAs for LixStateDeleteExec {
441
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
442
+ match t {
443
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
444
+ write!(f, "LixStateDeleteExec(filters={})", self.filters.len())
445
+ }
446
+ DisplayFormatType::TreeRender => write!(f, "LixStateDeleteExec"),
447
+ }
448
+ }
449
+ }
450
+
451
+ impl ExecutionPlan for LixStateDeleteExec {
452
+ fn name(&self) -> &str {
453
+ "LixStateDeleteExec"
454
+ }
455
+
456
+ fn as_any(&self) -> &dyn Any {
457
+ self
458
+ }
459
+
460
+ fn properties(&self) -> &Arc<PlanProperties> {
461
+ &self.properties
462
+ }
463
+
464
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
465
+ Vec::new()
466
+ }
467
+
468
+ fn with_new_children(
469
+ self: Arc<Self>,
470
+ children: Vec<Arc<dyn ExecutionPlan>>,
471
+ ) -> Result<Arc<dyn ExecutionPlan>> {
472
+ if !children.is_empty() {
473
+ return Err(DataFusionError::Execution(
474
+ "LixStateDeleteExec does not accept children".to_string(),
475
+ ));
476
+ }
477
+ Ok(self)
478
+ }
479
+
480
+ fn execute(
481
+ &self,
482
+ partition: usize,
483
+ _context: Arc<TaskContext>,
484
+ ) -> Result<SendableRecordBatchStream> {
485
+ if partition != 0 {
486
+ return Err(DataFusionError::Execution(format!(
487
+ "LixStateDeleteExec only exposes one partition, got {partition}"
488
+ )));
489
+ }
490
+ let write_ctx = self.write_ctx.clone();
491
+ let table_schema = Arc::clone(&self.table_schema);
492
+ let version_binding = self.version_binding.clone();
493
+ let request = self.request.clone();
494
+ let filters = self.filters.clone();
495
+ let result_schema = Arc::clone(&self.result_schema);
496
+ let stream_schema = Arc::clone(&result_schema);
497
+
498
+ let stream = stream::once(async move {
499
+ let rows = if request.limit == Some(0) {
500
+ Vec::new()
501
+ } else {
502
+ write_ctx
503
+ .scan_live_state(&request)
504
+ .await
505
+ .map_err(lix_error_to_datafusion_error)?
506
+ };
507
+ let source_batch = lix_state_record_batch(Arc::clone(&table_schema), &rows)
508
+ .map_err(lix_error_to_datafusion_error)?;
509
+ let matched_batch = filter_lix_state_batch(source_batch, &filters)?;
510
+ let write_rows =
511
+ lix_state_deletable_write_rows_from_batch(&matched_batch, &version_binding)?;
512
+ reject_read_only_stage_rows(&write_rows, "DELETE FROM lix_state")?;
513
+ let count = u64::try_from(write_rows.len())
514
+ .map_err(|_| DataFusionError::Execution("DELETE row count overflow".to_string()))?;
515
+
516
+ if count > 0 {
517
+ write_ctx
518
+ .stage_write(TransactionWrite::Rows {
519
+ mode: TransactionWriteMode::Replace,
520
+ rows: write_rows,
521
+ })
522
+ .await
523
+ .map_err(lix_error_to_datafusion_error)?;
524
+ }
525
+
526
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
527
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
528
+ )]))
529
+ })
530
+ .try_flatten();
531
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
532
+ result_schema,
533
+ stream,
534
+ )))
535
+ }
536
+ }
537
+
538
+ #[allow(dead_code)]
539
+ struct LixStateUpdateExec {
540
+ write_ctx: SqlWriteContext,
541
+ table_schema: SchemaRef,
542
+ version_binding: String,
543
+ request: LiveStateScanRequest,
544
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
545
+ filters: Vec<Arc<dyn PhysicalExpr>>,
546
+ result_schema: SchemaRef,
547
+ properties: Arc<PlanProperties>,
548
+ }
549
+
550
+ impl std::fmt::Debug for LixStateUpdateExec {
551
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
552
+ f.debug_struct("LixStateUpdateExec").finish()
553
+ }
554
+ }
555
+
556
+ impl LixStateUpdateExec {
557
+ fn new(
558
+ write_ctx: SqlWriteContext,
559
+ table_schema: SchemaRef,
560
+ version_binding: String,
561
+ request: LiveStateScanRequest,
562
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
563
+ filters: Vec<Arc<dyn PhysicalExpr>>,
564
+ ) -> Self {
565
+ let result_schema = dml_count_schema();
566
+ let properties = PlanProperties::new(
567
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
568
+ Partitioning::UnknownPartitioning(1),
569
+ EmissionType::Final,
570
+ Boundedness::Bounded,
571
+ );
572
+ Self {
573
+ write_ctx,
574
+ table_schema,
575
+ version_binding,
576
+ request,
577
+ assignments,
578
+ filters,
579
+ result_schema,
580
+ properties: Arc::new(properties),
581
+ }
582
+ }
583
+ }
584
+
585
+ impl DisplayAs for LixStateUpdateExec {
586
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
587
+ match t {
588
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
589
+ write!(
590
+ f,
591
+ "LixStateUpdateExec(assignments={}, filters={})",
592
+ self.assignments.len(),
593
+ self.filters.len()
594
+ )
595
+ }
596
+ DisplayFormatType::TreeRender => write!(f, "LixStateUpdateExec"),
597
+ }
598
+ }
599
+ }
600
+
601
+ impl ExecutionPlan for LixStateUpdateExec {
602
+ fn name(&self) -> &str {
603
+ "LixStateUpdateExec"
604
+ }
605
+
606
+ fn as_any(&self) -> &dyn Any {
607
+ self
608
+ }
609
+
610
+ fn properties(&self) -> &Arc<PlanProperties> {
611
+ &self.properties
612
+ }
613
+
614
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
615
+ Vec::new()
616
+ }
617
+
618
+ fn with_new_children(
619
+ self: Arc<Self>,
620
+ children: Vec<Arc<dyn ExecutionPlan>>,
621
+ ) -> Result<Arc<dyn ExecutionPlan>> {
622
+ if !children.is_empty() {
623
+ return Err(DataFusionError::Execution(
624
+ "LixStateUpdateExec does not accept children".to_string(),
625
+ ));
626
+ }
627
+ Ok(self)
628
+ }
629
+
630
+ fn execute(
631
+ &self,
632
+ partition: usize,
633
+ _context: Arc<TaskContext>,
634
+ ) -> Result<SendableRecordBatchStream> {
635
+ if partition != 0 {
636
+ return Err(DataFusionError::Execution(format!(
637
+ "LixStateUpdateExec only exposes one partition, got {partition}"
638
+ )));
639
+ }
640
+ let write_ctx = self.write_ctx.clone();
641
+ let table_schema = Arc::clone(&self.table_schema);
642
+ let version_binding = self.version_binding.clone();
643
+ let request = self.request.clone();
644
+ let assignments = self.assignments.clone();
645
+ let filters = self.filters.clone();
646
+ let result_schema = Arc::clone(&self.result_schema);
647
+ let stream_schema = Arc::clone(&result_schema);
648
+
649
+ let stream = stream::once(async move {
650
+ let rows = if request.limit == Some(0) {
651
+ Vec::new()
652
+ } else {
653
+ write_ctx
654
+ .scan_live_state(&request)
655
+ .await
656
+ .map_err(lix_error_to_datafusion_error)?
657
+ };
658
+ let source_batch = lix_state_record_batch(Arc::clone(&table_schema), &rows)
659
+ .map_err(lix_error_to_datafusion_error)?;
660
+ let matched_batch = filter_lix_state_batch(source_batch, &filters)?;
661
+ let write_rows = lix_state_update_write_rows_from_batch(
662
+ &matched_batch,
663
+ &assignments,
664
+ &version_binding,
665
+ )?;
666
+ reject_read_only_stage_rows(&write_rows, "UPDATE lix_state")?;
667
+ let count = u64::try_from(write_rows.len())
668
+ .map_err(|_| DataFusionError::Execution("UPDATE row count overflow".to_string()))?;
669
+
670
+ if count > 0 {
671
+ write_ctx
672
+ .stage_write(TransactionWrite::Rows {
673
+ mode: TransactionWriteMode::Replace,
674
+ rows: write_rows,
675
+ })
676
+ .await
677
+ .map_err(lix_error_to_datafusion_error)?;
678
+ }
679
+
680
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
681
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
682
+ )]))
683
+ })
684
+ .try_flatten();
685
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
686
+ result_schema,
687
+ stream,
688
+ )))
689
+ }
690
+ }
691
+
692
+ fn validate_lix_state_update_assignments(
693
+ schema: &SchemaRef,
694
+ assignments: &[(String, Expr)],
695
+ ) -> Result<()> {
696
+ for (column_name, _) in assignments {
697
+ schema.field_with_name(column_name).map_err(|_| {
698
+ DataFusionError::Plan(format!(
699
+ "UPDATE lix_state failed: column '{column_name}' does not exist"
700
+ ))
701
+ })?;
702
+ if !matches!(column_name.as_str(), "snapshot_content" | "metadata") {
703
+ return Err(DataFusionError::Execution(format!(
704
+ "UPDATE lix_state cannot stage read-only column '{column_name}'"
705
+ )));
706
+ }
707
+ }
708
+ Ok(())
709
+ }
710
+
711
+ fn filter_lix_state_batch(
712
+ batch: RecordBatch,
713
+ filters: &[Arc<dyn PhysicalExpr>],
714
+ ) -> Result<RecordBatch> {
715
+ let Some(mask) = evaluate_lix_state_filters(&batch, filters)? else {
716
+ return Ok(batch);
717
+ };
718
+ Ok(filter_record_batch(&batch, &mask)?)
719
+ }
720
+
721
+ fn evaluate_lix_state_filters(
722
+ batch: &RecordBatch,
723
+ filters: &[Arc<dyn PhysicalExpr>],
724
+ ) -> Result<Option<BooleanArray>> {
725
+ if filters.is_empty() {
726
+ return Ok(None);
727
+ }
728
+
729
+ let mut combined_mask: Option<BooleanArray> = None;
730
+ for filter in filters {
731
+ let result = filter.evaluate(batch)?;
732
+ let array = result.into_array(batch.num_rows())?;
733
+ let bool_array = array
734
+ .as_any()
735
+ .downcast_ref::<BooleanArray>()
736
+ .ok_or_else(|| {
737
+ DataFusionError::Execution("UPDATE lix_state filter was not boolean".to_string())
738
+ })?;
739
+ let normalized = bool_array
740
+ .iter()
741
+ .map(|value| Some(value == Some(true)))
742
+ .collect::<BooleanArray>();
743
+ combined_mask = Some(match combined_mask {
744
+ Some(existing) => and(&existing, &normalized)?,
745
+ None => normalized,
746
+ });
747
+ }
748
+ Ok(combined_mask)
749
+ }
750
+
751
+ fn lix_state_stageable_write_rows_from_batch(
752
+ batch: &RecordBatch,
753
+ version_binding: &str,
754
+ ) -> Result<Vec<TransactionWriteRow>> {
755
+ let mut rows = lix_state_write_rows_from_batch(batch, version_binding)?;
756
+ for row in &mut rows {
757
+ row.created_at = None;
758
+ row.updated_at = None;
759
+ row.change_id = None;
760
+ row.commit_id = None;
761
+ }
762
+ Ok(rows)
763
+ }
764
+
765
+ fn lix_state_update_write_rows_from_batch(
766
+ batch: &RecordBatch,
767
+ assignments: &[(String, Arc<dyn PhysicalExpr>)],
768
+ version_binding: &str,
769
+ ) -> Result<Vec<TransactionWriteRow>> {
770
+ let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
771
+ (0..batch.num_rows())
772
+ .map(|row_index| {
773
+ let global = optional_bool_value(batch, row_index, "global")?.unwrap_or(false);
774
+ let version_id =
775
+ optional_string_value(batch, row_index, "version_id")?.unwrap_or_else(|| {
776
+ if global {
777
+ GLOBAL_VERSION_ID.to_string()
778
+ } else {
779
+ version_binding.to_string()
780
+ }
781
+ });
782
+
783
+ Ok(TransactionWriteRow {
784
+ entity_id: Some(
785
+ EntityIdentity::from_json_array_text(&required_string_value(
786
+ batch,
787
+ row_index,
788
+ "entity_id",
789
+ )?)
790
+ .map_err(|error| {
791
+ DataFusionError::Execution(format!(
792
+ "lix_state UPDATE has invalid entity_id: {error}"
793
+ ))
794
+ })?,
795
+ ),
796
+ schema_key: required_string_value(batch, row_index, "schema_key")?,
797
+ file_id: optional_string_value(batch, row_index, "file_id")?,
798
+ snapshot: update_optional_json_value(
799
+ batch,
800
+ &assignment_values,
801
+ row_index,
802
+ "snapshot_content",
803
+ )?,
804
+ metadata: update_optional_metadata_value(
805
+ batch,
806
+ &assignment_values,
807
+ row_index,
808
+ "metadata",
809
+ "lix_state",
810
+ )?,
811
+ origin: None,
812
+ created_at: None,
813
+ updated_at: None,
814
+ global,
815
+ change_id: None,
816
+ commit_id: None,
817
+ untracked: optional_bool_value(batch, row_index, "untracked")?.unwrap_or(false),
818
+ version_id,
819
+ })
820
+ })
821
+ .collect()
822
+ }
823
+
824
+ fn lix_state_deletable_write_rows_from_batch(
825
+ batch: &RecordBatch,
826
+ version_binding: &str,
827
+ ) -> Result<Vec<TransactionWriteRow>> {
828
+ let mut rows = lix_state_stageable_write_rows_from_batch(batch, version_binding)?;
829
+ for row in &mut rows {
830
+ row.snapshot = None;
831
+ }
832
+ Ok(rows)
833
+ }
834
+
835
+ fn update_optional_string_value(
836
+ batch: &RecordBatch,
837
+ assignment_values: &UpdateAssignmentValues,
838
+ row_index: usize,
839
+ column_name: &str,
840
+ ) -> Result<Option<String>> {
841
+ match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
842
+ InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
843
+ InsertCell::Provided(SqlCell::Value(
844
+ ScalarValue::Utf8(Some(value))
845
+ | ScalarValue::Utf8View(Some(value))
846
+ | ScalarValue::LargeUtf8(Some(value)),
847
+ )) => Ok(Some(value)),
848
+ InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
849
+ "UPDATE lix_state expected text-compatible column '{column_name}', got {other:?}"
850
+ ))),
851
+ }
852
+ }
853
+
854
+ fn update_optional_metadata_value(
855
+ batch: &RecordBatch,
856
+ assignment_values: &UpdateAssignmentValues,
857
+ row_index: usize,
858
+ column_name: &str,
859
+ context: &str,
860
+ ) -> Result<Option<TransactionJson>> {
861
+ update_optional_string_value(batch, assignment_values, row_index, column_name)?
862
+ .map(|value| {
863
+ let metadata = parse_row_metadata_value(&value, context)
864
+ .map_err(super::error::lix_error_to_datafusion_error)?;
865
+ TransactionJson::from_value(metadata, &format!("{context} metadata"))
866
+ .map_err(super::error::lix_error_to_datafusion_error)
867
+ })
868
+ .transpose()
869
+ }
870
+
871
+ fn update_optional_json_value(
872
+ batch: &RecordBatch,
873
+ assignment_values: &UpdateAssignmentValues,
874
+ row_index: usize,
875
+ column_name: &str,
876
+ ) -> Result<Option<TransactionJson>> {
877
+ update_optional_string_value(batch, assignment_values, row_index, column_name)?
878
+ .map(|value| parse_snapshot_json(&value, column_name))
879
+ .transpose()
880
+ }
881
+
882
+ fn dml_count_schema() -> SchemaRef {
883
+ Arc::new(Schema::new(vec![Field::new(
884
+ "count",
885
+ DataType::UInt64,
886
+ false,
887
+ )]))
888
+ }
889
+
890
+ fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
891
+ RecordBatch::try_new(
892
+ schema,
893
+ vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
894
+ )
895
+ .map_err(DataFusionError::from)
896
+ }
897
+
898
+ fn lix_state_write_rows_from_batch(
899
+ batch: &RecordBatch,
900
+ version_binding: &str,
901
+ ) -> Result<Vec<TransactionWriteRow>> {
902
+ (0..batch.num_rows())
903
+ .map(|row_index| {
904
+ let global = optional_bool_value(batch, row_index, "global")?.unwrap_or(false);
905
+ let version_id =
906
+ optional_string_value(batch, row_index, "version_id")?.unwrap_or_else(|| {
907
+ if global {
908
+ GLOBAL_VERSION_ID.to_string()
909
+ } else {
910
+ version_binding.to_string()
911
+ }
912
+ });
913
+
914
+ Ok(TransactionWriteRow {
915
+ entity_id: Some(
916
+ EntityIdentity::from_json_array_text(&required_string_value(
917
+ batch,
918
+ row_index,
919
+ "entity_id",
920
+ )?)
921
+ .map_err(|error| {
922
+ DataFusionError::Execution(format!(
923
+ "lix_state INSERT has invalid entity_id: {error}"
924
+ ))
925
+ })?,
926
+ ),
927
+ schema_key: required_string_value(batch, row_index, "schema_key")?,
928
+ file_id: optional_string_value(batch, row_index, "file_id")?,
929
+ snapshot: optional_json_value(batch, row_index, "snapshot_content")?,
930
+ metadata: optional_metadata_value(batch, row_index, "metadata", "lix_state")?,
931
+ origin: None,
932
+ created_at: optional_string_value(batch, row_index, "created_at")?,
933
+ updated_at: optional_string_value(batch, row_index, "updated_at")?,
934
+ global,
935
+ change_id: optional_string_value(batch, row_index, "change_id")?,
936
+ commit_id: optional_string_value(batch, row_index, "commit_id")?,
937
+ untracked: optional_bool_value(batch, row_index, "untracked")?.unwrap_or(false),
938
+ version_id,
939
+ })
940
+ })
941
+ .collect()
942
+ }
943
+
944
+ fn required_string_value(
945
+ batch: &RecordBatch,
946
+ row_index: usize,
947
+ column_name: &str,
948
+ ) -> Result<String> {
949
+ optional_string_value(batch, row_index, column_name)?.ok_or_else(|| {
950
+ DataFusionError::Execution(format!(
951
+ "INSERT into lix_state requires non-null text column '{column_name}'"
952
+ ))
953
+ })
954
+ }
955
+
956
+ fn optional_string_value(
957
+ batch: &RecordBatch,
958
+ row_index: usize,
959
+ column_name: &str,
960
+ ) -> Result<Option<String>> {
961
+ match optional_scalar_value(batch, row_index, column_name)? {
962
+ None
963
+ | Some(ScalarValue::Null)
964
+ | Some(ScalarValue::Utf8(None))
965
+ | Some(ScalarValue::Utf8View(None))
966
+ | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
967
+ Some(ScalarValue::Utf8(Some(value)))
968
+ | Some(ScalarValue::Utf8View(Some(value)))
969
+ | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
970
+ Some(other) => Err(DataFusionError::Execution(format!(
971
+ "INSERT into lix_state expected text-compatible column '{column_name}', got {other:?}"
972
+ ))),
973
+ }
974
+ }
975
+
976
+ fn optional_metadata_value(
977
+ batch: &RecordBatch,
978
+ row_index: usize,
979
+ column_name: &str,
980
+ context: &str,
981
+ ) -> Result<Option<TransactionJson>> {
982
+ optional_string_value(batch, row_index, column_name)?
983
+ .map(|value| {
984
+ let metadata = parse_row_metadata_value(&value, context)
985
+ .map_err(super::error::lix_error_to_datafusion_error)?;
986
+ TransactionJson::from_value(metadata, &format!("{context} metadata"))
987
+ .map_err(super::error::lix_error_to_datafusion_error)
988
+ })
989
+ .transpose()
990
+ }
991
+
992
+ fn optional_json_value(
993
+ batch: &RecordBatch,
994
+ row_index: usize,
995
+ column_name: &str,
996
+ ) -> Result<Option<TransactionJson>> {
997
+ optional_string_value(batch, row_index, column_name)?
998
+ .map(|value| parse_snapshot_json(&value, column_name))
999
+ .transpose()
1000
+ }
1001
+
1002
+ fn parse_snapshot_json(value: &str, column_name: &str) -> Result<TransactionJson> {
1003
+ let parsed = serde_json::from_str::<JsonValue>(value).map_err(|error| {
1004
+ DataFusionError::Execution(format!(
1005
+ "lix_state expected valid JSON in column '{column_name}': {error}"
1006
+ ))
1007
+ })?;
1008
+ TransactionJson::from_value(parsed, &format!("lix_state {column_name}"))
1009
+ .map_err(super::error::lix_error_to_datafusion_error)
1010
+ }
1011
+
1012
+ fn optional_bool_value(
1013
+ batch: &RecordBatch,
1014
+ row_index: usize,
1015
+ column_name: &str,
1016
+ ) -> Result<Option<bool>> {
1017
+ match optional_scalar_value(batch, row_index, column_name)? {
1018
+ Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
1019
+ None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
1020
+ Some(other) => Err(DataFusionError::Execution(format!(
1021
+ "INSERT into lix_state expected boolean column '{column_name}', got {other:?}"
1022
+ ))),
1023
+ }
1024
+ }
1025
+
1026
+ fn optional_scalar_value(
1027
+ batch: &RecordBatch,
1028
+ row_index: usize,
1029
+ column_name: &str,
1030
+ ) -> Result<Option<ScalarValue>> {
1031
+ let schema = batch.schema();
1032
+ let column_index = match schema.index_of(column_name) {
1033
+ Ok(column_index) => column_index,
1034
+ Err(_) => return Ok(None),
1035
+ };
1036
+
1037
+ if row_index >= batch.num_rows() {
1038
+ return Err(DataFusionError::Execution(format!(
1039
+ "row index {row_index} out of bounds for lix_state batch with {} rows",
1040
+ batch.num_rows()
1041
+ )));
1042
+ }
1043
+
1044
+ ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
1045
+ .map(Some)
1046
+ .map_err(|error| {
1047
+ DataFusionError::Execution(format!(
1048
+ "failed to decode lix_state column '{column_name}' at row {row_index}: {error}"
1049
+ ))
1050
+ })
1051
+ }
1052
+
1053
+ struct LixStateScanExec {
1054
+ live_state: Arc<dyn LiveStateReader>,
1055
+ schema: SchemaRef,
1056
+ request: LiveStateScanRequest,
1057
+ properties: Arc<PlanProperties>,
1058
+ }
1059
+
1060
+ impl std::fmt::Debug for LixStateScanExec {
1061
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1062
+ f.debug_struct("LixStateScanExec").finish()
1063
+ }
1064
+ }
1065
+
1066
+ impl LixStateScanExec {
1067
+ fn new(
1068
+ live_state: Arc<dyn LiveStateReader>,
1069
+ schema: SchemaRef,
1070
+ request: LiveStateScanRequest,
1071
+ ) -> Self {
1072
+ let properties = PlanProperties::new(
1073
+ EquivalenceProperties::new(schema.clone()),
1074
+ Partitioning::UnknownPartitioning(1),
1075
+ EmissionType::Incremental,
1076
+ Boundedness::Bounded,
1077
+ );
1078
+ Self {
1079
+ live_state,
1080
+ schema,
1081
+ request,
1082
+ properties: Arc::new(properties),
1083
+ }
1084
+ }
1085
+ }
1086
+
1087
+ impl DisplayAs for LixStateScanExec {
1088
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1089
+ match t {
1090
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
1091
+ write!(f, "LixStateScanExec(limit={:?})", self.request.limit)
1092
+ }
1093
+ DisplayFormatType::TreeRender => write!(f, "LixStateScanExec"),
1094
+ }
1095
+ }
1096
+ }
1097
+
1098
+ impl ExecutionPlan for LixStateScanExec {
1099
+ fn name(&self) -> &str {
1100
+ "LixStateScanExec"
1101
+ }
1102
+
1103
+ fn as_any(&self) -> &dyn Any {
1104
+ self
1105
+ }
1106
+
1107
+ fn properties(&self) -> &Arc<PlanProperties> {
1108
+ &self.properties
1109
+ }
1110
+
1111
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1112
+ Vec::new()
1113
+ }
1114
+
1115
+ fn with_new_children(
1116
+ self: Arc<Self>,
1117
+ children: Vec<Arc<dyn ExecutionPlan>>,
1118
+ ) -> Result<Arc<dyn ExecutionPlan>> {
1119
+ if !children.is_empty() {
1120
+ return Err(DataFusionError::Execution(
1121
+ "LixStateScanExec does not accept children".to_string(),
1122
+ ));
1123
+ }
1124
+ Ok(self)
1125
+ }
1126
+
1127
+ fn execute(
1128
+ &self,
1129
+ partition: usize,
1130
+ _context: Arc<TaskContext>,
1131
+ ) -> Result<SendableRecordBatchStream> {
1132
+ if partition != 0 {
1133
+ return Err(DataFusionError::Execution(format!(
1134
+ "LixStateScanExec only exposes one partition, got {partition}"
1135
+ )));
1136
+ }
1137
+
1138
+ let live_state = Arc::clone(&self.live_state);
1139
+ let schema = Arc::clone(&self.schema);
1140
+ let request = self.request.clone();
1141
+ let stream_schema = Arc::clone(&schema);
1142
+ let stream = stream::once(async move {
1143
+ let rows = if request.limit == Some(0) {
1144
+ Vec::new()
1145
+ } else {
1146
+ live_state
1147
+ .scan_rows(&request)
1148
+ .await
1149
+ .map_err(lix_error_to_datafusion_error)?
1150
+ };
1151
+ let batch = lix_state_record_batch(Arc::clone(&stream_schema), &rows)
1152
+ .map_err(lix_error_to_datafusion_error)?;
1153
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
1154
+ batch,
1155
+ )]))
1156
+ })
1157
+ .try_flatten();
1158
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
1159
+ }
1160
+ }
1161
+
1162
+ fn lix_state_schema() -> SchemaRef {
1163
+ Arc::new(Schema::new(vec![
1164
+ json_field("entity_id", false),
1165
+ Field::new("schema_key", DataType::Utf8, false),
1166
+ Field::new("file_id", DataType::Utf8, true),
1167
+ json_field("snapshot_content", true),
1168
+ json_field("metadata", true),
1169
+ Field::new("created_at", DataType::Utf8, true),
1170
+ Field::new("updated_at", DataType::Utf8, true),
1171
+ Field::new("global", DataType::Boolean, true),
1172
+ Field::new("change_id", DataType::Utf8, true),
1173
+ Field::new("commit_id", DataType::Utf8, true),
1174
+ Field::new("untracked", DataType::Boolean, true),
1175
+ ]))
1176
+ }
1177
+
1178
+ fn lix_state_by_version_schema() -> SchemaRef {
1179
+ Arc::new(Schema::new(vec![
1180
+ json_field("entity_id", false),
1181
+ Field::new("schema_key", DataType::Utf8, false),
1182
+ Field::new("file_id", DataType::Utf8, true),
1183
+ json_field("snapshot_content", true),
1184
+ json_field("metadata", true),
1185
+ Field::new("created_at", DataType::Utf8, true),
1186
+ Field::new("updated_at", DataType::Utf8, true),
1187
+ Field::new("global", DataType::Boolean, true),
1188
+ Field::new("change_id", DataType::Utf8, true),
1189
+ Field::new("commit_id", DataType::Utf8, true),
1190
+ Field::new("untracked", DataType::Boolean, true),
1191
+ Field::new("version_id", DataType::Utf8, false),
1192
+ ]))
1193
+ }
1194
+
1195
+ #[derive(Debug, Clone, PartialEq, Eq, Default)]
1196
+ struct LixStateByVersionRoute {
1197
+ schema_keys: Option<BTreeSet<String>>,
1198
+ version_ids: Option<BTreeSet<String>>,
1199
+ entity_ids: Option<BTreeSet<String>>,
1200
+ file_id: Option<NullableKeyFilter<String>>,
1201
+ contradictory: bool,
1202
+ }
1203
+
1204
+ impl LixStateByVersionRoute {
1205
+ fn from_filters(filters: &[Expr]) -> Self {
1206
+ let mut route = Self::default();
1207
+ for filter in filters {
1208
+ let Some(predicates) = parse_lix_state_filters(filter) else {
1209
+ continue;
1210
+ };
1211
+ for predicate in predicates {
1212
+ match predicate {
1213
+ LixStateFilterPredicate::SchemaKeys(values) => {
1214
+ merge_string_route_slot(
1215
+ &mut route.schema_keys,
1216
+ values,
1217
+ &mut route.contradictory,
1218
+ );
1219
+ }
1220
+ LixStateFilterPredicate::VersionIds(values) => {
1221
+ merge_string_route_slot(
1222
+ &mut route.version_ids,
1223
+ values,
1224
+ &mut route.contradictory,
1225
+ );
1226
+ }
1227
+ LixStateFilterPredicate::EntityIds(values) => {
1228
+ merge_string_route_slot(
1229
+ &mut route.entity_ids,
1230
+ values,
1231
+ &mut route.contradictory,
1232
+ );
1233
+ }
1234
+ LixStateFilterPredicate::FileId(filter) => {
1235
+ merge_nullable_key_route_slot(
1236
+ &mut route.file_id,
1237
+ filter,
1238
+ &mut route.contradictory,
1239
+ );
1240
+ }
1241
+ }
1242
+ }
1243
+ }
1244
+ route
1245
+ }
1246
+ }
1247
+
1248
+ #[derive(Debug, Clone, PartialEq, Eq)]
1249
+ enum LixStateFilterPredicate {
1250
+ SchemaKeys(BTreeSet<String>),
1251
+ VersionIds(BTreeSet<String>),
1252
+ EntityIds(BTreeSet<String>),
1253
+ FileId(NullableKeyFilter<String>),
1254
+ }
1255
+
1256
+ fn lix_state_scan_request(
1257
+ schema: &SchemaRef,
1258
+ version_binding: Option<&str>,
1259
+ projection: Option<&Vec<usize>>,
1260
+ route: &LixStateByVersionRoute,
1261
+ limit: Option<usize>,
1262
+ ) -> LiveStateScanRequest {
1263
+ let projection = LiveStateProjection {
1264
+ columns: projection_column_names(schema, projection),
1265
+ };
1266
+ let mut filter = LiveStateFilter {
1267
+ schema_keys: route
1268
+ .schema_keys
1269
+ .as_ref()
1270
+ .map(|values| values.iter().cloned().collect())
1271
+ .unwrap_or_default(),
1272
+ entity_ids: route
1273
+ .entity_ids
1274
+ .as_ref()
1275
+ .map(|values| {
1276
+ values
1277
+ .iter()
1278
+ .filter_map(|value| EntityIdentity::from_json_array_text(value).ok())
1279
+ .collect()
1280
+ })
1281
+ .unwrap_or_default(),
1282
+ version_ids: version_binding
1283
+ .map(|value| vec![value.to_string()])
1284
+ .or_else(|| {
1285
+ route
1286
+ .version_ids
1287
+ .as_ref()
1288
+ .map(|values| values.iter().cloned().collect())
1289
+ })
1290
+ .unwrap_or_default(),
1291
+ ..LiveStateFilter::default()
1292
+ };
1293
+ if let Some(file_id) = route.file_id.clone() {
1294
+ filter.file_ids.push(file_id);
1295
+ }
1296
+
1297
+ LiveStateScanRequest {
1298
+ filter,
1299
+ projection,
1300
+ limit: route.contradictory.then_some(0).or(limit),
1301
+ }
1302
+ }
1303
+
1304
+ fn projection_column_names(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Vec<String> {
1305
+ projection
1306
+ .map(|indices| {
1307
+ indices
1308
+ .iter()
1309
+ .filter_map(|index| schema.fields().get(*index))
1310
+ .map(|field| field.name().to_string())
1311
+ .collect::<Vec<_>>()
1312
+ })
1313
+ .unwrap_or_default()
1314
+ }
1315
+
1316
/// Narrows `slot` by the allowed `values`.
///
/// * An empty `values` set is ignored.
/// * An unset slot adopts `values`.
/// * A set slot is replaced by its intersection with `values`; if nothing is
///   left, `contradictory` is raised (it is never cleared here).
fn merge_string_route_slot(
    slot: &mut Option<BTreeSet<String>>,
    values: BTreeSet<String>,
    contradictory: &mut bool,
) {
    if values.is_empty() {
        return;
    }

    if let Some(existing) = slot.as_mut() {
        let narrowed: BTreeSet<String> = std::mem::take(existing)
            .into_iter()
            .filter(|candidate| values.contains(candidate))
            .collect();
        if narrowed.is_empty() {
            *contradictory = true;
        }
        *existing = narrowed;
    } else {
        *slot = Some(values);
    }
}
1335
+
1336
+ fn merge_nullable_key_route_slot(
1337
+ slot: &mut Option<NullableKeyFilter<String>>,
1338
+ value: NullableKeyFilter<String>,
1339
+ contradictory: &mut bool,
1340
+ ) {
1341
+ match slot {
1342
+ Some(existing) if *existing != value => *contradictory = true,
1343
+ Some(_) => {}
1344
+ None => *slot = Some(value),
1345
+ }
1346
+ }
1347
+
1348
+ fn parse_lix_state_filter(expr: &Expr) -> Option<LixStateFilterPredicate> {
1349
+ parse_lix_state_filters(expr)?.into_iter().next()
1350
+ }
1351
+
1352
+ fn parse_lix_state_filters(expr: &Expr) -> Option<Vec<LixStateFilterPredicate>> {
1353
+ match expr {
1354
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
1355
+ let mut predicates = parse_lix_state_filters(&binary_expr.left)?;
1356
+ predicates.extend(parse_lix_state_filters(&binary_expr.right)?);
1357
+ Some(predicates)
1358
+ }
1359
+ Expr::BinaryExpr(binary_expr) => {
1360
+ parse_lix_state_binary_filter(binary_expr).map(|predicate| vec![predicate])
1361
+ }
1362
+ Expr::InList(in_list) => {
1363
+ parse_lix_state_in_list_filter(in_list).map(|predicate| vec![predicate])
1364
+ }
1365
+ Expr::IsNull(expr) => parse_lix_state_null_filter(expr).map(|predicate| vec![predicate]),
1366
+ _ => None,
1367
+ }
1368
+ }
1369
+
1370
+ fn parse_lix_state_binary_filter(binary_expr: &BinaryExpr) -> Option<LixStateFilterPredicate> {
1371
+ if binary_expr.op != Operator::Eq {
1372
+ return None;
1373
+ }
1374
+
1375
+ parse_lix_state_column_literal_filter(&binary_expr.left, &binary_expr.right)
1376
+ .or_else(|| parse_lix_state_column_literal_filter(&binary_expr.right, &binary_expr.left))
1377
+ }
1378
+
1379
+ fn parse_lix_state_in_list_filter(in_list: &InList) -> Option<LixStateFilterPredicate> {
1380
+ if in_list.negated {
1381
+ return None;
1382
+ }
1383
+ let Expr::Column(column) = in_list.expr.as_ref() else {
1384
+ return None;
1385
+ };
1386
+
1387
+ let values = in_list
1388
+ .list
1389
+ .iter()
1390
+ .map(string_expr_literal)
1391
+ .collect::<Option<Vec<_>>>()?;
1392
+ if values.is_empty() {
1393
+ return None;
1394
+ }
1395
+
1396
+ let values = values.into_iter().collect::<BTreeSet<_>>();
1397
+ match column.name.as_str() {
1398
+ "schema_key" => Some(LixStateFilterPredicate::SchemaKeys(values)),
1399
+ "version_id" => Some(LixStateFilterPredicate::VersionIds(values)),
1400
+ "entity_id" => canonical_entity_id_values(values).map(LixStateFilterPredicate::EntityIds),
1401
+ _ => None,
1402
+ }
1403
+ }
1404
+
1405
+ fn parse_lix_state_null_filter(expr: &Expr) -> Option<LixStateFilterPredicate> {
1406
+ let Expr::Column(column) = expr else {
1407
+ return None;
1408
+ };
1409
+
1410
+ match column.name.as_str() {
1411
+ "file_id" => Some(LixStateFilterPredicate::FileId(NullableKeyFilter::Null)),
1412
+ _ => None,
1413
+ }
1414
+ }
1415
+
1416
+ fn parse_lix_state_column_literal_filter(
1417
+ column_expr: &Expr,
1418
+ literal_expr: &Expr,
1419
+ ) -> Option<LixStateFilterPredicate> {
1420
+ let Expr::Column(column) = column_expr else {
1421
+ return None;
1422
+ };
1423
+
1424
+ match column.name.as_str() {
1425
+ "schema_key" => string_expr_literal(literal_expr)
1426
+ .map(|value| LixStateFilterPredicate::SchemaKeys(BTreeSet::from([value]))),
1427
+ "version_id" => string_expr_literal(literal_expr)
1428
+ .map(|value| LixStateFilterPredicate::VersionIds(BTreeSet::from([value]))),
1429
+ "entity_id" => string_expr_literal(literal_expr)
1430
+ .and_then(|value| canonical_entity_id_value(&value))
1431
+ .map(|value| LixStateFilterPredicate::EntityIds(BTreeSet::from([value]))),
1432
+ "file_id" => nullable_key_literal(literal_expr).map(LixStateFilterPredicate::FileId),
1433
+ _ => None,
1434
+ }
1435
+ }
1436
+
1437
+ fn canonical_entity_id_values(values: BTreeSet<String>) -> Option<BTreeSet<String>> {
1438
+ values
1439
+ .into_iter()
1440
+ .map(|value| canonical_entity_id_value(&value))
1441
+ .collect()
1442
+ }
1443
+
1444
+ fn canonical_entity_id_value(value: &str) -> Option<String> {
1445
+ EntityIdentity::from_json_array_text(value)
1446
+ .ok()?
1447
+ .as_json_array_text()
1448
+ .ok()
1449
+ }
1450
+
1451
+ fn nullable_key_literal(expr: &Expr) -> Option<NullableKeyFilter<String>> {
1452
+ if is_null_literal(expr) {
1453
+ return Some(NullableKeyFilter::Null);
1454
+ }
1455
+ string_expr_literal(expr).map(NullableKeyFilter::Value)
1456
+ }
1457
+
1458
+ fn string_expr_literal(expr: &Expr) -> Option<String> {
1459
+ let Expr::Literal(literal, _) = expr else {
1460
+ return None;
1461
+ };
1462
+ match literal {
1463
+ ScalarValue::Utf8(Some(value))
1464
+ | ScalarValue::Utf8View(Some(value))
1465
+ | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
1466
+ _ => None,
1467
+ }
1468
+ }
1469
+
1470
+ fn is_null_literal(expr: &Expr) -> bool {
1471
+ matches!(expr, Expr::Literal(ScalarValue::Null, _))
1472
+ }
1473
+
1474
+ fn lix_state_record_batch(
1475
+ schema: SchemaRef,
1476
+ rows: &[MaterializedLiveStateRow],
1477
+ ) -> Result<RecordBatch, LixError> {
1478
+ if schema.fields().is_empty() {
1479
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
1480
+ return RecordBatch::try_new_with_options(schema, vec![], &options).map_err(|error| {
1481
+ LixError::new(
1482
+ "LIX_ERROR_UNKNOWN",
1483
+ format!("sql2 failed to build zero-column lix_state batch: {error}"),
1484
+ )
1485
+ });
1486
+ }
1487
+
1488
+ let columns = schema
1489
+ .fields()
1490
+ .iter()
1491
+ .map(|field| {
1492
+ Ok(match field.name().as_str() {
1493
+ "entity_id" => Arc::new(StringArray::from(
1494
+ rows.iter()
1495
+ .map(|row| row.entity_id.as_json_array_text().map(Some))
1496
+ .collect::<std::result::Result<Vec<_>, LixError>>()?,
1497
+ )) as ArrayRef,
1498
+ "schema_key" => string_array(rows.iter().map(|row| Some(row.schema_key.as_str()))),
1499
+ "file_id" => string_array(rows.iter().map(|row| row.file_id.as_deref())),
1500
+ "snapshot_content" => {
1501
+ string_array(rows.iter().map(|row| row.snapshot_content.as_deref()))
1502
+ }
1503
+ "metadata" => Arc::new(StringArray::from(
1504
+ rows.iter()
1505
+ .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
1506
+ .collect::<Vec<_>>(),
1507
+ )),
1508
+ "created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
1509
+ "updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
1510
+ "global" => Arc::new(BooleanArray::from(
1511
+ rows.iter().map(|row| row.global).collect::<Vec<_>>(),
1512
+ )) as ArrayRef,
1513
+ "change_id" => string_array(rows.iter().map(|row| row.change_id.as_deref())),
1514
+ "commit_id" => string_array(rows.iter().map(|row| row.commit_id.as_deref())),
1515
+ "untracked" => Arc::new(BooleanArray::from(
1516
+ rows.iter().map(|row| row.untracked).collect::<Vec<_>>(),
1517
+ )) as ArrayRef,
1518
+ "version_id" => string_array(rows.iter().map(|row| Some(row.version_id.as_str()))),
1519
+ other => {
1520
+ return Err(LixError::new(
1521
+ "LIX_ERROR_UNKNOWN",
1522
+ format!("sql2 does not support lix_state column '{other}'"),
1523
+ ))
1524
+ }
1525
+ })
1526
+ })
1527
+ .collect::<Result<Vec<_>, _>>()?;
1528
+
1529
+ RecordBatch::try_new(schema, columns).map_err(|error| {
1530
+ LixError::new(
1531
+ "LIX_ERROR_UNKNOWN",
1532
+ format!("sql2 failed to build lix_state_by_version batch: {error}"),
1533
+ )
1534
+ })
1535
+ }
1536
+
1537
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
1538
+ let values = values
1539
+ .map(|value| value.map(ToOwned::to_owned))
1540
+ .collect::<Vec<_>>();
1541
+ Arc::new(StringArray::from(values)) as ArrayRef
1542
+ }
1543
+
1544
+ fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1545
+ let Some(projection) = projection else {
1546
+ return Ok(Arc::clone(schema));
1547
+ };
1548
+
1549
+ let projected = schema.project(projection).map_err(|error| {
1550
+ DataFusionError::Execution(format!("sql2 failed to project lix_state schema: {error}"))
1551
+ })?;
1552
+ Ok(Arc::new(projected))
1553
+ }
1554
+
1555
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
1556
+ super::error::datafusion_error_to_lix_error(error)
1557
+ }
1558
+
1559
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
1560
+ super::error::lix_error_to_datafusion_error(error)
1561
+ }
1562
+
1563
+ #[cfg(test)]
1564
+ mod tests {
1565
+ use super::{
1566
+ lix_state_scan_request, lix_state_schema, lix_state_write_rows_from_batch,
1567
+ parse_lix_state_filter, register_lix_state_write_providers, LixStateByVersionRoute,
1568
+ LixStateDeleteExec, LixStateFilterPredicate, LixStateInsertSink, LixStateProvider,
1569
+ LixStateUpdateExec,
1570
+ };
1571
+ use crate::binary_cas::BlobDataReader;
1572
+ use crate::functions::{
1573
+ FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
1574
+ };
1575
+ use crate::sql2::dml::{InsertExec, InsertSink};
1576
+ use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
1577
+ use crate::transaction::types::{
1578
+ TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
1579
+ TransactionWriteRow,
1580
+ };
1581
+ use crate::version::{VersionHead, VersionRefReader};
1582
+ use crate::{
1583
+ entity_identity::EntityIdentity,
1584
+ live_state::{
1585
+ LiveStateReader, LiveStateRowRequest, LiveStateScanRequest, MaterializedLiveStateRow,
1586
+ },
1587
+ };
1588
+ use crate::{LixError, NullableKeyFilter};
1589
+ use async_trait::async_trait;
1590
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray, UInt64Array};
1591
+ use datafusion::arrow::datatypes::DataType;
1592
+ use datafusion::arrow::record_batch::RecordBatch;
1593
+ use datafusion::catalog::TableProvider;
1594
+ use datafusion::common::{Column, DataFusionError};
1595
+ use datafusion::execution::TaskContext;
1596
+ use datafusion::logical_expr::dml::InsertOp;
1597
+ use datafusion::logical_expr::expr::InList;
1598
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
1599
+ use datafusion::physical_expr::EquivalenceProperties;
1600
+ use datafusion::physical_plan::empty::EmptyExec;
1601
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
1602
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
1603
+ use datafusion::physical_plan::{
1604
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
1605
+ };
1606
+ use datafusion::prelude::SessionContext;
1607
+ use datafusion::scalar::ScalarValue;
1608
+ use futures_util::stream;
1609
+ use serde_json::json;
1610
+ use std::collections::BTreeSet;
1611
+ use std::sync::Arc;
1612
+
1613
/// Live-state reader fixture with no data: its `LiveStateReader` impl in this
/// module returns no rows from scans and `None` from point lookups.
struct EmptyLiveStateReader;
1614
/// Version-ref reader fixture that knows no version heads.
struct EmptyVersionRefReader;
1615
+ #[allow(dead_code)]
1616
+ struct RowsLiveStateReader {
1617
+ rows: Vec<MaterializedLiveStateRow>,
1618
+ }
1619
/// Blob reader fixture that reports every requested blob as missing.
struct DummyBlobReader;
1620
+
1621
+ #[derive(Default)]
1622
+ struct DummyWriteContext {
1623
+ rows: Vec<MaterializedLiveStateRow>,
1624
+ }
1625
+
1626
+ #[derive(Default)]
1627
+ struct CapturingWriteContext {
1628
+ rows: Vec<MaterializedLiveStateRow>,
1629
+ writes: Vec<TransactionWrite>,
1630
+ }
1631
+
1632
+ struct SingleBatchExec {
1633
+ batch: RecordBatch,
1634
+ properties: Arc<PlanProperties>,
1635
+ }
1636
+
1637
+ impl std::fmt::Debug for SingleBatchExec {
1638
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1639
+ f.debug_struct("SingleBatchExec").finish()
1640
+ }
1641
+ }
1642
+
1643
+ impl SingleBatchExec {
1644
+ fn new(batch: RecordBatch) -> Self {
1645
+ let properties = PlanProperties::new(
1646
+ EquivalenceProperties::new(batch.schema()),
1647
+ Partitioning::UnknownPartitioning(1),
1648
+ EmissionType::Incremental,
1649
+ Boundedness::Bounded,
1650
+ );
1651
+ Self {
1652
+ batch,
1653
+ properties: Arc::new(properties),
1654
+ }
1655
+ }
1656
+ }
1657
+
1658
+ impl DisplayAs for SingleBatchExec {
1659
+ fn fmt_as(
1660
+ &self,
1661
+ _t: DisplayFormatType,
1662
+ f: &mut std::fmt::Formatter<'_>,
1663
+ ) -> std::fmt::Result {
1664
+ write!(f, "SingleBatchExec")
1665
+ }
1666
+ }
1667
+
1668
+ impl ExecutionPlan for SingleBatchExec {
1669
+ fn name(&self) -> &str {
1670
+ "SingleBatchExec"
1671
+ }
1672
+
1673
+ fn as_any(&self) -> &dyn std::any::Any {
1674
+ self
1675
+ }
1676
+
1677
+ fn properties(&self) -> &Arc<PlanProperties> {
1678
+ &self.properties
1679
+ }
1680
+
1681
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1682
+ Vec::new()
1683
+ }
1684
+
1685
+ fn with_new_children(
1686
+ self: Arc<Self>,
1687
+ children: Vec<Arc<dyn ExecutionPlan>>,
1688
+ ) -> datafusion::common::Result<Arc<dyn ExecutionPlan>> {
1689
+ if !children.is_empty() {
1690
+ return Err(DataFusionError::Execution(
1691
+ "SingleBatchExec does not accept children".to_string(),
1692
+ ));
1693
+ }
1694
+ Ok(self)
1695
+ }
1696
+
1697
+ fn execute(
1698
+ &self,
1699
+ partition: usize,
1700
+ _context: Arc<TaskContext>,
1701
+ ) -> datafusion::common::Result<SendableRecordBatchStream> {
1702
+ if partition != 0 {
1703
+ return Err(DataFusionError::Execution(format!(
1704
+ "SingleBatchExec only exposes one partition, got {partition}"
1705
+ )));
1706
+ }
1707
+
1708
+ let batch = self.batch.clone();
1709
+ let schema = batch.schema();
1710
+ let stream = stream::iter(vec![Ok(batch)]);
1711
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
1712
+ }
1713
+ }
1714
+
1715
+ #[async_trait]
1716
+ impl LiveStateReader for EmptyLiveStateReader {
1717
+ async fn scan_rows(
1718
+ &self,
1719
+ _request: &LiveStateScanRequest,
1720
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1721
+ Ok(vec![])
1722
+ }
1723
+
1724
+ async fn load_row(
1725
+ &self,
1726
+ _request: &LiveStateRowRequest,
1727
+ ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
1728
+ Ok(None)
1729
+ }
1730
+ }
1731
+
1732
+ #[async_trait]
1733
+ impl VersionRefReader for EmptyVersionRefReader {
1734
+ async fn load_head(&self, _version_id: &str) -> Result<Option<VersionHead>, LixError> {
1735
+ Ok(None)
1736
+ }
1737
+
1738
+ async fn scan_heads(&self) -> Result<Vec<VersionHead>, LixError> {
1739
+ Ok(Vec::new())
1740
+ }
1741
+ }
1742
+
1743
+ fn empty_version_ref() -> Arc<dyn VersionRefReader> {
1744
+ Arc::new(EmptyVersionRefReader)
1745
+ }
1746
+
1747
+ #[async_trait]
1748
+ impl LiveStateReader for RowsLiveStateReader {
1749
+ async fn scan_rows(
1750
+ &self,
1751
+ _request: &LiveStateScanRequest,
1752
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1753
+ Ok(self.rows.clone())
1754
+ }
1755
+
1756
+ async fn load_row(
1757
+ &self,
1758
+ _request: &LiveStateRowRequest,
1759
+ ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
1760
+ Ok(None)
1761
+ }
1762
+ }
1763
+
1764
+ fn test_functions() -> FunctionProviderHandle {
1765
+ SharedFunctionProvider::new(
1766
+ Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
1767
+ )
1768
+ }
1769
+
1770
+ #[async_trait]
1771
+ impl BlobDataReader for DummyBlobReader {
1772
+ async fn load_bytes_many(
1773
+ &self,
1774
+ hashes: &[crate::binary_cas::BlobHash],
1775
+ ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1776
+ Ok(crate::binary_cas::BlobBytesBatch::new(vec![
1777
+ None;
1778
+ hashes.len()
1779
+ ]))
1780
+ }
1781
+ }
1782
+
1783
+ #[async_trait]
1784
+ impl SqlWriteExecutionContext for DummyWriteContext {
1785
+ fn active_version_id(&self) -> &str {
1786
+ "version-a"
1787
+ }
1788
+
1789
+ fn functions(&self) -> FunctionProviderHandle {
1790
+ test_functions()
1791
+ }
1792
+
1793
+ fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
1794
+ Ok(Vec::new())
1795
+ }
1796
+
1797
+ async fn load_bytes_many(
1798
+ &mut self,
1799
+ hashes: &[crate::binary_cas::BlobHash],
1800
+ ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1801
+ DummyBlobReader.load_bytes_many(hashes).await
1802
+ }
1803
+
1804
+ async fn scan_live_state(
1805
+ &mut self,
1806
+ _request: &LiveStateScanRequest,
1807
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1808
+ Ok(self.rows.clone())
1809
+ }
1810
+
1811
+ async fn load_version_head(
1812
+ &mut self,
1813
+ version_id: &str,
1814
+ ) -> Result<Option<String>, LixError> {
1815
+ if version_id == "ghost-version" {
1816
+ return Ok(None);
1817
+ }
1818
+ Ok(Some(format!("commit-{version_id}")))
1819
+ }
1820
+
1821
+ async fn stage_write(
1822
+ &mut self,
1823
+ _write: TransactionWrite,
1824
+ ) -> Result<TransactionWriteOutcome, LixError> {
1825
+ Ok(TransactionWriteOutcome { count: 0 })
1826
+ }
1827
+ }
1828
+
1829
+ #[async_trait]
1830
+ impl SqlWriteExecutionContext for CapturingWriteContext {
1831
+ fn active_version_id(&self) -> &str {
1832
+ "version-a"
1833
+ }
1834
+
1835
+ fn functions(&self) -> FunctionProviderHandle {
1836
+ test_functions()
1837
+ }
1838
+
1839
+ fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
1840
+ Ok(Vec::new())
1841
+ }
1842
+
1843
+ async fn load_bytes_many(
1844
+ &mut self,
1845
+ hashes: &[crate::binary_cas::BlobHash],
1846
+ ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1847
+ DummyBlobReader.load_bytes_many(hashes).await
1848
+ }
1849
+
1850
+ async fn scan_live_state(
1851
+ &mut self,
1852
+ _request: &LiveStateScanRequest,
1853
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1854
+ Ok(self.rows.clone())
1855
+ }
1856
+
1857
+ async fn load_version_head(
1858
+ &mut self,
1859
+ version_id: &str,
1860
+ ) -> Result<Option<String>, LixError> {
1861
+ if version_id == "ghost-version" {
1862
+ return Ok(None);
1863
+ }
1864
+ Ok(Some(format!("commit-{version_id}")))
1865
+ }
1866
+
1867
+ async fn stage_write(
1868
+ &mut self,
1869
+ write: TransactionWrite,
1870
+ ) -> Result<TransactionWriteOutcome, LixError> {
1871
+ self.writes.push(write);
1872
+ Ok(TransactionWriteOutcome { count: 0 })
1873
+ }
1874
+ }
1875
+
1876
+ fn col(name: &str) -> Expr {
1877
+ Expr::Column(Column::from_name(name))
1878
+ }
1879
+
1880
+ fn str_lit(value: &str) -> Expr {
1881
+ Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
1882
+ }
1883
+
1884
+ fn json_lit(value: &str) -> Expr {
1885
+ Expr::Literal(
1886
+ ScalarValue::Utf8(Some(value.to_string())),
1887
+ Some(datafusion::common::metadata::FieldMetadata::new(
1888
+ std::collections::BTreeMap::from([(
1889
+ crate::sql2::result_metadata::LIX_VALUE_TYPE_METADATA_KEY.to_string(),
1890
+ crate::sql2::result_metadata::LIX_VALUE_TYPE_JSON.to_string(),
1891
+ )]),
1892
+ )),
1893
+ )
1894
+ }
1895
+
1896
+ fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
1897
+ Arc::new(StringArray::from(values)) as ArrayRef
1898
+ }
1899
+
1900
+ fn one_row_lix_state_batch(global: bool) -> RecordBatch {
1901
+ RecordBatch::try_new(
1902
+ lix_state_schema(),
1903
+ vec![
1904
+ string_column(vec![Some("[\"entity-1\"]")]),
1905
+ string_column(vec![Some("lix_key_value")]),
1906
+ string_column(vec![None]),
1907
+ string_column(vec![Some("{\"key\":\"hello\",\"value\":\"world\"}")]),
1908
+ string_column(vec![Some("{\"source\":\"test\"}")]),
1909
+ string_column(vec![Some("2026-04-23T00:00:00Z")]),
1910
+ string_column(vec![Some("2026-04-23T01:00:00Z")]),
1911
+ Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
1912
+ string_column(vec![Some("change-a")]),
1913
+ string_column(vec![None]),
1914
+ Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1915
+ ],
1916
+ )
1917
+ .expect("valid lix_state batch")
1918
+ }
1919
+
1920
+ fn one_row_stageable_lix_state_batch() -> RecordBatch {
1921
+ RecordBatch::try_new(
1922
+ lix_state_schema(),
1923
+ vec![
1924
+ string_column(vec![Some("[\"entity-1\"]")]),
1925
+ string_column(vec![Some("lix_key_value")]),
1926
+ string_column(vec![None]),
1927
+ string_column(vec![Some("{\"key\":\"hello\",\"value\":\"world\"}")]),
1928
+ string_column(vec![None]),
1929
+ string_column(vec![None]),
1930
+ string_column(vec![None]),
1931
+ Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1932
+ string_column(vec![None]),
1933
+ string_column(vec![None]),
1934
+ Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1935
+ ],
1936
+ )
1937
+ .expect("valid stageable lix_state batch")
1938
+ }
1939
+
1940
+ fn live_row(entity_id: &str, metadata: Option<&str>) -> MaterializedLiveStateRow {
1941
+ MaterializedLiveStateRow {
1942
+ entity_id: EntityIdentity::single(entity_id),
1943
+ schema_key: "lix_key_value".to_string(),
1944
+ file_id: None,
1945
+ snapshot_content: Some("{\"key\":\"hello\",\"value\":\"world\"}".to_string()),
1946
+ metadata: metadata.map(str::to_string),
1947
+ deleted: false,
1948
+ version_id: "version-a".to_string(),
1949
+ change_id: Some(format!("change-{entity_id}")),
1950
+ commit_id: Some(format!("commit-{entity_id}")),
1951
+ global: false,
1952
+ untracked: false,
1953
+ created_at: "2026-04-23T00:00:00Z".to_string(),
1954
+ updated_at: "2026-04-23T01:00:00Z".to_string(),
1955
+ }
1956
+ }
1957
+
1958
/// `schema_key = 'profile'` parses into a one-element schema-key predicate.
#[test]
fn parses_eq_filter_for_schema_key() {
    let filter = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(col("schema_key")),
        Operator::Eq,
        Box::new(str_lit("profile")),
    ));

    let expected_keys = BTreeSet::from(["profile".to_string()]);

    assert_eq!(
        parse_lix_state_filter(&filter),
        Some(LixStateFilterPredicate::SchemaKeys(expected_keys))
    );
}
1973
+
1974
/// A non-negated `version_id IN ('a', 'b')` parses into a version-id predicate
/// holding both values.
#[test]
fn parses_in_list_filter_for_version_id() {
    let candidates = vec![str_lit("a"), str_lit("b")];
    let filter = Expr::InList(InList::new(Box::new(col("version_id")), candidates, false));

    let expected_ids = BTreeSet::from(["a".to_string(), "b".to_string()]);

    assert_eq!(
        parse_lix_state_filter(&filter),
        Some(LixStateFilterPredicate::VersionIds(expected_ids))
    );
}
1990
+
1991
/// Filters, projection indices and limit all flow into the scan request for
/// the by-version schema.
#[test]
fn builds_scan_request_from_route_and_projection() {
    let equals = |column: &str, value: &str| {
        Expr::BinaryExpr(BinaryExpr::new(
            Box::new(col(column)),
            Operator::Eq,
            Box::new(str_lit(value)),
        ))
    };

    let schema = super::lix_state_by_version_schema();
    let route = LixStateByVersionRoute::from_filters(&[
        equals("schema_key", "profile"),
        equals("version_id", "v1"),
        Expr::IsNull(Box::new(col("file_id"))),
    ]);
    let projection = vec![0, 1, 11];

    let request = lix_state_scan_request(&schema, None, Some(&projection), &route, Some(10));

    assert_eq!(request.filter.schema_keys, vec!["profile".to_string()]);
    assert_eq!(request.filter.version_ids, vec!["v1".to_string()]);
    // `file_id IS NULL` becomes an explicit null key filter.
    assert_eq!(request.filter.file_ids, vec![NullableKeyFilter::Null]);
    // Projection indices 0, 1 and 11 resolve to these column names.
    let expected_columns = vec![
        "entity_id".to_string(),
        "schema_key".to_string(),
        "version_id".to_string(),
    ];
    assert_eq!(request.projection.columns, expected_columns);
    assert_eq!(request.limit, Some(10));
}
2024
+
2025
/// A single `AND` expression is split so both of its sides contribute to the
/// route.
#[test]
fn builds_route_from_and_filter_tree() {
    let entity_filter = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(col("entity_id")),
        Operator::Eq,
        Box::new(str_lit("[\"entity-a\"]")),
    ));
    let version_filter = Expr::InList(InList::new(
        Box::new(col("version_id")),
        vec![str_lit("version-a"), str_lit("global")],
        false,
    ));
    let conjunction = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(entity_filter),
        Operator::And,
        Box::new(version_filter),
    ));

    let route = LixStateByVersionRoute::from_filters(&[conjunction]);

    let expected_entities = BTreeSet::from(["[\"entity-a\"]".to_string()]);
    assert_eq!(route.entity_ids, Some(expected_entities));

    let expected_versions = BTreeSet::from(["global".to_string(), "version-a".to_string()]);
    assert_eq!(route.version_ids, Some(expected_versions));
}
2053
+
2054
/// Two equality filters on `schema_key` with different values produce a
/// request with limit 0 and no schema keys.
#[test]
fn contradictory_filters_turn_into_zero_limit_request() {
    let schema_key_is = |value: &str| {
        Expr::BinaryExpr(BinaryExpr::new(
            Box::new(col("schema_key")),
            Operator::Eq,
            Box::new(str_lit(value)),
        ))
    };

    let schema = super::lix_state_by_version_schema();
    let route = LixStateByVersionRoute::from_filters(&[schema_key_is("a"), schema_key_is("b")]);

    let request = lix_state_scan_request(&schema, None, None, &route, None);

    assert_eq!(request.limit, Some(0));
    assert!(request.filter.schema_keys.is_empty());
}
2075
+
2076
/// Passing an active version id adds it as the request's only version filter,
/// alongside the filters taken from the route.
#[test]
fn active_version_view_pins_version_filter() {
    let schema = super::lix_state_schema();
    let schema_key_filter = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(col("schema_key")),
        Operator::Eq,
        Box::new(str_lit("profile")),
    ));
    let route = LixStateByVersionRoute::from_filters(&[schema_key_filter]);

    let request = lix_state_scan_request(&schema, Some("version-a"), None, &route, None);

    assert_eq!(request.filter.schema_keys, vec!["profile".to_string()]);
    assert_eq!(request.filter.version_ids, vec!["version-a".to_string()]);
}
2090
+
2091
+ #[tokio::test]
2092
+ async fn registers_active_lix_state_with_write_context_only() {
2093
+ let session = SessionContext::new();
2094
+ let mut write_context = DummyWriteContext::default();
2095
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2096
+
2097
+ register_lix_state_write_providers(&session, write_ctx)
2098
+ .await
2099
+ .expect("lix_state providers should register");
2100
+
2101
+ let lix_state = session
2102
+ .table_provider("lix_state")
2103
+ .await
2104
+ .expect("lix_state provider should exist");
2105
+ let lix_state = lix_state
2106
+ .as_any()
2107
+ .downcast_ref::<LixStateProvider>()
2108
+ .expect("lix_state should be a LixStateProvider");
2109
+ assert!(lix_state.write_access.is_write());
2110
+
2111
+ let by_version = session
2112
+ .table_provider("lix_state_by_version")
2113
+ .await
2114
+ .expect("lix_state_by_version provider should exist");
2115
+ let by_version = by_version
2116
+ .as_any()
2117
+ .downcast_ref::<LixStateProvider>()
2118
+ .expect("lix_state_by_version should be a LixStateProvider");
2119
+ assert!(by_version.write_access.is_write());
2120
+ }
2121
+
2122
+ #[tokio::test]
2123
+ async fn insert_into_requires_write_transaction() {
2124
+ let session = SessionContext::new();
2125
+ let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2126
+ let provider =
2127
+ LixStateProvider::active_version("version-a", live_state, empty_version_ref());
2128
+ let input = Arc::new(EmptyExec::new(provider.schema())) as Arc<dyn ExecutionPlan>;
2129
+
2130
+ let error = provider
2131
+ .insert_into(&session.state(), input, InsertOp::Append)
2132
+ .await
2133
+ .expect_err("insert without a write context should fail");
2134
+
2135
+ assert!(
2136
+ error.to_string().contains("requires a write transaction"),
2137
+ "unexpected error: {error}"
2138
+ );
2139
+ }
2140
+
2141
+ #[tokio::test]
2142
+ async fn update_requires_write_transaction() {
2143
+ let session = SessionContext::new();
2144
+ let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2145
+ let provider =
2146
+ LixStateProvider::active_version("version-a", live_state, empty_version_ref());
2147
+
2148
+ let error = provider
2149
+ .update(
2150
+ &session.state(),
2151
+ vec![("metadata".to_string(), str_lit("{\"source\":\"update\"}"))],
2152
+ vec![],
2153
+ )
2154
+ .await
2155
+ .expect_err("update without a write context should fail");
2156
+
2157
+ assert!(
2158
+ error.to_string().contains("requires a write transaction"),
2159
+ "unexpected error: {error}"
2160
+ );
2161
+ }
2162
+
2163
+ #[tokio::test]
2164
+ async fn delete_requires_write_transaction() {
2165
+ let session = SessionContext::new();
2166
+ let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2167
+ let provider =
2168
+ LixStateProvider::active_version("version-a", live_state, empty_version_ref());
2169
+
2170
+ let error = provider
2171
+ .delete_from(&session.state(), vec![])
2172
+ .await
2173
+ .expect_err("delete without a write context should fail");
2174
+
2175
+ assert!(
2176
+ error.to_string().contains("requires a write transaction"),
2177
+ "unexpected error: {error}"
2178
+ );
2179
+ }
2180
+
2181
+ #[tokio::test]
2182
+ async fn delete_returns_lix_state_delete_exec_with_write_ctx() {
2183
+ let session = SessionContext::new();
2184
+ let mut write_context = DummyWriteContext::default();
2185
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2186
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2187
+
2188
+ let plan = provider
2189
+ .delete_from(&session.state(), vec![])
2190
+ .await
2191
+ .expect("delete should produce a write plan");
2192
+
2193
+ assert!(plan.as_any().is::<LixStateDeleteExec>());
2194
+ }
2195
+
2196
+ #[tokio::test]
2197
+ async fn update_rejects_read_only_lix_state_columns() {
2198
+ let session = SessionContext::new();
2199
+ let mut write_context = DummyWriteContext::default();
2200
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2201
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2202
+
2203
+ let error = provider
2204
+ .update(
2205
+ &session.state(),
2206
+ vec![("entity_id".to_string(), str_lit("entity-2"))],
2207
+ vec![],
2208
+ )
2209
+ .await
2210
+ .expect_err("updating a read-only field should fail");
2211
+
2212
+ assert!(
2213
+ error.to_string().contains("read-only column 'entity_id'"),
2214
+ "unexpected error: {error}"
2215
+ );
2216
+ }
2217
+
2218
+ #[tokio::test]
2219
+ async fn update_returns_lix_state_update_exec_with_write_ctx() {
2220
+ let session = SessionContext::new();
2221
+ let mut write_context = DummyWriteContext::default();
2222
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2223
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2224
+
2225
+ let plan = provider
2226
+ .update(
2227
+ &session.state(),
2228
+ vec![("metadata".to_string(), str_lit("{\"source\":\"update\"}"))],
2229
+ vec![],
2230
+ )
2231
+ .await
2232
+ .expect("update should produce a write plan");
2233
+
2234
+ assert!(plan.as_any().is::<LixStateUpdateExec>());
2235
+ }
2236
+
2237
+ #[tokio::test]
2238
+ async fn insert_into_returns_data_sink_exec_with_write_ctx() {
2239
+ let session = SessionContext::new();
2240
+ let mut write_context = DummyWriteContext::default();
2241
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2242
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2243
+ let input = Arc::new(EmptyExec::new(provider.schema())) as Arc<dyn ExecutionPlan>;
2244
+
2245
+ let plan = provider
2246
+ .insert_into(&session.state(), input, InsertOp::Append)
2247
+ .await
2248
+ .expect("insert should produce a write plan");
2249
+
2250
+ assert!(plan.as_any().is::<InsertExec>());
2251
+ }
2252
+
2253
/// Decoding the fully populated, non-global fixture batch yields one write row
/// that carries every column value and the supplied version id.
#[test]
fn decodes_lix_state_batch_into_write_rows() {
    let batch = one_row_lix_state_batch(false);

    let rows = lix_state_write_rows_from_batch(&batch, "version-a").expect("batch should decode");

    let expected = TransactionWriteRow {
        entity_id: Some(EntityIdentity::single("entity-1")),
        schema_key: "lix_key_value".to_string(),
        file_id: None,
        snapshot: Some(TransactionJson::from_value_for_test(
            json!({"key":"hello","value":"world"}),
        )),
        metadata: Some(TransactionJson::from_value_for_test(
            json!({"source": "test"}),
        )),
        origin: None,
        created_at: Some("2026-04-23T00:00:00Z".to_string()),
        updated_at: Some("2026-04-23T01:00:00Z".to_string()),
        global: false,
        change_id: Some("change-a".to_string()),
        commit_id: None,
        untracked: false,
        version_id: "version-a".to_string(),
    };
    assert_eq!(rows, vec![expected]);
}
2281
+
2282
/// A row flagged global is decoded into version `"global"` rather than the
/// version id passed to the decoder.
#[test]
fn decodes_global_lix_state_batch_into_global_version() {
    let batch = one_row_lix_state_batch(true);

    let rows = lix_state_write_rows_from_batch(&batch, "version-a").expect("batch should decode");

    let first = &rows[0];
    assert_eq!(first.version_id, "global");
    assert!(first.global);
}
2290
+
2291
+ #[tokio::test]
2292
+ async fn insert_sink_stages_decoded_lix_state_rows() {
2293
+ let mut write_context = CapturingWriteContext::default();
2294
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2295
+ let sink = LixStateInsertSink::new(lix_state_schema(), write_ctx, "version-a".to_string());
2296
+ let batch = one_row_lix_state_batch(false);
2297
+ let count = sink
2298
+ .write_batches(vec![batch], &Arc::new(TaskContext::default()))
2299
+ .await
2300
+ .expect("sink should stage write");
2301
+
2302
+ assert_eq!(count, 1);
2303
+ assert_eq!(
2304
+ write_context.writes.as_slice(),
2305
+ &[TransactionWrite::Rows {
2306
+ mode: TransactionWriteMode::Insert,
2307
+ rows: vec![TransactionWriteRow {
2308
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2309
+ schema_key: "lix_key_value".to_string(),
2310
+ file_id: None,
2311
+ snapshot: Some(TransactionJson::from_value_for_test(
2312
+ json!({"key":"hello","value":"world"})
2313
+ )),
2314
+ metadata: Some(TransactionJson::from_value_for_test(
2315
+ json!({"source": "test"})
2316
+ )),
2317
+ origin: None,
2318
+ created_at: Some("2026-04-23T00:00:00Z".to_string()),
2319
+ updated_at: Some("2026-04-23T01:00:00Z".to_string()),
2320
+ global: false,
2321
+ change_id: Some("change-a".to_string()),
2322
+ commit_id: None,
2323
+ untracked: false,
2324
+ version_id: "version-a".to_string(),
2325
+ }]
2326
+ }]
2327
+ );
2328
+ }
2329
+
2330
+ #[tokio::test]
2331
+ async fn insert_plan_returns_datafusion_count_uint64() {
2332
+ let session = SessionContext::new();
2333
+ let mut write_context = CapturingWriteContext::default();
2334
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2335
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2336
+ let input = Arc::new(SingleBatchExec::new(one_row_stageable_lix_state_batch()))
2337
+ as Arc<dyn ExecutionPlan>;
2338
+
2339
+ let plan = provider
2340
+ .insert_into(&session.state(), input, InsertOp::Append)
2341
+ .await
2342
+ .expect("insert should produce a write plan");
2343
+ let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2344
+ .await
2345
+ .expect("insert write plan should execute");
2346
+
2347
+ assert_eq!(batches.len(), 1);
2348
+ assert_eq!(batches[0].num_rows(), 1);
2349
+ assert_eq!(batches[0].num_columns(), 1);
2350
+ assert_eq!(batches[0].schema().field(0).name(), "count");
2351
+ assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2352
+ assert!(!batches[0].schema().field(0).is_nullable());
2353
+
2354
+ let count = batches[0]
2355
+ .column(0)
2356
+ .as_any()
2357
+ .downcast_ref::<UInt64Array>()
2358
+ .expect("count should be UInt64");
2359
+ assert_eq!(count.value(0), 1);
2360
+ assert_eq!(write_context.writes.len(), 1);
2361
+ }
2362
+
2363
+ #[tokio::test]
2364
+ async fn update_plan_evaluates_filters_assignments_and_stages_rows() {
2365
+ let session = SessionContext::new();
2366
+ let mut write_context = CapturingWriteContext {
2367
+ rows: vec![
2368
+ live_row("entity-1", Some("{\"source\":\"match\"}")),
2369
+ live_row("entity-2", Some("{\"source\":\"skip\"}")),
2370
+ ],
2371
+ writes: Vec::new(),
2372
+ };
2373
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2374
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2375
+
2376
+ let plan = provider
2377
+ .update(
2378
+ &session.state(),
2379
+ vec![
2380
+ (
2381
+ "snapshot_content".to_string(),
2382
+ str_lit("{\"key\":\"hello\",\"value\":\"updated\"}"),
2383
+ ),
2384
+ (
2385
+ "metadata".to_string(),
2386
+ str_lit("{\"schema_key\":\"lix_key_value\"}"),
2387
+ ),
2388
+ ],
2389
+ vec![Expr::BinaryExpr(BinaryExpr::new(
2390
+ Box::new(col("metadata")),
2391
+ Operator::Eq,
2392
+ Box::new(json_lit("{\"source\":\"match\"}")),
2393
+ ))],
2394
+ )
2395
+ .await
2396
+ .expect("update should produce a write plan");
2397
+ let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2398
+ .await
2399
+ .expect("update write plan should execute");
2400
+
2401
+ assert_eq!(batches.len(), 1);
2402
+ assert_eq!(batches[0].schema().field(0).name(), "count");
2403
+ assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2404
+ let count = batches[0]
2405
+ .column(0)
2406
+ .as_any()
2407
+ .downcast_ref::<UInt64Array>()
2408
+ .expect("count should be UInt64");
2409
+ assert_eq!(count.value(0), 1);
2410
+
2411
+ assert_eq!(
2412
+ write_context.writes.as_slice(),
2413
+ &[TransactionWrite::Rows {
2414
+ mode: TransactionWriteMode::Replace,
2415
+ rows: vec![TransactionWriteRow {
2416
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2417
+ schema_key: "lix_key_value".to_string(),
2418
+ file_id: None,
2419
+ snapshot: Some(TransactionJson::from_value_for_test(
2420
+ json!({"key":"hello","value":"updated"})
2421
+ )),
2422
+ metadata: Some(TransactionJson::from_value_for_test(
2423
+ json!({"schema_key": "lix_key_value"})
2424
+ )),
2425
+ origin: None,
2426
+ created_at: None,
2427
+ updated_at: None,
2428
+ global: false,
2429
+ change_id: None,
2430
+ commit_id: None,
2431
+ untracked: false,
2432
+ version_id: "version-a".to_string(),
2433
+ }]
2434
+ }]
2435
+ );
2436
+ }
2437
+
2438
+ #[tokio::test]
2439
+ async fn delete_plan_with_empty_filters_stages_all_visible_rows() {
2440
+ let session = SessionContext::new();
2441
+ let mut write_context = CapturingWriteContext {
2442
+ rows: vec![
2443
+ live_row("entity-1", Some("{\"source\":\"one\"}")),
2444
+ live_row("entity-2", Some("{\"source\":\"two\"}")),
2445
+ ],
2446
+ writes: Vec::new(),
2447
+ };
2448
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2449
+ let provider = LixStateProvider::active_version_with_write(write_ctx);
2450
+
2451
+ let plan = provider
2452
+ .delete_from(&session.state(), vec![])
2453
+ .await
2454
+ .expect("delete should produce a write plan");
2455
+ let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2456
+ .await
2457
+ .expect("delete write plan should execute");
2458
+
2459
+ assert_eq!(batches.len(), 1);
2460
+ assert_eq!(batches[0].schema().field(0).name(), "count");
2461
+ assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2462
+ let count = batches[0]
2463
+ .column(0)
2464
+ .as_any()
2465
+ .downcast_ref::<UInt64Array>()
2466
+ .expect("count should be UInt64");
2467
+ assert_eq!(count.value(0), 2);
2468
+
2469
+ assert_eq!(
2470
+ write_context.writes.as_slice(),
2471
+ &[TransactionWrite::Rows {
2472
+ mode: TransactionWriteMode::Replace,
2473
+ rows: vec![
2474
+ TransactionWriteRow {
2475
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2476
+ schema_key: "lix_key_value".to_string(),
2477
+ file_id: None,
2478
+ snapshot: None,
2479
+ metadata: Some(TransactionJson::from_value_for_test(
2480
+ json!({"source": "one"})
2481
+ )),
2482
+ origin: None,
2483
+ created_at: None,
2484
+ updated_at: None,
2485
+ global: false,
2486
+ change_id: None,
2487
+ commit_id: None,
2488
+ untracked: false,
2489
+ version_id: "version-a".to_string(),
2490
+ },
2491
+ TransactionWriteRow {
2492
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-2")),
2493
+ schema_key: "lix_key_value".to_string(),
2494
+ file_id: None,
2495
+ snapshot: None,
2496
+ metadata: Some(TransactionJson::from_value_for_test(
2497
+ json!({"source": "two"})
2498
+ )),
2499
+ origin: None,
2500
+ created_at: None,
2501
+ updated_at: None,
2502
+ global: false,
2503
+ change_id: None,
2504
+ commit_id: None,
2505
+ untracked: false,
2506
+ version_id: "version-a".to_string(),
2507
+ },
2508
+ ]
2509
+ }]
2510
+ );
2511
+ }
2512
+ }