@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. package/SKILL.md +305 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/open-lix.d.ts +103 -14
  7. package/dist/open-lix.js +3 -0
  8. package/dist/sqlite/index.js +99 -22
  9. package/dist-engine-src/README.md +18 -0
  10. package/dist-engine-src/src/backend/kv.rs +358 -0
  11. package/dist-engine-src/src/backend/mod.rs +12 -0
  12. package/dist-engine-src/src/backend/testing.rs +658 -0
  13. package/dist-engine-src/src/backend/types.rs +96 -0
  14. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  15. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  16. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  17. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  18. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  19. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  20. package/dist-engine-src/src/cel/context.rs +86 -0
  21. package/dist-engine-src/src/cel/error.rs +19 -0
  22. package/dist-engine-src/src/cel/mod.rs +8 -0
  23. package/dist-engine-src/src/cel/provider.rs +9 -0
  24. package/dist-engine-src/src/cel/runtime.rs +167 -0
  25. package/dist-engine-src/src/cel/value.rs +50 -0
  26. package/dist-engine-src/src/changelog/codec.rs +321 -0
  27. package/dist-engine-src/src/changelog/context.rs +92 -0
  28. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  29. package/dist-engine-src/src/changelog/mod.rs +13 -0
  30. package/dist-engine-src/src/changelog/reader.rs +20 -0
  31. package/dist-engine-src/src/changelog/storage.rs +220 -0
  32. package/dist-engine-src/src/changelog/types.rs +38 -0
  33. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  34. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  35. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  36. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  37. package/dist-engine-src/src/common/error.rs +313 -0
  38. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  39. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  40. package/dist-engine-src/src/common/identity.rs +135 -0
  41. package/dist-engine-src/src/common/metadata.rs +35 -0
  42. package/dist-engine-src/src/common/mod.rs +23 -0
  43. package/dist-engine-src/src/common/types.rs +105 -0
  44. package/dist-engine-src/src/common/wire.rs +222 -0
  45. package/dist-engine-src/src/engine.rs +239 -0
  46. package/dist-engine-src/src/entity_identity.rs +285 -0
  47. package/dist-engine-src/src/functions/context.rs +327 -0
  48. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  49. package/dist-engine-src/src/functions/mod.rs +18 -0
  50. package/dist-engine-src/src/functions/provider.rs +130 -0
  51. package/dist-engine-src/src/functions/state.rs +363 -0
  52. package/dist-engine-src/src/functions/types.rs +37 -0
  53. package/dist-engine-src/src/init.rs +505 -0
  54. package/dist-engine-src/src/json_store/compression.rs +77 -0
  55. package/dist-engine-src/src/json_store/context.rs +129 -0
  56. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  57. package/dist-engine-src/src/json_store/mod.rs +9 -0
  58. package/dist-engine-src/src/json_store/store.rs +236 -0
  59. package/dist-engine-src/src/json_store/types.rs +52 -0
  60. package/dist-engine-src/src/lib.rs +61 -0
  61. package/dist-engine-src/src/live_state/context.rs +2241 -0
  62. package/dist-engine-src/src/live_state/mod.rs +15 -0
  63. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  64. package/dist-engine-src/src/live_state/reader.rs +23 -0
  65. package/dist-engine-src/src/live_state/types.rs +239 -0
  66. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  67. package/dist-engine-src/src/plugin/archive.rs +441 -0
  68. package/dist-engine-src/src/plugin/component.rs +183 -0
  69. package/dist-engine-src/src/plugin/install.rs +637 -0
  70. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  71. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  72. package/dist-engine-src/src/plugin/mod.rs +33 -0
  73. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  74. package/dist-engine-src/src/plugin/storage.rs +74 -0
  75. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  76. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  77. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  78. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  79. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  80. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  81. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  82. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  83. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  84. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  85. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  86. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  87. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  89. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  90. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  91. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  92. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  93. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  94. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  95. package/dist-engine-src/src/schema/definition.json +157 -0
  96. package/dist-engine-src/src/schema/definition.rs +636 -0
  97. package/dist-engine-src/src/schema/key.rs +206 -0
  98. package/dist-engine-src/src/schema/mod.rs +20 -0
  99. package/dist-engine-src/src/schema/seed.rs +14 -0
  100. package/dist-engine-src/src/schema/tests.rs +739 -0
  101. package/dist-engine-src/src/schema_registry.rs +294 -0
  102. package/dist-engine-src/src/session/context.rs +366 -0
  103. package/dist-engine-src/src/session/create_version.rs +80 -0
  104. package/dist-engine-src/src/session/execute.rs +447 -0
  105. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  106. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  107. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  108. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  109. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  110. package/dist-engine-src/src/session/merge/version.rs +437 -0
  111. package/dist-engine-src/src/session/mod.rs +25 -0
  112. package/dist-engine-src/src/session/switch_version.rs +121 -0
  113. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  114. package/dist-engine-src/src/sql2/classify.rs +147 -0
  115. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  116. package/dist-engine-src/src/sql2/context.rs +307 -0
  117. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  118. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  119. package/dist-engine-src/src/sql2/dml.rs +148 -0
  120. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  121. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  122. package/dist-engine-src/src/sql2/error.rs +196 -0
  123. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  124. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  125. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  126. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  127. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  128. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  129. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  130. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  131. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  132. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  133. package/dist-engine-src/src/sql2/mod.rs +43 -0
  134. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  135. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  136. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  137. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  138. package/dist-engine-src/src/sql2/session.rs +135 -0
  139. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  140. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  141. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  142. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  143. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  144. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  148. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  149. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  150. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  151. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  152. package/dist-engine-src/src/storage/context.rs +356 -0
  153. package/dist-engine-src/src/storage/mod.rs +14 -0
  154. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  155. package/dist-engine-src/src/storage/types.rs +501 -0
  156. package/dist-engine-src/src/storage_bench.rs +3406 -0
  157. package/dist-engine-src/src/test_support.rs +81 -0
  158. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  159. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  160. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  161. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  162. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  163. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  164. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  165. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  166. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  167. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  168. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  169. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  170. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  171. package/dist-engine-src/src/transaction/context.rs +1307 -0
  172. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  173. package/dist-engine-src/src/transaction/mod.rs +11 -0
  174. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  175. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  176. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  177. package/dist-engine-src/src/transaction/types.rs +351 -0
  178. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  179. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  180. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  181. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  182. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  183. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  184. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  185. package/dist-engine-src/src/version/context.rs +52 -0
  186. package/dist-engine-src/src/version/mod.rs +12 -0
  187. package/dist-engine-src/src/version/refs.rs +421 -0
  188. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  189. package/dist-engine-src/src/version/types.rs +21 -0
  190. package/dist-engine-src/src/wasm/mod.rs +60 -0
  191. package/package.json +68 -64
@@ -0,0 +1,2700 @@
1
+ use std::any::Any;
2
+ use std::collections::BTreeSet;
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{
7
+ ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, UInt64Array,
8
+ };
9
+ use datafusion::arrow::compute::{and, filter_record_batch};
10
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
11
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
12
+ use datafusion::catalog::{Session, TableProvider};
13
+ use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue};
14
+ use datafusion::datasource::TableType;
15
+ use datafusion::execution::TaskContext;
16
+ use datafusion::logical_expr::dml::InsertOp;
17
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
18
+ use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
19
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
20
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
21
+ use datafusion::physical_plan::{
22
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
23
+ };
24
+ use datafusion::prelude::SessionContext;
25
+ use futures_util::{stream, TryStreamExt};
26
+ use serde_json::Value as JsonValue;
27
+
28
+ use crate::commit_graph::CommitGraphReader;
29
+ use crate::entity_identity::EntityIdentity;
30
+ use crate::live_state::LiveStateRow;
31
+ use crate::live_state::{
32
+ LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
33
+ };
34
+ use crate::sql2::dml::{InsertExec, InsertSink};
35
+ use crate::sql2::read_only::reject_read_only_entity_surface;
36
+ use crate::sql2::version_scope::{
37
+ explicit_version_ids_from_dml_filters, resolve_provider_version_ids,
38
+ resolve_write_version_scope, VersionBinding,
39
+ };
40
+ use crate::sql2::write_normalization::{
41
+ InsertCell, InsertColumnIntents, SqlCell, UpdateAssignmentValues, UpdateCell,
42
+ };
43
+ use crate::transaction::types::StageRow;
44
+ use crate::version::VersionRefReader;
45
+ use crate::{parse_row_metadata, serialize_row_metadata, LixError, RowMetadata};
46
+
47
+ use super::entity_history_provider::EntityHistoryProvider;
48
+ use super::history_route::{
49
+ HISTORY_COL_CHANGE_ID, HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID,
50
+ HISTORY_COL_FILE_ID, HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID,
51
+ HISTORY_COL_SCHEMA_KEY, HISTORY_COL_SCHEMA_VERSION, HISTORY_COL_SNAPSHOT_CONTENT,
52
+ HISTORY_COL_START_COMMIT_ID,
53
+ };
54
+ use super::result_metadata::{json_field, mark_json_field};
55
+ use crate::sql2::{
56
+ SqlChangelogQuerySource, SqlWriteContext, WriteAccess, WriteContextLiveStateReader,
57
+ WriteContextVersionRefReader,
58
+ };
59
+ use crate::transaction::types::{StageWrite, StageWriteMode};
60
+
61
+ pub(crate) async fn register_entity_providers(
62
+ ctx: &SessionContext,
63
+ active_version_id: &str,
64
+ live_state: Arc<dyn LiveStateReader>,
65
+ version_ref: Arc<dyn VersionRefReader>,
66
+ commit_graph: Arc<tokio::sync::Mutex<Box<dyn CommitGraphReader>>>,
67
+ query_source: SqlChangelogQuerySource,
68
+ schema_definitions: &[JsonValue],
69
+ ) -> Result<(), LixError> {
70
+ for schema in schema_definitions {
71
+ let spec = match derive_entity_surface_spec_from_schema(schema) {
72
+ Ok(spec) => Arc::new(spec),
73
+ Err(_) => continue,
74
+ };
75
+
76
+ if !schema_exposed_as_entity_surface(&spec.schema_key) {
77
+ continue;
78
+ }
79
+
80
+ let by_version_name = format!("{}_by_version", spec.schema_key);
81
+ ctx.register_table(
82
+ &by_version_name,
83
+ Arc::new(EntityProvider::by_version(
84
+ Arc::clone(&spec),
85
+ Arc::clone(&live_state),
86
+ Arc::clone(&version_ref),
87
+ )),
88
+ )
89
+ .map_err(datafusion_error_to_lix_error)?;
90
+
91
+ ctx.register_table(
92
+ &spec.schema_key,
93
+ Arc::new(EntityProvider::active(
94
+ Arc::clone(&spec),
95
+ Arc::clone(&live_state),
96
+ Arc::clone(&version_ref),
97
+ active_version_id.to_string(),
98
+ )),
99
+ )
100
+ .map_err(datafusion_error_to_lix_error)?;
101
+
102
+ let history_name = format!("{}_history", spec.schema_key);
103
+ ctx.register_table(
104
+ &history_name,
105
+ Arc::new(EntityHistoryProvider::new(
106
+ Arc::clone(&spec),
107
+ Arc::clone(&commit_graph),
108
+ query_source.clone(),
109
+ )),
110
+ )
111
+ .map_err(datafusion_error_to_lix_error)?;
112
+ }
113
+
114
+ Ok(())
115
+ }
116
+
117
+ pub(crate) async fn register_entity_write_providers(
118
+ ctx: &SessionContext,
119
+ write_ctx: SqlWriteContext,
120
+ schema_definitions: &[JsonValue],
121
+ ) -> Result<(), LixError> {
122
+ for schema in schema_definitions {
123
+ let spec = match derive_entity_surface_spec_from_schema(schema) {
124
+ Ok(spec) => Arc::new(spec),
125
+ Err(_) => continue,
126
+ };
127
+
128
+ if !schema_exposed_as_entity_surface(&spec.schema_key) {
129
+ continue;
130
+ }
131
+
132
+ let by_version_name = format!("{}_by_version", spec.schema_key);
133
+ ctx.register_table(
134
+ &by_version_name,
135
+ Arc::new(EntityProvider::by_version_with_write(
136
+ Arc::clone(&spec),
137
+ write_ctx.clone(),
138
+ )),
139
+ )
140
+ .map_err(datafusion_error_to_lix_error)?;
141
+
142
+ ctx.register_table(
143
+ &spec.schema_key,
144
+ Arc::new(EntityProvider::active_with_write(
145
+ Arc::clone(&spec),
146
+ write_ctx.clone(),
147
+ )),
148
+ )
149
+ .map_err(datafusion_error_to_lix_error)?;
150
+ }
151
+
152
+ Ok(())
153
+ }
154
+
155
/// Which flavor of entity surface a provider represents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum EntityProviderVariant {
    /// Surface bound to the active version.
    Active,
    /// Surface where versions are selected explicitly.
    ByVersion,
    /// History surface; INSERT, UPDATE and DELETE are rejected for it in this file.
    History,
}
161
+
162
/// Logical type of a column exposed by an entity surface.
// NOTE(review): presumably derived from the property types of the schema
// definition — confirm against `derive_entity_surface_spec_from_schema`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum EntityColumnType {
    String,
    Json,
    Integer,
    Number,
    Boolean,
}
170
+
171
/// A single named, typed column of an entity surface.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct EntitySurfaceColumn {
    /// Column name; this is what `EntitySurfaceSpec::visible_column` matches against.
    pub(super) name: String,
    /// Logical type of the column.
    pub(super) column_type: EntityColumnType,
}
176
+
177
/// Description of the entity surface derived from one schema definition.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct EntitySurfaceSpec {
    /// Schema key; also used as the base name of the registered tables.
    pub(super) schema_key: String,
    /// Schema version, when one is known.
    schema_version: Option<String>,
    /// Primary-key paths.
    // NOTE(review): presumably each inner vector is one path split into
    // property segments — confirm against the spec derivation.
    pub(super) primary_key_paths: Vec<Vec<String>>,
    /// Columns of the surface that `visible_column` searches.
    pub(super) columns: Vec<EntitySurfaceColumn>,
}
184
+
185
+ impl EntitySurfaceSpec {
186
+ #[cfg(test)]
187
+ fn visible_column_names(&self) -> impl Iterator<Item = &str> {
188
+ self.columns.iter().map(|column| column.name.as_str())
189
+ }
190
+
191
+ pub(super) fn visible_column(&self, column_name: &str) -> Option<&EntitySurfaceColumn> {
192
+ self.columns
193
+ .iter()
194
+ .find(|column| column.name == column_name)
195
+ }
196
+
197
+ fn is_visible_column(&self, column_name: &str) -> bool {
198
+ self.visible_column(column_name).is_some()
199
+ }
200
+ }
201
+
202
/// DataFusion table provider that exposes one schema's entities as a SQL table.
pub(crate) struct EntityProvider {
    /// Surface description (schema key, columns, primary key paths).
    spec: Arc<EntitySurfaceSpec>,
    /// Reader handed to the scan execution plan.
    live_state: Arc<dyn LiveStateReader>,
    /// Reader used to resolve the version ids of a scan.
    version_ref: Arc<dyn VersionRefReader>,
    /// Read-only, or carrying the write context that DML is staged through.
    write_access: WriteAccess,
    /// Arrow schema of the table, built by `entity_surface_schema`.
    schema: SchemaRef,
    /// Which surface flavor this provider serves.
    variant: EntityProviderVariant,
    /// Active-version or explicit version binding used by scans and DML.
    version_binding: VersionBinding,
}
211
+
212
+ impl std::fmt::Debug for EntityProvider {
213
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
214
+ f.debug_struct("EntityProvider")
215
+ .field("schema_key", &self.spec.schema_key)
216
+ .field("variant", &self.variant)
217
+ .finish()
218
+ }
219
+ }
220
+
221
+ impl EntityProvider {
222
+ fn active(
223
+ spec: Arc<EntitySurfaceSpec>,
224
+ live_state: Arc<dyn LiveStateReader>,
225
+ version_ref: Arc<dyn VersionRefReader>,
226
+ active_version_id: String,
227
+ ) -> Self {
228
+ Self {
229
+ schema: entity_surface_schema(&spec, EntityProviderVariant::Active),
230
+ spec,
231
+ live_state,
232
+ version_ref,
233
+ write_access: WriteAccess::read_only(),
234
+ variant: EntityProviderVariant::Active,
235
+ version_binding: VersionBinding::active(active_version_id),
236
+ }
237
+ }
238
+
239
+ fn active_with_write(spec: Arc<EntitySurfaceSpec>, write_ctx: SqlWriteContext) -> Self {
240
+ let active_version_id = write_ctx.active_version_id();
241
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
242
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
243
+ Self {
244
+ schema: entity_surface_schema(&spec, EntityProviderVariant::Active),
245
+ spec,
246
+ live_state,
247
+ version_ref,
248
+ write_access: WriteAccess::write(write_ctx),
249
+ variant: EntityProviderVariant::Active,
250
+ version_binding: VersionBinding::active(active_version_id),
251
+ }
252
+ }
253
+
254
+ fn by_version(
255
+ spec: Arc<EntitySurfaceSpec>,
256
+ live_state: Arc<dyn LiveStateReader>,
257
+ version_ref: Arc<dyn VersionRefReader>,
258
+ ) -> Self {
259
+ Self {
260
+ schema: entity_surface_schema(&spec, EntityProviderVariant::ByVersion),
261
+ spec,
262
+ live_state,
263
+ version_ref,
264
+ write_access: WriteAccess::read_only(),
265
+ variant: EntityProviderVariant::ByVersion,
266
+ version_binding: VersionBinding::explicit(),
267
+ }
268
+ }
269
+
270
+ fn by_version_with_write(spec: Arc<EntitySurfaceSpec>, write_ctx: SqlWriteContext) -> Self {
271
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
272
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
273
+ Self {
274
+ schema: entity_surface_schema(&spec, EntityProviderVariant::ByVersion),
275
+ spec,
276
+ live_state,
277
+ version_ref,
278
+ write_access: WriteAccess::write(write_ctx),
279
+ variant: EntityProviderVariant::ByVersion,
280
+ version_binding: VersionBinding::explicit(),
281
+ }
282
+ }
283
+ }
284
+
285
+ #[async_trait]
286
+ impl TableProvider for EntityProvider {
287
/// Returns `self` as `&dyn Any` so callers can downcast to `EntityProvider`.
fn as_any(&self) -> &dyn Any {
    self
}
290
+
291
/// Returns a shared handle to the table's Arrow schema.
fn schema(&self) -> SchemaRef {
    Arc::clone(&self.schema)
}
294
+
295
/// Entity surfaces are reported to DataFusion as base tables.
fn table_type(&self) -> TableType {
    TableType::Base
}
298
+
299
+ fn supports_filters_pushdown(
300
+ &self,
301
+ filters: &[&Expr],
302
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
303
+ Ok(filters
304
+ .iter()
305
+ .map(|filter| {
306
+ if explicit_version_ids_from_dml_filters(&[(*filter).clone()]).is_empty() {
307
+ TableProviderFilterPushDown::Unsupported
308
+ } else {
309
+ TableProviderFilterPushDown::Inexact
310
+ }
311
+ })
312
+ .collect())
313
+ }
314
+
315
/// Builds the execution plan that reads this surface from live state.
///
/// The scan request is created for the provider's schema key, its bound
/// active version (if any) and `limit`. `filters` are only inspected for
/// explicit version ids, and only for writable providers with an explicit
/// version binding; no other predicate is evaluated here.
///
/// # Errors
///
/// Fails when the projection cannot be applied, when a writable explicit
/// provider is scanned without a version-id predicate, or when resolving the
/// version ids fails.
async fn scan(
    &self,
    _state: &dyn Session,
    projection: Option<&Vec<usize>>,
    filters: &[Expr],
    limit: Option<usize>,
) -> Result<Arc<dyn ExecutionPlan>> {
    let projected_schema = projected_schema(&self.schema, projection)?;
    let mut request = entity_live_state_scan_request(
        &self.spec.schema_key,
        self.version_binding.active_version_id(),
        limit,
    );
    // Writable `_by_version` scans must name their versions explicitly.
    if self.write_access.is_write() && matches!(self.version_binding, VersionBinding::Explicit)
    {
        request.filter.version_ids = explicit_version_ids_from_dml_filters(filters);
        if request.filter.version_ids.is_empty() {
            // NOTE(review): the message says "DELETE FROM" although this is the
            // scan path — presumably these scans only back DML statements; confirm.
            return Err(DataFusionError::Plan(format!(
                "DELETE FROM {}_by_version requires an explicit lixcol_version_id predicate",
                self.spec.schema_key
            )));
        }
    }
    // Let the version-ref reader turn the binding and requested ids into the final id set.
    request.filter.version_ids = resolve_provider_version_ids(
        self.version_ref.as_ref(),
        &self.version_binding,
        request.filter.version_ids,
    )
    .await
    .map_err(lix_error_to_datafusion_error)?;

    Ok(Arc::new(EntityScanExec::new(
        Arc::clone(&self.spec),
        Arc::clone(&self.live_state),
        projected_schema,
        request,
    )))
}
353
+
354
+ async fn insert_into(
355
+ &self,
356
+ _state: &dyn Session,
357
+ input: Arc<dyn ExecutionPlan>,
358
+ insert_op: InsertOp,
359
+ ) -> Result<Arc<dyn ExecutionPlan>> {
360
+ if insert_op != InsertOp::Append {
361
+ return not_impl_err!("{insert_op} not implemented for entity surfaces yet");
362
+ }
363
+ reject_read_only_entity_surface(&self.spec.schema_key, "INSERT")?;
364
+
365
+ let write_ctx = self.write_access.require_write(&format!(
366
+ "INSERT into {} entity surface",
367
+ self.spec.schema_key
368
+ ))?;
369
+
370
+ let insert_version_binding = match self.variant {
371
+ EntityProviderVariant::Active => self.version_binding.clone(),
372
+ EntityProviderVariant::ByVersion => VersionBinding::explicit(),
373
+ EntityProviderVariant::History => {
374
+ return not_impl_err!("INSERT is not implemented for entity history surfaces");
375
+ }
376
+ };
377
+
378
+ let sink = EntityInsertSink::new(
379
+ Arc::clone(&self.spec),
380
+ input.schema(),
381
+ InsertColumnIntents::from_input(&input),
382
+ write_ctx.clone(),
383
+ insert_version_binding,
384
+ );
385
+ Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
386
+ }
387
+
388
+ async fn delete_from(
389
+ &self,
390
+ state: &dyn Session,
391
+ filters: Vec<Expr>,
392
+ ) -> Result<Arc<dyn ExecutionPlan>> {
393
+ reject_read_only_entity_surface(&self.spec.schema_key, "DELETE")?;
394
+
395
+ let write_ctx = self.write_access.require_write(&format!(
396
+ "DELETE FROM {} entity surface",
397
+ self.spec.schema_key
398
+ ))?;
399
+
400
+ let version_binding = match self.variant {
401
+ EntityProviderVariant::Active => self.version_binding.clone(),
402
+ EntityProviderVariant::ByVersion => VersionBinding::explicit(),
403
+ EntityProviderVariant::History => {
404
+ return not_impl_err!("DELETE is not implemented for entity history surfaces");
405
+ }
406
+ };
407
+
408
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
409
+ let physical_filters = filters
410
+ .iter()
411
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
412
+ .collect::<Result<Vec<_>>>()?;
413
+ let mut request = entity_live_state_scan_request(
414
+ &self.spec.schema_key,
415
+ version_binding.active_version_id(),
416
+ None,
417
+ );
418
+ if matches!(version_binding, VersionBinding::Explicit) {
419
+ request.filter.version_ids = explicit_version_ids_from_dml_filters(&filters);
420
+ if request.filter.version_ids.is_empty() {
421
+ return Err(DataFusionError::Plan(format!(
422
+ "DELETE FROM {}_by_version requires an explicit lixcol_version_id predicate",
423
+ self.spec.schema_key
424
+ )));
425
+ }
426
+ }
427
+
428
+ Ok(Arc::new(EntityDeleteExec::new(
429
+ Arc::clone(&self.spec),
430
+ write_ctx.clone(),
431
+ Arc::clone(&self.schema),
432
+ version_binding,
433
+ request,
434
+ physical_filters,
435
+ )))
436
+ }
437
+
438
+ async fn update(
439
+ &self,
440
+ state: &dyn Session,
441
+ assignments: Vec<(String, Expr)>,
442
+ filters: Vec<Expr>,
443
+ ) -> Result<Arc<dyn ExecutionPlan>> {
444
+ reject_read_only_entity_surface(&self.spec.schema_key, "UPDATE")?;
445
+
446
+ let write_ctx = self
447
+ .write_access
448
+ .require_write(&format!("UPDATE {} entity surface", self.spec.schema_key))?;
449
+
450
+ validate_entity_update_assignments(&self.spec, &self.schema, &assignments)?;
451
+
452
+ let version_binding = match self.variant {
453
+ EntityProviderVariant::Active => self.version_binding.clone(),
454
+ EntityProviderVariant::ByVersion => VersionBinding::explicit(),
455
+ EntityProviderVariant::History => {
456
+ return not_impl_err!("UPDATE is not implemented for entity history surfaces");
457
+ }
458
+ };
459
+
460
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
461
+ let physical_assignments = assignments
462
+ .iter()
463
+ .map(|(column_name, expr)| {
464
+ Ok((
465
+ column_name.clone(),
466
+ create_physical_expr(expr, &df_schema, state.execution_props())?,
467
+ ))
468
+ })
469
+ .collect::<Result<Vec<_>>>()?;
470
+ let physical_filters = filters
471
+ .iter()
472
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
473
+ .collect::<Result<Vec<_>>>()?;
474
+ let request = entity_live_state_scan_request(
475
+ &self.spec.schema_key,
476
+ version_binding.active_version_id(),
477
+ None,
478
+ );
479
+
480
+ Ok(Arc::new(EntityUpdateExec::new(
481
+ Arc::clone(&self.spec),
482
+ write_ctx.clone(),
483
+ Arc::clone(&self.schema),
484
+ version_binding,
485
+ request,
486
+ physical_assignments,
487
+ physical_filters,
488
+ )))
489
+ }
490
+ }
491
+
492
/// Insert sink that turns incoming record batches into staged rows for one entity surface.
struct EntityInsertSink {
    /// Surface the rows are inserted into.
    spec: Arc<EntitySurfaceSpec>,
    /// Per-column insert intents, built from the INSERT input plan.
    insert_column_intents: InsertColumnIntents,
    /// Write context the rows are staged through.
    write_ctx: SqlWriteContext,
    /// Version binding whose active version id is passed to row conversion.
    version_binding: VersionBinding,
}
498
+
499
+ impl std::fmt::Debug for EntityInsertSink {
500
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
501
+ f.debug_struct("EntityInsertSink")
502
+ .field("schema_key", &self.spec.schema_key)
503
+ .finish()
504
+ }
505
+ }
506
+
507
+ impl EntityInsertSink {
508
+ fn new(
509
+ spec: Arc<EntitySurfaceSpec>,
510
+ _schema: SchemaRef,
511
+ insert_column_intents: InsertColumnIntents,
512
+ write_ctx: SqlWriteContext,
513
+ version_binding: VersionBinding,
514
+ ) -> Self {
515
+ Self {
516
+ spec,
517
+ insert_column_intents,
518
+ write_ctx,
519
+ version_binding,
520
+ }
521
+ }
522
+ }
523
+
524
+ impl DisplayAs for EntityInsertSink {
525
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
526
+ match t {
527
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
528
+ write!(f, "EntityInsertSink(schema_key={})", self.spec.schema_key)
529
+ }
530
+ DisplayFormatType::TreeRender => write!(f, "EntityInsertSink"),
531
+ }
532
+ }
533
+ }
534
+
535
+ #[async_trait]
536
+ impl InsertSink for EntityInsertSink {
537
+ async fn write_batches(
538
+ &self,
539
+ batches: Vec<RecordBatch>,
540
+ _context: &Arc<TaskContext>,
541
+ ) -> Result<u64> {
542
+ let mut rows = Vec::new();
543
+ for batch in batches {
544
+ rows.extend(entity_lix_state_write_rows_from_batch(
545
+ &self.spec,
546
+ &batch,
547
+ &self.insert_column_intents,
548
+ self.version_binding.active_version_id(),
549
+ )?);
550
+ }
551
+ let count = u64::try_from(rows.len())
552
+ .map_err(|_| DataFusionError::Execution("entity INSERT row count overflow".into()))?;
553
+
554
+ self.write_ctx
555
+ .stage_write(StageWrite::Rows {
556
+ mode: StageWriteMode::Insert,
557
+ rows,
558
+ })
559
+ .await
560
+ .map_err(lix_error_to_datafusion_error)?;
561
+
562
+ Ok(count)
563
+ }
564
+ }
565
+
566
/// Execution plan for a DELETE against an entity surface.
// NOTE(review): `dead_code` is allowed for the whole struct — presumably some
// fields are stored without being read; confirm which ones are still needed.
#[allow(dead_code)]
struct EntityDeleteExec {
    /// Surface the delete targets.
    spec: Arc<EntitySurfaceSpec>,
    /// Write context of the statement.
    write_ctx: SqlWriteContext,
    /// Full table schema; the physical filters were created against it.
    table_schema: SchemaRef,
    /// Active-version or explicit version binding of the statement.
    version_binding: VersionBinding,
    /// Live-state scan request prepared by `delete_from`.
    request: LiveStateScanRequest,
    /// Physical predicates of the DELETE.
    filters: Vec<Arc<dyn PhysicalExpr>>,
    /// Schema of the plan's output, taken from `dml_count_schema()`.
    result_schema: SchemaRef,
    /// Cached plan properties (single partition, final emission, bounded).
    properties: Arc<PlanProperties>,
}
577
+
578
+ impl std::fmt::Debug for EntityDeleteExec {
579
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
580
+ f.debug_struct("EntityDeleteExec")
581
+ .field("schema_key", &self.spec.schema_key)
582
+ .finish()
583
+ }
584
+ }
585
+
586
+ impl EntityDeleteExec {
587
+ fn new(
588
+ spec: Arc<EntitySurfaceSpec>,
589
+ write_ctx: SqlWriteContext,
590
+ table_schema: SchemaRef,
591
+ version_binding: VersionBinding,
592
+ request: LiveStateScanRequest,
593
+ filters: Vec<Arc<dyn PhysicalExpr>>,
594
+ ) -> Self {
595
+ let result_schema = dml_count_schema();
596
+ let properties = PlanProperties::new(
597
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
598
+ Partitioning::UnknownPartitioning(1),
599
+ EmissionType::Final,
600
+ Boundedness::Bounded,
601
+ );
602
+ Self {
603
+ spec,
604
+ write_ctx,
605
+ table_schema,
606
+ version_binding,
607
+ request,
608
+ filters,
609
+ result_schema,
610
+ properties: Arc::new(properties),
611
+ }
612
+ }
613
+ }
614
+
615
+ impl DisplayAs for EntityDeleteExec {
616
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
617
+ match t {
618
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
619
+ write!(
620
+ f,
621
+ "EntityDeleteExec(schema_key={}, filters={})",
622
+ self.spec.schema_key,
623
+ self.filters.len()
624
+ )
625
+ }
626
+ DisplayFormatType::TreeRender => write!(f, "EntityDeleteExec"),
627
+ }
628
+ }
629
+ }
630
+
631
+ impl ExecutionPlan for EntityDeleteExec {
632
+ fn name(&self) -> &str {
633
+ "EntityDeleteExec"
634
+ }
635
+
636
+ fn as_any(&self) -> &dyn Any {
637
+ self
638
+ }
639
+
640
+ fn properties(&self) -> &Arc<PlanProperties> {
641
+ &self.properties
642
+ }
643
+
644
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
645
+ Vec::new()
646
+ }
647
+
648
+ fn with_new_children(
649
+ self: Arc<Self>,
650
+ children: Vec<Arc<dyn ExecutionPlan>>,
651
+ ) -> Result<Arc<dyn ExecutionPlan>> {
652
+ if !children.is_empty() {
653
+ return Err(DataFusionError::Execution(
654
+ "EntityDeleteExec does not accept children".to_string(),
655
+ ));
656
+ }
657
+ Ok(self)
658
+ }
659
+
660
+ fn execute(
661
+ &self,
662
+ partition: usize,
663
+ _context: Arc<TaskContext>,
664
+ ) -> Result<SendableRecordBatchStream> {
665
+ if partition != 0 {
666
+ return Err(DataFusionError::Execution(format!(
667
+ "EntityDeleteExec only exposes one partition, got {partition}"
668
+ )));
669
+ }
670
+
671
+ let spec = Arc::clone(&self.spec);
672
+ let write_ctx = self.write_ctx.clone();
673
+ let table_schema = Arc::clone(&self.table_schema);
674
+ let version_binding = self.version_binding.clone();
675
+ let request = self.request.clone();
676
+ let filters = self.filters.clone();
677
+ let result_schema = Arc::clone(&self.result_schema);
678
+ let stream_schema = Arc::clone(&result_schema);
679
+
680
+ let stream = stream::once(async move {
681
+ let rows = if request.limit == Some(0) {
682
+ Vec::new()
683
+ } else {
684
+ write_ctx
685
+ .scan_live_state(&request)
686
+ .await
687
+ .map_err(lix_error_to_datafusion_error)?
688
+ };
689
+ let source_batch = entity_record_batch(&spec, Arc::clone(&table_schema), &rows)?;
690
+ let matched_batch = filter_entity_batch(source_batch, &filters)?;
691
+ let mut write_rows = entity_existing_lix_state_write_rows_from_batch(
692
+ &spec,
693
+ &matched_batch,
694
+ version_binding.active_version_id(),
695
+ )?;
696
+ for row in &mut write_rows {
697
+ row.snapshot_content = None;
698
+ }
699
+ let count = u64::try_from(write_rows.len()).map_err(|_| {
700
+ DataFusionError::Execution("entity DELETE row count overflow".to_string())
701
+ })?;
702
+
703
+ if count > 0 {
704
+ write_ctx
705
+ .stage_write(StageWrite::Rows {
706
+ mode: StageWriteMode::Replace,
707
+ rows: write_rows,
708
+ })
709
+ .await
710
+ .map_err(lix_error_to_datafusion_error)?;
711
+ }
712
+
713
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
714
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
715
+ )]))
716
+ })
717
+ .try_flatten();
718
+
719
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
720
+ result_schema,
721
+ stream,
722
+ )))
723
+ }
724
+ }
725
+
726
+ #[allow(dead_code)]
727
+ struct EntityUpdateExec {
728
+ spec: Arc<EntitySurfaceSpec>,
729
+ write_ctx: SqlWriteContext,
730
+ table_schema: SchemaRef,
731
+ version_binding: VersionBinding,
732
+ request: LiveStateScanRequest,
733
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
734
+ filters: Vec<Arc<dyn PhysicalExpr>>,
735
+ result_schema: SchemaRef,
736
+ properties: Arc<PlanProperties>,
737
+ }
738
+
739
+ impl std::fmt::Debug for EntityUpdateExec {
740
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
741
+ f.debug_struct("EntityUpdateExec")
742
+ .field("schema_key", &self.spec.schema_key)
743
+ .finish()
744
+ }
745
+ }
746
+
747
+ impl EntityUpdateExec {
748
+ fn new(
749
+ spec: Arc<EntitySurfaceSpec>,
750
+ write_ctx: SqlWriteContext,
751
+ table_schema: SchemaRef,
752
+ version_binding: VersionBinding,
753
+ request: LiveStateScanRequest,
754
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
755
+ filters: Vec<Arc<dyn PhysicalExpr>>,
756
+ ) -> Self {
757
+ let result_schema = dml_count_schema();
758
+ let properties = PlanProperties::new(
759
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
760
+ Partitioning::UnknownPartitioning(1),
761
+ EmissionType::Final,
762
+ Boundedness::Bounded,
763
+ );
764
+ Self {
765
+ spec,
766
+ write_ctx,
767
+ table_schema,
768
+ version_binding,
769
+ request,
770
+ assignments,
771
+ filters,
772
+ result_schema,
773
+ properties: Arc::new(properties),
774
+ }
775
+ }
776
+ }
777
+
778
+ impl DisplayAs for EntityUpdateExec {
779
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
780
+ match t {
781
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
782
+ write!(
783
+ f,
784
+ "EntityUpdateExec(schema_key={}, assignments={}, filters={})",
785
+ self.spec.schema_key,
786
+ self.assignments.len(),
787
+ self.filters.len()
788
+ )
789
+ }
790
+ DisplayFormatType::TreeRender => write!(f, "EntityUpdateExec"),
791
+ }
792
+ }
793
+ }
794
+
795
+ impl ExecutionPlan for EntityUpdateExec {
796
+ fn name(&self) -> &str {
797
+ "EntityUpdateExec"
798
+ }
799
+
800
+ fn as_any(&self) -> &dyn Any {
801
+ self
802
+ }
803
+
804
+ fn properties(&self) -> &Arc<PlanProperties> {
805
+ &self.properties
806
+ }
807
+
808
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
809
+ Vec::new()
810
+ }
811
+
812
+ fn with_new_children(
813
+ self: Arc<Self>,
814
+ children: Vec<Arc<dyn ExecutionPlan>>,
815
+ ) -> Result<Arc<dyn ExecutionPlan>> {
816
+ if !children.is_empty() {
817
+ return Err(DataFusionError::Execution(
818
+ "EntityUpdateExec does not accept children".to_string(),
819
+ ));
820
+ }
821
+ Ok(self)
822
+ }
823
+
824
+ fn execute(
825
+ &self,
826
+ partition: usize,
827
+ _context: Arc<TaskContext>,
828
+ ) -> Result<SendableRecordBatchStream> {
829
+ if partition != 0 {
830
+ return Err(DataFusionError::Execution(format!(
831
+ "EntityUpdateExec only exposes one partition, got {partition}"
832
+ )));
833
+ }
834
+
835
+ let spec = Arc::clone(&self.spec);
836
+ let write_ctx = self.write_ctx.clone();
837
+ let table_schema = Arc::clone(&self.table_schema);
838
+ let version_binding = self.version_binding.clone();
839
+ let request = self.request.clone();
840
+ let assignments = self.assignments.clone();
841
+ let filters = self.filters.clone();
842
+ let result_schema = Arc::clone(&self.result_schema);
843
+ let stream_schema = Arc::clone(&result_schema);
844
+
845
+ let stream = stream::once(async move {
846
+ let rows = if request.limit == Some(0) {
847
+ Vec::new()
848
+ } else {
849
+ write_ctx
850
+ .scan_live_state(&request)
851
+ .await
852
+ .map_err(lix_error_to_datafusion_error)?
853
+ };
854
+ let source_batch = entity_record_batch(&spec, Arc::clone(&table_schema), &rows)?;
855
+ let matched_batch = filter_entity_batch(source_batch, &filters)?;
856
+ let write_rows = entity_update_write_rows_from_batch(
857
+ &spec,
858
+ &matched_batch,
859
+ &assignments,
860
+ version_binding.active_version_id(),
861
+ )?;
862
+ let count = u64::try_from(write_rows.len()).map_err(|_| {
863
+ DataFusionError::Execution("entity UPDATE row count overflow".to_string())
864
+ })?;
865
+
866
+ if count > 0 {
867
+ write_ctx
868
+ .stage_write(StageWrite::Rows {
869
+ mode: StageWriteMode::Replace,
870
+ rows: write_rows,
871
+ })
872
+ .await
873
+ .map_err(lix_error_to_datafusion_error)?;
874
+ }
875
+
876
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
877
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
878
+ )]))
879
+ })
880
+ .try_flatten();
881
+
882
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
883
+ result_schema,
884
+ stream,
885
+ )))
886
+ }
887
+ }
888
+
889
+ fn validate_entity_update_assignments(
890
+ spec: &EntitySurfaceSpec,
891
+ schema: &SchemaRef,
892
+ assignments: &[(String, Expr)],
893
+ ) -> Result<()> {
894
+ for (column_name, _) in assignments {
895
+ schema.field_with_name(column_name).map_err(|_| {
896
+ DataFusionError::Plan(format!(
897
+ "UPDATE entity surface '{}' failed: column '{column_name}' does not exist",
898
+ spec.schema_key
899
+ ))
900
+ })?;
901
+ if !spec.is_visible_column(column_name) && column_name != "lixcol_metadata" {
902
+ return Err(DataFusionError::Execution(format!(
903
+ "UPDATE entity surface '{}' cannot stage read-only column '{column_name}'",
904
+ spec.schema_key
905
+ )));
906
+ }
907
+ }
908
+ Ok(())
909
+ }
910
+
911
+ fn filter_entity_batch(
912
+ batch: RecordBatch,
913
+ filters: &[Arc<dyn PhysicalExpr>],
914
+ ) -> Result<RecordBatch> {
915
+ let Some(mask) = evaluate_entity_filters(&batch, filters)? else {
916
+ return Ok(batch);
917
+ };
918
+ Ok(filter_record_batch(&batch, &mask)?)
919
+ }
920
+
921
+ fn evaluate_entity_filters(
922
+ batch: &RecordBatch,
923
+ filters: &[Arc<dyn PhysicalExpr>],
924
+ ) -> Result<Option<BooleanArray>> {
925
+ if filters.is_empty() {
926
+ return Ok(None);
927
+ }
928
+
929
+ let mut combined_mask: Option<BooleanArray> = None;
930
+ for filter in filters {
931
+ let result = filter.evaluate(batch)?;
932
+ let array = result.into_array(batch.num_rows())?;
933
+ let bool_array = array
934
+ .as_any()
935
+ .downcast_ref::<BooleanArray>()
936
+ .ok_or_else(|| {
937
+ DataFusionError::Execution("entity surface filter was not boolean".to_string())
938
+ })?;
939
+ let normalized = bool_array
940
+ .iter()
941
+ .map(|value| Some(value == Some(true)))
942
+ .collect::<BooleanArray>();
943
+ combined_mask = Some(match combined_mask {
944
+ Some(existing) => and(&existing, &normalized)?,
945
+ None => normalized,
946
+ });
947
+ }
948
+ Ok(combined_mask)
949
+ }
950
+
951
+ fn entity_update_write_rows_from_batch(
952
+ spec: &EntitySurfaceSpec,
953
+ batch: &RecordBatch,
954
+ assignments: &[(String, Arc<dyn PhysicalExpr>)],
955
+ version_binding: Option<&str>,
956
+ ) -> Result<Vec<StageRow>> {
957
+ let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
958
+ (0..batch.num_rows())
959
+ .map(|row_index| {
960
+ let scope = resolve_write_version_scope(
961
+ optional_bool_value(batch, row_index, "lixcol_global")?,
962
+ optional_string_value(batch, row_index, "lixcol_version_id")?,
963
+ version_binding,
964
+ &format!("UPDATE into {}_by_version", spec.schema_key),
965
+ &spec.schema_key,
966
+ )?;
967
+
968
+ let schema_version = optional_string_value(batch, row_index, "lixcol_schema_version")?
969
+ .or_else(|| spec.schema_version.clone())
970
+ .ok_or_else(|| {
971
+ DataFusionError::Execution(format!(
972
+ "UPDATE entity surface '{}' requires lixcol_schema_version",
973
+ spec.schema_key
974
+ ))
975
+ })?;
976
+
977
+ Ok(StageRow {
978
+ entity_id: optional_string_value(batch, row_index, "lixcol_entity_id")?
979
+ .map(|entity_id| {
980
+ EntityIdentity::from_string(&entity_id).map_err(|error| {
981
+ DataFusionError::Execution(format!(
982
+ "UPDATE entity surface '{}' has invalid lixcol_entity_id: {error}",
983
+ spec.schema_key
984
+ ))
985
+ })
986
+ })
987
+ .transpose()?,
988
+ schema_key: spec.schema_key.clone(),
989
+ file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
990
+ snapshot_content: Some(entity_update_snapshot_content_from_batch(
991
+ spec,
992
+ batch,
993
+ &assignment_values,
994
+ row_index,
995
+ )?),
996
+ metadata: entity_update_optional_metadata_value(
997
+ batch,
998
+ &assignment_values,
999
+ row_index,
1000
+ "lixcol_metadata",
1001
+ &spec.schema_key,
1002
+ )?,
1003
+ origin: None,
1004
+ schema_version,
1005
+ created_at: None,
1006
+ updated_at: None,
1007
+ global: scope.global,
1008
+ change_id: None,
1009
+ commit_id: None,
1010
+ untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?
1011
+ .unwrap_or(false),
1012
+ version_id: scope.version_id,
1013
+ })
1014
+ })
1015
+ .collect()
1016
+ }
1017
+
1018
+ fn entity_update_snapshot_content_from_batch(
1019
+ spec: &EntitySurfaceSpec,
1020
+ batch: &RecordBatch,
1021
+ assignment_values: &UpdateAssignmentValues,
1022
+ row_index: usize,
1023
+ ) -> Result<String> {
1024
+ let snapshot_content = optional_string_value(batch, row_index, "lixcol_snapshot_content")?
1025
+ .ok_or_else(|| {
1026
+ DataFusionError::Execution(format!(
1027
+ "UPDATE entity surface '{}' requires existing lixcol_snapshot_content",
1028
+ spec.schema_key
1029
+ ))
1030
+ })?;
1031
+ let mut object = match serde_json::from_str::<JsonValue>(&snapshot_content).map_err(|error| {
1032
+ DataFusionError::Execution(format!(
1033
+ "UPDATE entity surface '{}' expected existing snapshot_content to be valid JSON: {error}",
1034
+ spec.schema_key
1035
+ ))
1036
+ })? {
1037
+ JsonValue::Object(object) => object,
1038
+ other => {
1039
+ return Err(DataFusionError::Execution(format!(
1040
+ "UPDATE entity surface '{}' expected existing snapshot_content to be a JSON object, got {other}",
1041
+ spec.schema_key
1042
+ )))
1043
+ }
1044
+ };
1045
+
1046
+ for column in &spec.columns {
1047
+ let value = match entity_update_json_value(
1048
+ assignment_values,
1049
+ row_index,
1050
+ &column.name,
1051
+ column.column_type,
1052
+ )? {
1053
+ Some(value) => value,
1054
+ None => continue,
1055
+ };
1056
+ object.insert(column.name.clone(), value);
1057
+ }
1058
+ serde_json::to_string(&JsonValue::Object(object)).map_err(|error| {
1059
+ DataFusionError::Execution(format!(
1060
+ "failed to serialize entity surface '{}' snapshot_content: {error}",
1061
+ spec.schema_key
1062
+ ))
1063
+ })
1064
+ }
1065
+
1066
+ fn entity_update_optional_string_value(
1067
+ batch: &RecordBatch,
1068
+ assignment_values: &UpdateAssignmentValues,
1069
+ row_index: usize,
1070
+ column_name: &str,
1071
+ ) -> Result<Option<String>> {
1072
+ match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
1073
+ InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
1074
+ InsertCell::Provided(SqlCell::Value(
1075
+ ScalarValue::Utf8(Some(value))
1076
+ | ScalarValue::Utf8View(Some(value))
1077
+ | ScalarValue::LargeUtf8(Some(value)),
1078
+ )) => Ok(Some(value)),
1079
+ InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
1080
+ "UPDATE entity surface expected text-compatible column '{column_name}', got {other:?}"
1081
+ ))),
1082
+ }
1083
+ }
1084
+
1085
+ fn entity_update_optional_metadata_value(
1086
+ batch: &RecordBatch,
1087
+ assignment_values: &UpdateAssignmentValues,
1088
+ row_index: usize,
1089
+ column_name: &str,
1090
+ context: &str,
1091
+ ) -> Result<Option<RowMetadata>> {
1092
+ entity_update_optional_string_value(batch, assignment_values, row_index, column_name)?
1093
+ .map(|value| {
1094
+ parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
1095
+ })
1096
+ .transpose()
1097
+ }
1098
+
1099
+ fn entity_update_json_value(
1100
+ assignment_values: &UpdateAssignmentValues,
1101
+ row_index: usize,
1102
+ column_name: &str,
1103
+ column_type: EntityColumnType,
1104
+ ) -> Result<Option<JsonValue>> {
1105
+ match assignment_values.assigned_cell(row_index, column_name)? {
1106
+ UpdateCell::Unassigned => Ok(None),
1107
+ UpdateCell::Assigned(SqlCell::Null) => Ok(Some(JsonValue::Null)),
1108
+ UpdateCell::Assigned(SqlCell::Value(value)) => {
1109
+ entity_json_value_from_scalar(Some(value), column_type).map(Some)
1110
+ }
1111
+ }
1112
+ }
1113
+
1114
+ fn dml_count_schema() -> SchemaRef {
1115
+ Arc::new(Schema::new(vec![Field::new(
1116
+ "count",
1117
+ DataType::UInt64,
1118
+ false,
1119
+ )]))
1120
+ }
1121
+
1122
+ fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
1123
+ RecordBatch::try_new(
1124
+ schema,
1125
+ vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
1126
+ )
1127
+ .map_err(DataFusionError::from)
1128
+ }
1129
+
1130
+ fn entity_lix_state_write_rows_from_batch(
1131
+ spec: &EntitySurfaceSpec,
1132
+ batch: &RecordBatch,
1133
+ insert_column_intents: &InsertColumnIntents,
1134
+ version_binding: Option<&str>,
1135
+ ) -> Result<Vec<StageRow>> {
1136
+ entity_lix_state_write_rows_from_batch_with_options(
1137
+ spec,
1138
+ batch,
1139
+ insert_column_intents,
1140
+ version_binding,
1141
+ true,
1142
+ )
1143
+ }
1144
+
1145
+ fn entity_existing_lix_state_write_rows_from_batch(
1146
+ spec: &EntitySurfaceSpec,
1147
+ batch: &RecordBatch,
1148
+ version_binding: Option<&str>,
1149
+ ) -> Result<Vec<StageRow>> {
1150
+ entity_lix_state_write_rows_from_batch_with_options(
1151
+ spec,
1152
+ batch,
1153
+ &InsertColumnIntents::all_explicit(),
1154
+ version_binding,
1155
+ false,
1156
+ )
1157
+ }
1158
+
1159
+ fn entity_lix_state_write_rows_from_batch_with_options(
1160
+ spec: &EntitySurfaceSpec,
1161
+ batch: &RecordBatch,
1162
+ insert_column_intents: &InsertColumnIntents,
1163
+ version_binding: Option<&str>,
1164
+ reject_read_only_fields: bool,
1165
+ ) -> Result<Vec<StageRow>> {
1166
+ (0..batch.num_rows())
1167
+ .map(|row_index| {
1168
+ let scope = resolve_write_version_scope(
1169
+ optional_bool_value(batch, row_index, "lixcol_global")?,
1170
+ optional_string_value(batch, row_index, "lixcol_version_id")?,
1171
+ version_binding,
1172
+ &format!(
1173
+ "INSERT into {}_by_version",
1174
+ spec.schema_key
1175
+ ),
1176
+ &spec.schema_key,
1177
+ )?;
1178
+
1179
+ if let Some(schema_key) = optional_string_value(batch, row_index, "lixcol_schema_key")?
1180
+ {
1181
+ if schema_key != spec.schema_key {
1182
+ return Err(DataFusionError::Execution(format!(
1183
+ "INSERT into entity surface '{}' cannot set lixcol_schema_key to '{}'",
1184
+ spec.schema_key, schema_key
1185
+ )));
1186
+ }
1187
+ }
1188
+
1189
+ if reject_read_only_fields {
1190
+ reject_present_entity_insert_field(batch, row_index, "lixcol_snapshot_content")?;
1191
+ reject_present_entity_insert_field(batch, row_index, "lixcol_created_at")?;
1192
+ reject_present_entity_insert_field(batch, row_index, "lixcol_updated_at")?;
1193
+ reject_present_entity_insert_field(batch, row_index, "lixcol_change_id")?;
1194
+ reject_present_entity_insert_field(batch, row_index, "lixcol_commit_id")?;
1195
+ }
1196
+
1197
+ let schema_version = optional_string_value(batch, row_index, "lixcol_schema_version")?
1198
+ .or_else(|| spec.schema_version.clone())
1199
+ .ok_or_else(|| {
1200
+ DataFusionError::Execution(format!(
1201
+ "INSERT into entity surface '{}' requires lixcol_schema_version",
1202
+ spec.schema_key
1203
+ ))
1204
+ })?;
1205
+ let snapshot_content =
1206
+ entity_snapshot_content_from_batch(spec, batch, insert_column_intents, row_index)?;
1207
+ let explicit_entity_id = optional_string_value(batch, row_index, "lixcol_entity_id")?;
1208
+ let entity_id = if spec.primary_key_paths.is_empty() {
1209
+ let entity_id = explicit_entity_id.ok_or_else(|| {
1210
+ DataFusionError::Execution(format!(
1211
+ "INSERT into entity surface '{}' requires lixcol_entity_id because the schema has no x-lix-primary-key",
1212
+ spec.schema_key
1213
+ ))
1214
+ })?;
1215
+ Some(EntityIdentity::from_string(&entity_id).map_err(|error| {
1216
+ DataFusionError::Execution(format!(
1217
+ "INSERT into entity surface '{}' has invalid lixcol_entity_id: {error}",
1218
+ spec.schema_key
1219
+ ))
1220
+ })?)
1221
+ } else {
1222
+ explicit_entity_id
1223
+ .map(|entity_id| {
1224
+ EntityIdentity::from_string(&entity_id).map_err(|error| {
1225
+ DataFusionError::Execution(format!(
1226
+ "INSERT into entity surface '{}' has invalid lixcol_entity_id: {error}",
1227
+ spec.schema_key
1228
+ ))
1229
+ })
1230
+ })
1231
+ .transpose()?
1232
+ };
1233
+
1234
+ Ok(StageRow {
1235
+ entity_id,
1236
+ schema_key: spec.schema_key.clone(),
1237
+ file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1238
+ snapshot_content: Some(snapshot_content),
1239
+ metadata: optional_metadata_value(batch, row_index, "lixcol_metadata", &spec.schema_key)?,
1240
+ origin: None,
1241
+ schema_version: schema_version,
1242
+ created_at: None,
1243
+ updated_at: None,
1244
+ global: scope.global,
1245
+ change_id: None,
1246
+ commit_id: None,
1247
+ untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?
1248
+ .unwrap_or(false),
1249
+ version_id: scope.version_id,
1250
+ })
1251
+ })
1252
+ .collect()
1253
+ }
1254
+
1255
+ fn entity_snapshot_content_from_batch(
1256
+ spec: &EntitySurfaceSpec,
1257
+ batch: &RecordBatch,
1258
+ insert_column_intents: &InsertColumnIntents,
1259
+ row_index: usize,
1260
+ ) -> Result<String> {
1261
+ let mut object = serde_json::Map::new();
1262
+ for column in &spec.columns {
1263
+ let value = match insert_column_intents.cell(batch, row_index, &column.name)? {
1264
+ InsertCell::Omitted => {
1265
+ continue;
1266
+ }
1267
+ InsertCell::Provided(SqlCell::Null) => JsonValue::Null,
1268
+ InsertCell::Provided(SqlCell::Value(value)) => {
1269
+ entity_json_value_from_scalar(Some(value), column.column_type)?
1270
+ }
1271
+ };
1272
+ object.insert(column.name.clone(), value);
1273
+ }
1274
+ serde_json::to_string(&JsonValue::Object(object)).map_err(|error| {
1275
+ DataFusionError::Execution(format!(
1276
+ "failed to serialize entity surface '{}' snapshot_content: {error}",
1277
+ spec.schema_key
1278
+ ))
1279
+ })
1280
+ }
1281
+
1282
+ fn entity_json_value_from_scalar(
1283
+ value: Option<ScalarValue>,
1284
+ column_type: EntityColumnType,
1285
+ ) -> Result<JsonValue> {
1286
+ let Some(value) = value else {
1287
+ return Ok(JsonValue::Null);
1288
+ };
1289
+ match value {
1290
+ ScalarValue::Null
1291
+ | ScalarValue::Utf8(None)
1292
+ | ScalarValue::Utf8View(None)
1293
+ | ScalarValue::LargeUtf8(None)
1294
+ | ScalarValue::Boolean(None)
1295
+ | ScalarValue::Int64(None)
1296
+ | ScalarValue::Int32(None)
1297
+ | ScalarValue::UInt64(None)
1298
+ | ScalarValue::UInt32(None)
1299
+ | ScalarValue::Float64(None)
1300
+ | ScalarValue::Float32(None) => Ok(JsonValue::Null),
1301
+ ScalarValue::Utf8(Some(value))
1302
+ | ScalarValue::Utf8View(Some(value))
1303
+ | ScalarValue::LargeUtf8(Some(value)) => match column_type {
1304
+ EntityColumnType::Json => {
1305
+ // JSON surface columns accept SQL strings as JSON string values,
1306
+ // while still allowing callers to pass serialized JSON text for
1307
+ // objects, arrays, numbers, booleans, and null.
1308
+ Ok(serde_json::from_str(&value).unwrap_or(JsonValue::String(value)))
1309
+ }
1310
+ EntityColumnType::Integer => {
1311
+ value.parse::<i64>().map(JsonValue::from).map_err(|error| {
1312
+ DataFusionError::Execution(format!(
1313
+ "entity integer column expected integer text, got error: {error}"
1314
+ ))
1315
+ })
1316
+ }
1317
+ EntityColumnType::Number => value
1318
+ .parse::<f64>()
1319
+ .map_err(|error| {
1320
+ DataFusionError::Execution(format!(
1321
+ "entity number column expected number text, got error: {error}"
1322
+ ))
1323
+ })
1324
+ .and_then(json_number_from_f64),
1325
+ EntityColumnType::Boolean => {
1326
+ value.parse::<bool>().map(JsonValue::from).map_err(|error| {
1327
+ DataFusionError::Execution(format!(
1328
+ "entity boolean column expected boolean text, got error: {error}"
1329
+ ))
1330
+ })
1331
+ }
1332
+ EntityColumnType::String => Ok(JsonValue::String(value)),
1333
+ },
1334
+ ScalarValue::Boolean(Some(value)) => Ok(JsonValue::Bool(value)),
1335
+ ScalarValue::Int64(Some(value)) => Ok(JsonValue::from(value)),
1336
+ ScalarValue::Int32(Some(value)) => Ok(JsonValue::from(value)),
1337
+ ScalarValue::UInt64(Some(value)) => Ok(JsonValue::from(value)),
1338
+ ScalarValue::UInt32(Some(value)) => Ok(JsonValue::from(value)),
1339
+ ScalarValue::Float64(Some(value)) => json_number_from_f64(value),
1340
+ ScalarValue::Float32(Some(value)) => json_number_from_f64(value as f64),
1341
+ ScalarValue::Binary(Some(_))
1342
+ | ScalarValue::LargeBinary(Some(_))
1343
+ | ScalarValue::FixedSizeBinary(_, Some(_)) => Err(lix_error_to_datafusion_error(
1344
+ LixError::new(
1345
+ LixError::CODE_TYPE_MISMATCH,
1346
+ "entity JSON columns cannot store blob values directly",
1347
+ )
1348
+ .with_hint(
1349
+ "Encode bytes explicitly as JSON text/object, or store raw bytes in a blob-native surface such as lix_file.data.",
1350
+ ),
1351
+ )),
1352
+ ScalarValue::Binary(None)
1353
+ | ScalarValue::LargeBinary(None)
1354
+ | ScalarValue::FixedSizeBinary(_, None) => Ok(JsonValue::Null),
1355
+ other => Err(DataFusionError::Execution(format!(
1356
+ "entity insert does not support scalar value {other:?}"
1357
+ ))),
1358
+ }
1359
+ }
1360
+
1361
+ fn json_number_from_f64(value: f64) -> Result<JsonValue> {
1362
+ serde_json::Number::from_f64(value)
1363
+ .map(JsonValue::Number)
1364
+ .ok_or_else(|| {
1365
+ DataFusionError::Execution(format!("entity number column cannot store {value}"))
1366
+ })
1367
+ }
1368
+
1369
+ fn reject_present_entity_insert_field(
1370
+ batch: &RecordBatch,
1371
+ row_index: usize,
1372
+ column_name: &str,
1373
+ ) -> Result<()> {
1374
+ if optional_scalar_value(batch, row_index, column_name)?.is_some_and(|value| !value.is_null()) {
1375
+ return Err(DataFusionError::Execution(format!(
1376
+ "INSERT into entity surface cannot stage read-only column '{column_name}'"
1377
+ )));
1378
+ }
1379
+ Ok(())
1380
+ }
1381
+
1382
+ fn optional_string_value(
1383
+ batch: &RecordBatch,
1384
+ row_index: usize,
1385
+ column_name: &str,
1386
+ ) -> Result<Option<String>> {
1387
+ match optional_scalar_value(batch, row_index, column_name)? {
1388
+ None
1389
+ | Some(ScalarValue::Null)
1390
+ | Some(ScalarValue::Utf8(None))
1391
+ | Some(ScalarValue::Utf8View(None))
1392
+ | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
1393
+ Some(ScalarValue::Utf8(Some(value)))
1394
+ | Some(ScalarValue::Utf8View(Some(value)))
1395
+ | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
1396
+ Some(other) => Err(DataFusionError::Execution(format!(
1397
+ "INSERT into entity surface expected text-compatible column '{column_name}', got {other:?}"
1398
+ ))),
1399
+ }
1400
+ }
1401
+
1402
+ fn optional_metadata_value(
1403
+ batch: &RecordBatch,
1404
+ row_index: usize,
1405
+ column_name: &str,
1406
+ context: &str,
1407
+ ) -> Result<Option<RowMetadata>> {
1408
+ optional_string_value(batch, row_index, column_name)?
1409
+ .map(|value| {
1410
+ parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
1411
+ })
1412
+ .transpose()
1413
+ }
1414
+
1415
+ fn optional_bool_value(
1416
+ batch: &RecordBatch,
1417
+ row_index: usize,
1418
+ column_name: &str,
1419
+ ) -> Result<Option<bool>> {
1420
+ match optional_scalar_value(batch, row_index, column_name)? {
1421
+ None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
1422
+ Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
1423
+ Some(other) => Err(DataFusionError::Execution(format!(
1424
+ "INSERT into entity surface expected boolean column '{column_name}', got {other:?}"
1425
+ ))),
1426
+ }
1427
+ }
1428
+
1429
+ fn optional_scalar_value(
1430
+ batch: &RecordBatch,
1431
+ row_index: usize,
1432
+ column_name: &str,
1433
+ ) -> Result<Option<ScalarValue>> {
1434
+ let schema = batch.schema();
1435
+ let column_index = match schema.index_of(column_name) {
1436
+ Ok(column_index) => column_index,
1437
+ Err(_) => return Ok(None),
1438
+ };
1439
+ if row_index >= batch.num_rows() {
1440
+ return Err(DataFusionError::Execution(format!(
1441
+ "row index {row_index} out of bounds for entity batch with {} rows",
1442
+ batch.num_rows()
1443
+ )));
1444
+ }
1445
+ ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
1446
+ .map(Some)
1447
+ .map_err(|error| {
1448
+ DataFusionError::Execution(format!(
1449
+ "failed to decode entity column '{column_name}' at row {row_index}: {error}"
1450
+ ))
1451
+ })
1452
+ }
1453
+
1454
+ struct EntityScanExec {
1455
+ spec: Arc<EntitySurfaceSpec>,
1456
+ live_state: Arc<dyn LiveStateReader>,
1457
+ schema: SchemaRef,
1458
+ request: LiveStateScanRequest,
1459
+ properties: Arc<PlanProperties>,
1460
+ }
1461
+
1462
+ impl std::fmt::Debug for EntityScanExec {
1463
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1464
+ f.debug_struct("EntityScanExec")
1465
+ .field("schema_key", &self.spec.schema_key)
1466
+ .finish()
1467
+ }
1468
+ }
1469
+
1470
+ impl EntityScanExec {
1471
+ fn new(
1472
+ spec: Arc<EntitySurfaceSpec>,
1473
+ live_state: Arc<dyn LiveStateReader>,
1474
+ schema: SchemaRef,
1475
+ request: LiveStateScanRequest,
1476
+ ) -> Self {
1477
+ let properties = PlanProperties::new(
1478
+ EquivalenceProperties::new(Arc::clone(&schema)),
1479
+ Partitioning::UnknownPartitioning(1),
1480
+ EmissionType::Incremental,
1481
+ Boundedness::Bounded,
1482
+ );
1483
+ Self {
1484
+ spec,
1485
+ live_state,
1486
+ schema,
1487
+ request,
1488
+ properties: Arc::new(properties),
1489
+ }
1490
+ }
1491
+ }
1492
+
1493
+ impl DisplayAs for EntityScanExec {
1494
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1495
+ match t {
1496
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
1497
+ write!(
1498
+ f,
1499
+ "EntityScanExec(schema_key={}, limit={:?})",
1500
+ self.spec.schema_key, self.request.limit
1501
+ )
1502
+ }
1503
+ DisplayFormatType::TreeRender => write!(f, "EntityScanExec"),
1504
+ }
1505
+ }
1506
+ }
1507
+
1508
+ impl ExecutionPlan for EntityScanExec {
1509
+ fn name(&self) -> &str {
1510
+ "EntityScanExec"
1511
+ }
1512
+
1513
+ fn as_any(&self) -> &dyn Any {
1514
+ self
1515
+ }
1516
+
1517
+ fn properties(&self) -> &Arc<PlanProperties> {
1518
+ &self.properties
1519
+ }
1520
+
1521
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1522
+ Vec::new()
1523
+ }
1524
+
1525
+ fn with_new_children(
1526
+ self: Arc<Self>,
1527
+ children: Vec<Arc<dyn ExecutionPlan>>,
1528
+ ) -> Result<Arc<dyn ExecutionPlan>> {
1529
+ if !children.is_empty() {
1530
+ return Err(DataFusionError::Execution(
1531
+ "EntityScanExec does not accept children".to_string(),
1532
+ ));
1533
+ }
1534
+ Ok(self)
1535
+ }
1536
+
1537
+ fn execute(
1538
+ &self,
1539
+ partition: usize,
1540
+ _context: Arc<TaskContext>,
1541
+ ) -> Result<SendableRecordBatchStream> {
1542
+ if partition != 0 {
1543
+ return Err(DataFusionError::Execution(format!(
1544
+ "EntityScanExec only exposes one partition, got {partition}"
1545
+ )));
1546
+ }
1547
+
1548
+ let spec = Arc::clone(&self.spec);
1549
+ let live_state = Arc::clone(&self.live_state);
1550
+ let schema = Arc::clone(&self.schema);
1551
+ let request = self.request.clone();
1552
+ let stream_schema = Arc::clone(&schema);
1553
+ let stream = stream::once(async move {
1554
+ let rows = if request.limit == Some(0) {
1555
+ Vec::new()
1556
+ } else {
1557
+ live_state
1558
+ .scan_rows(&request)
1559
+ .await
1560
+ .map_err(lix_error_to_datafusion_error)?
1561
+ };
1562
+ let batch = entity_record_batch(&spec, Arc::clone(&stream_schema), &rows)?;
1563
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
1564
+ batch,
1565
+ )]))
1566
+ })
1567
+ .try_flatten();
1568
+
1569
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
1570
+ }
1571
+ }
1572
+
1573
+ fn entity_live_state_scan_request(
1574
+ schema_key: &str,
1575
+ active_version_id: Option<&str>,
1576
+ limit: Option<usize>,
1577
+ ) -> LiveStateScanRequest {
1578
+ LiveStateScanRequest {
1579
+ filter: LiveStateFilter {
1580
+ schema_keys: vec![schema_key.to_string()],
1581
+ version_ids: active_version_id
1582
+ .map(|version_id| vec![version_id.to_string()])
1583
+ .unwrap_or_default(),
1584
+ ..LiveStateFilter::default()
1585
+ },
1586
+ projection: LiveStateProjection::default(),
1587
+ limit,
1588
+ }
1589
+ }
1590
+
1591
+ fn entity_record_batch(
1592
+ spec: &EntitySurfaceSpec,
1593
+ schema: SchemaRef,
1594
+ rows: &[LiveStateRow],
1595
+ ) -> Result<RecordBatch> {
1596
+ if schema.fields().is_empty() {
1597
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
1598
+ return RecordBatch::try_new_with_options(schema, vec![], &options)
1599
+ .map_err(DataFusionError::from);
1600
+ }
1601
+
1602
+ let snapshots = rows
1603
+ .iter()
1604
+ .map(|row| parse_snapshot(row.snapshot_content.as_deref()))
1605
+ .collect::<Result<Vec<_>>>()?;
1606
+
1607
+ let columns = schema
1608
+ .fields()
1609
+ .iter()
1610
+ .map(|field| entity_column_array(spec, field.name(), rows, &snapshots))
1611
+ .collect::<Result<Vec<_>>>()?;
1612
+
1613
+ RecordBatch::try_new(schema, columns).map_err(DataFusionError::from)
1614
+ }
1615
+
1616
+ fn entity_column_array(
1617
+ spec: &EntitySurfaceSpec,
1618
+ column_name: &str,
1619
+ rows: &[LiveStateRow],
1620
+ snapshots: &[Option<JsonValue>],
1621
+ ) -> Result<ArrayRef> {
1622
+ if let Some(property_name) = column_name.strip_prefix("lixcol_") {
1623
+ return entity_system_column_array(property_name, rows);
1624
+ }
1625
+
1626
+ let column_type = spec
1627
+ .visible_column(column_name)
1628
+ .ok_or_else(|| {
1629
+ DataFusionError::Execution(format!(
1630
+ "sql2 entity provider '{}' does not expose column '{}'",
1631
+ spec.schema_key, column_name
1632
+ ))
1633
+ })?
1634
+ .column_type;
1635
+
1636
+ let values = snapshots
1637
+ .iter()
1638
+ .map(|snapshot| snapshot.as_ref().and_then(|value| value.get(column_name)))
1639
+ .collect::<Vec<_>>();
1640
+ Ok(match column_type {
1641
+ EntityColumnType::String | EntityColumnType::Json => Arc::new(StringArray::from(
1642
+ values
1643
+ .iter()
1644
+ .map(|value| entity_json_text_value(*value, column_type))
1645
+ .collect::<Result<Vec<_>>>()?,
1646
+ )) as ArrayRef,
1647
+ EntityColumnType::Integer => Arc::new(Int64Array::from(
1648
+ values
1649
+ .iter()
1650
+ .map(|value| entity_i64_value(*value))
1651
+ .collect::<Vec<_>>(),
1652
+ )) as ArrayRef,
1653
+ EntityColumnType::Number => Arc::new(Float64Array::from(
1654
+ values
1655
+ .iter()
1656
+ .map(|value| entity_f64_value(*value))
1657
+ .collect::<Vec<_>>(),
1658
+ )) as ArrayRef,
1659
+ EntityColumnType::Boolean => Arc::new(BooleanArray::from(
1660
+ values
1661
+ .iter()
1662
+ .map(|value| value.and_then(JsonValue::as_bool))
1663
+ .collect::<Vec<_>>(),
1664
+ )) as ArrayRef,
1665
+ })
1666
+ }
1667
+
1668
+ fn entity_system_column_array(column_name: &str, rows: &[LiveStateRow]) -> Result<ArrayRef> {
1669
+ Ok(match column_name {
1670
+ "entity_id" => Arc::new(StringArray::from(
1671
+ rows.iter()
1672
+ .map(|row| {
1673
+ row.entity_id
1674
+ .as_string()
1675
+ .map(Some)
1676
+ .map_err(lix_error_to_datafusion_error)
1677
+ })
1678
+ .collect::<Result<Vec<_>>>()?,
1679
+ )) as ArrayRef,
1680
+ "schema_key" => string_array(rows.iter().map(|row| Some(row.schema_key.as_str()))),
1681
+ "file_id" => string_array(rows.iter().map(|row| row.file_id.as_deref())),
1682
+ "snapshot_content" => string_array(rows.iter().map(|row| row.snapshot_content.as_deref())),
1683
+ "metadata" => Arc::new(StringArray::from(
1684
+ rows.iter()
1685
+ .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
1686
+ .collect::<Vec<_>>(),
1687
+ )) as ArrayRef,
1688
+ "schema_version" => string_array(rows.iter().map(|row| Some(row.schema_version.as_str()))),
1689
+ "created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
1690
+ "updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
1691
+ "global" => Arc::new(BooleanArray::from(
1692
+ rows.iter().map(|row| row.global).collect::<Vec<_>>(),
1693
+ )) as ArrayRef,
1694
+ "change_id" => string_array(rows.iter().map(|row| row.change_id.as_deref())),
1695
+ "commit_id" => string_array(rows.iter().map(|row| row.commit_id.as_deref())),
1696
+ "untracked" => Arc::new(BooleanArray::from(
1697
+ rows.iter().map(|row| row.untracked).collect::<Vec<_>>(),
1698
+ )) as ArrayRef,
1699
+ "version_id" => string_array(rows.iter().map(|row| Some(row.version_id.as_str()))),
1700
+ other => {
1701
+ return Err(DataFusionError::Execution(format!(
1702
+ "sql2 entity provider does not support system column 'lixcol_{other}'"
1703
+ )))
1704
+ }
1705
+ })
1706
+ }
1707
+
1708
+ pub(super) fn parse_snapshot(snapshot_content: Option<&str>) -> Result<Option<JsonValue>> {
1709
+ snapshot_content
1710
+ .map(|snapshot| {
1711
+ serde_json::from_str::<JsonValue>(snapshot).map_err(|error| {
1712
+ DataFusionError::Execution(format!(
1713
+ "sql2 entity provider expected valid snapshot_content JSON: {error}"
1714
+ ))
1715
+ })
1716
+ })
1717
+ .transpose()
1718
+ }
1719
+
1720
+ pub(super) fn entity_json_text_value(
1721
+ value: Option<&JsonValue>,
1722
+ column_type: EntityColumnType,
1723
+ ) -> Result<Option<String>> {
1724
+ Ok(match (column_type, value) {
1725
+ (_, None) | (_, Some(JsonValue::Null)) => None,
1726
+ (EntityColumnType::String, Some(JsonValue::Bool(value))) => Some(if *value {
1727
+ "true".to_string()
1728
+ } else {
1729
+ "false".to_string()
1730
+ }),
1731
+ (EntityColumnType::String, Some(JsonValue::String(value))) => Some(value.clone()),
1732
+ (EntityColumnType::String, Some(other)) => Some(json_to_string(other)?),
1733
+ (EntityColumnType::Json, Some(other)) => Some(json_to_string(other)?),
1734
+ _ => None,
1735
+ })
1736
+ }
1737
+
1738
+ pub(super) fn entity_i64_value(value: Option<&JsonValue>) -> Option<i64> {
1739
+ match value {
1740
+ Some(JsonValue::Number(number)) => number.as_i64(),
1741
+ Some(JsonValue::String(value)) => value.parse::<i64>().ok(),
1742
+ _ => None,
1743
+ }
1744
+ }
1745
+
1746
+ pub(super) fn entity_f64_value(value: Option<&JsonValue>) -> Option<f64> {
1747
+ match value {
1748
+ Some(JsonValue::Number(number)) => number.as_f64(),
1749
+ Some(JsonValue::String(value)) => value.parse::<f64>().ok(),
1750
+ _ => None,
1751
+ }
1752
+ }
1753
+
1754
+ fn json_to_string(value: &JsonValue) -> Result<String> {
1755
+ serde_json::to_string(value).map_err(|error| {
1756
+ DataFusionError::Execution(format!("failed to render JSON value: {error}"))
1757
+ })
1758
+ }
1759
+
1760
+ pub(super) fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
1761
+ let values = values
1762
+ .map(|value| value.map(ToOwned::to_owned))
1763
+ .collect::<Vec<_>>();
1764
+ Arc::new(StringArray::from(values)) as ArrayRef
1765
+ }
1766
+
1767
+ pub(super) fn entity_surface_schema(
1768
+ spec: &EntitySurfaceSpec,
1769
+ variant: EntityProviderVariant,
1770
+ ) -> SchemaRef {
1771
+ let mut fields = spec
1772
+ .columns
1773
+ .iter()
1774
+ .map(|column| {
1775
+ let field = Field::new(
1776
+ &column.name,
1777
+ arrow_data_type_for_entity_column_type(column.column_type),
1778
+ true,
1779
+ );
1780
+ if column.column_type == EntityColumnType::Json {
1781
+ mark_json_field(field)
1782
+ } else {
1783
+ field
1784
+ }
1785
+ })
1786
+ .collect::<Vec<_>>();
1787
+
1788
+ fields.extend(entity_system_fields(variant));
1789
+ Arc::new(Schema::new(fields))
1790
+ }
1791
+
1792
+ fn arrow_data_type_for_entity_column_type(column_type: EntityColumnType) -> DataType {
1793
+ match column_type {
1794
+ EntityColumnType::String | EntityColumnType::Json => DataType::Utf8,
1795
+ EntityColumnType::Integer => DataType::Int64,
1796
+ EntityColumnType::Number => DataType::Float64,
1797
+ EntityColumnType::Boolean => DataType::Boolean,
1798
+ }
1799
+ }
1800
+
1801
+ pub(super) fn entity_system_fields(variant: EntityProviderVariant) -> Vec<Field> {
1802
+ if variant == EntityProviderVariant::History {
1803
+ return vec![
1804
+ Field::new(HISTORY_COL_ENTITY_ID, DataType::Utf8, false),
1805
+ Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
1806
+ Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
1807
+ json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
1808
+ json_field(HISTORY_COL_METADATA, true),
1809
+ Field::new(HISTORY_COL_SCHEMA_VERSION, DataType::Utf8, false),
1810
+ Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
1811
+ Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
1812
+ Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
1813
+ Field::new(HISTORY_COL_START_COMMIT_ID, DataType::Utf8, false),
1814
+ Field::new(HISTORY_COL_DEPTH, DataType::Int64, false),
1815
+ ];
1816
+ }
1817
+
1818
+ let mut fields = vec![
1819
+ Field::new("lixcol_entity_id", DataType::Utf8, true),
1820
+ Field::new("lixcol_schema_key", DataType::Utf8, false),
1821
+ Field::new("lixcol_file_id", DataType::Utf8, true),
1822
+ json_field("lixcol_snapshot_content", true),
1823
+ json_field("lixcol_metadata", true),
1824
+ Field::new("lixcol_schema_version", DataType::Utf8, true),
1825
+ Field::new("lixcol_created_at", DataType::Utf8, true),
1826
+ Field::new("lixcol_updated_at", DataType::Utf8, true),
1827
+ Field::new("lixcol_global", DataType::Boolean, true),
1828
+ Field::new("lixcol_change_id", DataType::Utf8, true),
1829
+ Field::new("lixcol_commit_id", DataType::Utf8, true),
1830
+ Field::new("lixcol_untracked", DataType::Boolean, true),
1831
+ ];
1832
+ if variant == EntityProviderVariant::ByVersion {
1833
+ fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
1834
+ }
1835
+ fields
1836
+ }
1837
+
1838
+ fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1839
+ let Some(projection) = projection else {
1840
+ return Ok(Arc::clone(schema));
1841
+ };
1842
+ Ok(Arc::new(schema.project(projection)?))
1843
+ }
1844
+
1845
+ fn derive_entity_surface_spec_from_schema(
1846
+ schema: &JsonValue,
1847
+ ) -> std::result::Result<EntitySurfaceSpec, LixError> {
1848
+ let schema_key = schema
1849
+ .get("x-lix-key")
1850
+ .and_then(JsonValue::as_str)
1851
+ .ok_or_else(|| {
1852
+ LixError::new(
1853
+ "LIX_ERROR_UNKNOWN",
1854
+ "schema is missing string x-lix-key".to_string(),
1855
+ )
1856
+ })?;
1857
+
1858
+ let schema_version = schema
1859
+ .get("x-lix-version")
1860
+ .and_then(JsonValue::as_str)
1861
+ .map(ToOwned::to_owned);
1862
+
1863
+ let properties = schema
1864
+ .get("properties")
1865
+ .and_then(JsonValue::as_object)
1866
+ .ok_or_else(|| {
1867
+ LixError::new(
1868
+ LixError::CODE_SCHEMA_DEFINITION,
1869
+ format!("schema '{schema_key}' must define object properties"),
1870
+ )
1871
+ })?;
1872
+
1873
+ let mut columns = properties
1874
+ .iter()
1875
+ .filter(|(key, _)| !key.starts_with("lixcol_"))
1876
+ .map(|(key, property_schema)| {
1877
+ let column_type = entity_column_type_from_schema(property_schema).ok_or_else(|| {
1878
+ LixError::new(
1879
+ LixError::CODE_SCHEMA_DEFINITION,
1880
+ format!(
1881
+ "schema '{schema_key}' property '/{key}' must declare a SQL-projectable JSON Schema type"
1882
+ ),
1883
+ )
1884
+ .with_hint("Use an explicit type such as string, number, integer, boolean, object, array, or a supported union of those types.")
1885
+ })?;
1886
+ Ok(EntitySurfaceColumn {
1887
+ name: key.clone(),
1888
+ column_type,
1889
+ })
1890
+ })
1891
+ .collect::<std::result::Result<Vec<_>, LixError>>()?;
1892
+ columns.sort_by(|left, right| left.name.cmp(&right.name));
1893
+
1894
+ let primary_key_paths = parse_primary_key_paths(schema)?;
1895
+
1896
+ Ok(EntitySurfaceSpec {
1897
+ schema_key: schema_key.to_string(),
1898
+ schema_version,
1899
+ primary_key_paths,
1900
+ columns,
1901
+ })
1902
+ }
1903
+
1904
+ fn parse_primary_key_paths(schema: &JsonValue) -> std::result::Result<Vec<Vec<String>>, LixError> {
1905
+ let Some(primary_key) = schema.get("x-lix-primary-key") else {
1906
+ return Ok(Vec::new());
1907
+ };
1908
+ let primary_key = primary_key.as_array().ok_or_else(|| {
1909
+ LixError::new(
1910
+ "LIX_ERROR_UNKNOWN",
1911
+ "schema x-lix-primary-key must be an array of JSON Pointers".to_string(),
1912
+ )
1913
+ })?;
1914
+
1915
+ primary_key
1916
+ .iter()
1917
+ .enumerate()
1918
+ .map(|(index, pointer)| {
1919
+ let pointer = pointer.as_str().ok_or_else(|| {
1920
+ LixError::new(
1921
+ "LIX_ERROR_UNKNOWN",
1922
+ format!("schema x-lix-primary-key entry at index {index} must be a string"),
1923
+ )
1924
+ })?;
1925
+ parse_json_pointer(pointer)
1926
+ })
1927
+ .collect()
1928
+ }
1929
+
1930
+ // TODO(engine2): share JSON Pointer parsing with schema/canonical validation once
1931
+ // those helpers have a clean module boundary for SQL providers.
1932
+ fn parse_json_pointer(pointer: &str) -> std::result::Result<Vec<String>, LixError> {
1933
+ if pointer.is_empty() {
1934
+ return Ok(Vec::new());
1935
+ }
1936
+ if !pointer.starts_with('/') {
1937
+ return Err(LixError::new(
1938
+ "LIX_ERROR_UNKNOWN",
1939
+ format!("invalid JSON pointer '{pointer}'"),
1940
+ ));
1941
+ }
1942
+ pointer[1..]
1943
+ .split('/')
1944
+ .map(decode_json_pointer_segment)
1945
+ .collect()
1946
+ }
1947
+
1948
+ fn decode_json_pointer_segment(segment: &str) -> std::result::Result<String, LixError> {
1949
+ let mut out = String::new();
1950
+ let mut chars = segment.chars();
1951
+ while let Some(ch) = chars.next() {
1952
+ if ch == '~' {
1953
+ match chars.next() {
1954
+ Some('0') => out.push('~'),
1955
+ Some('1') => out.push('/'),
1956
+ _ => {
1957
+ return Err(LixError::new(
1958
+ "LIX_ERROR_UNKNOWN",
1959
+ format!("invalid JSON pointer segment '{segment}'"),
1960
+ ))
1961
+ }
1962
+ }
1963
+ } else {
1964
+ out.push(ch);
1965
+ }
1966
+ }
1967
+ Ok(out)
1968
+ }
1969
+
1970
/// Reports whether a schema key is exposed as an entity surface.
///
/// Every key is exposed except the fixed set of built-in keys listed below.
fn schema_exposed_as_entity_surface(schema_key: &str) -> bool {
    const HIDDEN_SCHEMA_KEYS: [&str; 5] = [
        "lix_active_account",
        "lix_change",
        "lix_commit_edge",
        "lix_change_set",
        "lix_change_set_element",
    ];
    !HIDDEN_SCHEMA_KEYS.contains(&schema_key)
}
1980
+
1981
+ fn entity_column_type_from_schema(schema: &JsonValue) -> Option<EntityColumnType> {
1982
+ let mut kinds = BTreeSet::new();
1983
+ collect_entity_type_kinds(schema, &mut kinds);
1984
+ kinds.remove("null");
1985
+
1986
+ if kinds.is_empty() {
1987
+ return None;
1988
+ }
1989
+
1990
+ if kinds.len() == 1 {
1991
+ return match kinds.into_iter().next() {
1992
+ Some("boolean") => Some(EntityColumnType::Boolean),
1993
+ Some("integer") => Some(EntityColumnType::Integer),
1994
+ Some("number") => Some(EntityColumnType::Number),
1995
+ Some("string") => Some(EntityColumnType::String),
1996
+ Some("object" | "array") => Some(EntityColumnType::Json),
1997
+ _ => None,
1998
+ };
1999
+ }
2000
+
2001
+ Some(EntityColumnType::Json)
2002
+ }
2003
+
2004
+ fn collect_entity_type_kinds<'a>(schema: &'a JsonValue, out: &mut BTreeSet<&'a str>) {
2005
+ match schema.get("type") {
2006
+ Some(JsonValue::String(kind)) => {
2007
+ out.insert(kind.as_str());
2008
+ }
2009
+ Some(JsonValue::Array(kinds)) => {
2010
+ for kind in kinds.iter().filter_map(JsonValue::as_str) {
2011
+ out.insert(kind);
2012
+ }
2013
+ }
2014
+ _ => {}
2015
+ }
2016
+
2017
+ for keyword in ["anyOf", "oneOf", "allOf"] {
2018
+ if let Some(JsonValue::Array(branches)) = schema.get(keyword) {
2019
+ for branch in branches {
2020
+ collect_entity_type_kinds(branch, out);
2021
+ }
2022
+ }
2023
+ }
2024
+ }
2025
+
2026
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
2027
+ super::error::datafusion_error_to_lix_error(error)
2028
+ }
2029
+
2030
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
2031
+ DataFusionError::External(Box::new(error))
2032
+ }
2033
+
2034
+ #[cfg(test)]
2035
+ mod tests {
2036
+ use std::sync::Arc;
2037
+
2038
+ use async_trait::async_trait;
2039
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray};
2040
+ use datafusion::arrow::datatypes::{DataType, Field, Schema};
2041
+ use datafusion::arrow::record_batch::RecordBatch;
2042
+ use datafusion::execution::TaskContext;
2043
+ use serde_json::json;
2044
+
2045
+ use super::{
2046
+ derive_entity_surface_spec_from_schema, entity_lix_state_write_rows_from_batch,
2047
+ entity_record_batch, entity_surface_schema, schema_exposed_as_entity_surface,
2048
+ EntityColumnType, EntityInsertSink, EntityProviderVariant,
2049
+ };
2050
+ use crate::binary_cas::BlobDataReader;
2051
+ use crate::functions::{
2052
+ FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
2053
+ };
2054
+ use crate::live_state::{
2055
+ LiveStateReader, LiveStateRow, LiveStateRowRequest, LiveStateScanRequest,
2056
+ };
2057
+ use crate::sql2::dml::InsertSink;
2058
+ use crate::sql2::write_normalization::InsertColumnIntents;
2059
+ use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
2060
+ use crate::transaction::types::{StageRow, StageWrite, StageWriteMode, StageWriteOutcome};
2061
+ use crate::version::{VersionHead, VersionRefReader};
2062
+ use crate::LixError;
2063
+
2064
+ struct EmptyLiveStateReader;
2065
+ struct EmptyVersionRefReader;
2066
+ #[derive(Default)]
2067
+ struct CapturingWriteContext {
2068
+ rows: Vec<LiveStateRow>,
2069
+ writes: Vec<StageWrite>,
2070
+ }
2071
+
2072
+ #[async_trait]
2073
+ impl LiveStateReader for EmptyLiveStateReader {
2074
+ async fn scan_rows(
2075
+ &self,
2076
+ _request: &LiveStateScanRequest,
2077
+ ) -> Result<Vec<LiveStateRow>, LixError> {
2078
+ Ok(vec![])
2079
+ }
2080
+
2081
+ async fn load_row(
2082
+ &self,
2083
+ _request: &LiveStateRowRequest,
2084
+ ) -> Result<Option<LiveStateRow>, LixError> {
2085
+ Ok(None)
2086
+ }
2087
+ }
2088
+
2089
+ #[async_trait]
2090
+ impl VersionRefReader for EmptyVersionRefReader {
2091
+ async fn load_head(&self, _version_id: &str) -> Result<Option<VersionHead>, LixError> {
2092
+ Ok(None)
2093
+ }
2094
+
2095
+ async fn scan_heads(&self) -> Result<Vec<VersionHead>, LixError> {
2096
+ Ok(Vec::new())
2097
+ }
2098
+ }
2099
+
2100
+ fn empty_version_ref() -> Arc<dyn VersionRefReader> {
2101
+ Arc::new(EmptyVersionRefReader)
2102
+ }
2103
+
2104
+ fn test_functions() -> FunctionProviderHandle {
2105
+ SharedFunctionProvider::new(
2106
+ Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
2107
+ )
2108
+ }
2109
+
2110
+ #[async_trait]
2111
+ impl BlobDataReader for CapturingWriteContext {
2112
+ async fn load_bytes_many(
2113
+ &self,
2114
+ hashes: &[crate::binary_cas::BlobHash],
2115
+ ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2116
+ Ok(crate::binary_cas::BlobBytesBatch::missing(hashes.len()))
2117
+ }
2118
+ }
2119
+
2120
+ #[async_trait]
2121
+ impl SqlWriteExecutionContext for CapturingWriteContext {
2122
+ fn active_version_id(&self) -> &str {
2123
+ "version-a"
2124
+ }
2125
+
2126
+ fn functions(&self) -> FunctionProviderHandle {
2127
+ test_functions()
2128
+ }
2129
+
2130
+ fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
2131
+ Ok(Vec::new())
2132
+ }
2133
+
2134
+ async fn load_bytes_many(
2135
+ &mut self,
2136
+ hashes: &[crate::binary_cas::BlobHash],
2137
+ ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2138
+ BlobDataReader::load_bytes_many(self, hashes).await
2139
+ }
2140
+
2141
+ async fn scan_live_state(
2142
+ &mut self,
2143
+ _request: &LiveStateScanRequest,
2144
+ ) -> Result<Vec<LiveStateRow>, LixError> {
2145
+ Ok(self.rows.clone())
2146
+ }
2147
+
2148
+ async fn load_version_head(
2149
+ &mut self,
2150
+ version_id: &str,
2151
+ ) -> Result<Option<String>, LixError> {
2152
+ if version_id == "ghost-version" {
2153
+ return Ok(None);
2154
+ }
2155
+ Ok(Some(format!("commit-{version_id}")))
2156
+ }
2157
+
2158
+ async fn stage_write(&mut self, write: StageWrite) -> Result<StageWriteOutcome, LixError> {
2159
+ self.writes.push(write);
2160
+ Ok(StageWriteOutcome { count: 0 })
2161
+ }
2162
+ }
2163
+
2164
+ fn live_row() -> LiveStateRow {
2165
+ LiveStateRow {
2166
+ entity_id: crate::entity_identity::EntityIdentity::single("entity-1"),
2167
+ schema_key: "project_message".to_string(),
2168
+ file_id: None,
2169
+ snapshot_content: Some(
2170
+ "{\"body\":\"hello\",\"rating\":4.5,\"count\":7,\"enabled\":true,\"meta\":{\"x\":1}}"
2171
+ .to_string(),
2172
+ ),
2173
+ metadata: Some(json!({"source": "test"})),
2174
+ schema_version: "1".to_string(),
2175
+ version_id: "version-a".to_string(),
2176
+ change_id: Some("change-a".to_string()),
2177
+ commit_id: Some("commit-a".to_string()),
2178
+ global: false,
2179
+ untracked: false,
2180
+ created_at: "2026-04-23T00:00:00Z".to_string(),
2181
+ updated_at: "2026-04-23T01:00:00Z".to_string(),
2182
+ }
2183
+ }
2184
+
2185
+ fn entity_insert_spec() -> Arc<super::EntitySurfaceSpec> {
2186
+ Arc::new(
2187
+ derive_entity_surface_spec_from_schema(&json!({
2188
+ "x-lix-key": "project_message",
2189
+ "x-lix-version": "1",
2190
+ "type": "object",
2191
+ "properties": {
2192
+ "body": { "type": "string" },
2193
+ "count": { "type": "integer" },
2194
+ "enabled": { "type": "boolean" },
2195
+ "meta": { "type": "object" },
2196
+ "rating": { "type": "number" }
2197
+ }
2198
+ }))
2199
+ .expect("schema should derive entity surface spec"),
2200
+ )
2201
+ }
2202
+
2203
+ fn entity_insert_spec_with_primary_key() -> Arc<super::EntitySurfaceSpec> {
2204
+ Arc::new(
2205
+ derive_entity_surface_spec_from_schema(&json!({
2206
+ "x-lix-key": "project_message",
2207
+ "x-lix-version": "1",
2208
+ "x-lix-primary-key": ["/id"],
2209
+ "type": "object",
2210
+ "properties": {
2211
+ "id": { "type": "string" },
2212
+ "body": { "type": "string" }
2213
+ },
2214
+ "required": ["id", "body"]
2215
+ }))
2216
+ .expect("schema should derive entity surface spec"),
2217
+ )
2218
+ }
2219
+
2220
+ fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
2221
+ Arc::new(StringArray::from(values)) as ArrayRef
2222
+ }
2223
+
2224
+ fn entity_insert_batch(include_version: bool, global: bool) -> RecordBatch {
2225
+ let mut fields = vec![
2226
+ Field::new("body", DataType::Utf8, true),
2227
+ Field::new("count", DataType::Int64, true),
2228
+ Field::new("enabled", DataType::Boolean, true),
2229
+ Field::new("meta", DataType::Utf8, true),
2230
+ Field::new("rating", DataType::Float64, true),
2231
+ Field::new("lixcol_entity_id", DataType::Utf8, false),
2232
+ Field::new("lixcol_metadata", DataType::Utf8, true),
2233
+ Field::new("lixcol_global", DataType::Boolean, false),
2234
+ Field::new("lixcol_untracked", DataType::Boolean, false),
2235
+ ];
2236
+ let mut columns = vec![
2237
+ string_column(vec![Some("hello")]),
2238
+ Arc::new(Int64Array::from(vec![7])) as ArrayRef,
2239
+ Arc::new(BooleanArray::from(vec![true])) as ArrayRef,
2240
+ string_column(vec![Some("{\"x\":1}")]),
2241
+ Arc::new(Float64Array::from(vec![4.5])) as ArrayRef,
2242
+ string_column(vec![Some("entity-1")]),
2243
+ string_column(vec![Some("{\"source\":\"entity\"}")]),
2244
+ Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
2245
+ Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2246
+ ];
2247
+ if include_version {
2248
+ fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
2249
+ columns.push(string_column(vec![Some("version-a")]));
2250
+ }
2251
+
2252
+ RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
2253
+ .expect("entity insert batch should build")
2254
+ }
2255
+
2256
+ fn primary_key_entity_insert_batch(include_entity_id: bool) -> RecordBatch {
2257
+ let mut fields = vec![
2258
+ Field::new("id", DataType::Utf8, false),
2259
+ Field::new("body", DataType::Utf8, true),
2260
+ Field::new("lixcol_version_id", DataType::Utf8, false),
2261
+ ];
2262
+ let mut columns = vec![
2263
+ string_column(vec![Some("message-1")]),
2264
+ string_column(vec![Some("hello")]),
2265
+ string_column(vec![Some("version-a")]),
2266
+ ];
2267
+ if include_entity_id {
2268
+ fields.push(Field::new("lixcol_entity_id", DataType::Utf8, false));
2269
+ columns.push(string_column(vec![Some("message-1")]));
2270
+ }
2271
+
2272
+ RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
2273
+ .expect("primary-key entity insert batch should build")
2274
+ }
2275
+
2276
+ #[test]
2277
+ fn excludes_non_entity_builtin_session_surfaces() {
2278
+ assert!(!schema_exposed_as_entity_surface("lix_active_account"));
2279
+ assert!(schema_exposed_as_entity_surface("project_message"));
2280
+ }
2281
+
2282
+ #[test]
2283
+ fn derives_entity_surface_spec_from_schema_definition() {
2284
+ let spec = derive_entity_surface_spec_from_schema(&json!({
2285
+ "x-lix-key": "project_message",
2286
+ "x-lix-version": "1",
2287
+ "type": "object",
2288
+ "properties": {
2289
+ "body": { "type": "string" },
2290
+ "rating": { "type": "number" },
2291
+ "meta": { "type": "object" },
2292
+ "lixcol_entity_id": { "type": "string" }
2293
+ }
2294
+ }))
2295
+ .expect("schema should derive entity surface spec");
2296
+
2297
+ assert_eq!(spec.schema_key, "project_message");
2298
+ assert_eq!(spec.schema_version.as_deref(), Some("1"));
2299
+ assert_eq!(
2300
+ spec.visible_column_names().collect::<Vec<_>>(),
2301
+ vec!["body", "meta", "rating"]
2302
+ );
2303
+ assert_eq!(
2304
+ spec.visible_column("body").map(|column| column.column_type),
2305
+ Some(EntityColumnType::String)
2306
+ );
2307
+ assert_eq!(
2308
+ spec.visible_column("rating")
2309
+ .map(|column| column.column_type),
2310
+ Some(EntityColumnType::Number)
2311
+ );
2312
+ assert_eq!(
2313
+ spec.visible_column("meta").map(|column| column.column_type),
2314
+ Some(EntityColumnType::Json)
2315
+ );
2316
+ assert!(!spec.is_visible_column("lixcol_entity_id"));
2317
+ }
2318
+
2319
+ #[test]
2320
+ fn entity_surface_spec_rejects_properties_without_projection_type() {
2321
+ let error = derive_entity_surface_spec_from_schema(&json!({
2322
+ "x-lix-key": "project_message",
2323
+ "x-lix-version": "1",
2324
+ "x-lix-primary-key": ["/id"],
2325
+ "type": "object",
2326
+ "properties": {
2327
+ "id": { "type": "string" },
2328
+ "kind": {}
2329
+ },
2330
+ "required": ["id", "kind"],
2331
+ "additionalProperties": false
2332
+ }))
2333
+ .expect_err("unprojectable property should be rejected");
2334
+
2335
+ assert_eq!(error.code, LixError::CODE_SCHEMA_DEFINITION);
2336
+ assert!(
2337
+ error.message.contains("property '/kind'"),
2338
+ "error should identify the property: {error:?}"
2339
+ );
2340
+ }
2341
+
2342
+ #[test]
2343
+ fn by_version_schema_includes_version_system_column() {
2344
+ let spec = derive_entity_surface_spec_from_schema(&json!({
2345
+ "x-lix-key": "project_message",
2346
+ "type": "object",
2347
+ "properties": {
2348
+ "body": { "type": "string" }
2349
+ }
2350
+ }))
2351
+ .expect("schema should derive entity surface spec");
2352
+
2353
+ let schema = entity_surface_schema(&spec, EntityProviderVariant::ByVersion);
2354
+ assert!(schema.field_with_name("body").is_ok());
2355
+ assert!(schema.field_with_name("lixcol_entity_id").is_ok());
2356
+ assert!(schema.field_with_name("lixcol_version_id").is_ok());
2357
+ }
2358
+
2359
+ #[test]
2360
+ fn active_schema_excludes_version_system_column() {
2361
+ let spec = derive_entity_surface_spec_from_schema(&json!({
2362
+ "x-lix-key": "project_message",
2363
+ "type": "object",
2364
+ "properties": {
2365
+ "body": { "type": "string" }
2366
+ }
2367
+ }))
2368
+ .expect("schema should derive entity surface spec");
2369
+
2370
+ let schema = entity_surface_schema(&spec, EntityProviderVariant::Active);
2371
+ assert!(schema.field_with_name("body").is_ok());
2372
+ assert!(schema.field_with_name("lixcol_entity_id").is_ok());
2373
+ assert!(schema.field_with_name("lixcol_version_id").is_err());
2374
+ }
2375
+
2376
+ #[test]
2377
+ fn insert_schema_allows_defaulted_identity_columns_to_be_omitted() {
2378
+ let spec = derive_entity_surface_spec_from_schema(&json!({
2379
+ "x-lix-key": "project_message",
2380
+ "x-lix-primary-key": ["/id"],
2381
+ "type": "object",
2382
+ "properties": {
2383
+ "id": { "type": "string", "x-lix-default": "lix_uuid_v7()" },
2384
+ "body": { "type": "string" }
2385
+ }
2386
+ }))
2387
+ .expect("schema should derive entity surface spec");
2388
+
2389
+ let schema = entity_surface_schema(&spec, EntityProviderVariant::Active);
2390
+ assert!(
2391
+ schema
2392
+ .field_with_name("id")
2393
+ .expect("id field")
2394
+ .is_nullable(),
2395
+ "defaulted primary-key property should be nullable at SQL input"
2396
+ );
2397
+ assert!(
2398
+ schema
2399
+ .field_with_name("lixcol_entity_id")
2400
+ .expect("entity id field")
2401
+ .is_nullable(),
2402
+ "opaque identity projection should be nullable for normal primary-key inserts"
2403
+ );
2404
+ }
2405
+
2406
+ #[test]
2407
+ fn record_batch_projects_payload_and_system_columns() {
2408
+ let spec = Arc::new(
2409
+ derive_entity_surface_spec_from_schema(&json!({
2410
+ "x-lix-key": "project_message",
2411
+ "type": "object",
2412
+ "properties": {
2413
+ "body": { "type": "string" },
2414
+ "rating": { "type": "number" },
2415
+ "count": { "type": "integer" },
2416
+ "enabled": { "type": "boolean" },
2417
+ "meta": { "type": "object" }
2418
+ }
2419
+ }))
2420
+ .expect("schema should derive entity surface spec"),
2421
+ );
2422
+ let schema = entity_surface_schema(&spec, EntityProviderVariant::ByVersion);
2423
+
2424
+ let batch =
2425
+ entity_record_batch(&spec, schema, &[live_row()]).expect("entity batch should build");
2426
+
2427
+ assert_eq!(batch.num_rows(), 1);
2428
+ assert_eq!(
2429
+ batch
2430
+ .column_by_name("body")
2431
+ .expect("body column")
2432
+ .as_any()
2433
+ .downcast_ref::<datafusion::arrow::array::StringArray>()
2434
+ .expect("body is string")
2435
+ .value(0),
2436
+ "hello"
2437
+ );
2438
+ assert_eq!(
2439
+ batch
2440
+ .column_by_name("rating")
2441
+ .expect("rating column")
2442
+ .as_any()
2443
+ .downcast_ref::<Float64Array>()
2444
+ .expect("rating is f64")
2445
+ .value(0),
2446
+ 4.5
2447
+ );
2448
+ assert_eq!(
2449
+ batch
2450
+ .column_by_name("count")
2451
+ .expect("count column")
2452
+ .as_any()
2453
+ .downcast_ref::<Int64Array>()
2454
+ .expect("count is i64")
2455
+ .value(0),
2456
+ 7
2457
+ );
2458
+ assert_eq!(
2459
+ batch
2460
+ .column_by_name("lixcol_entity_id")
2461
+ .expect("entity id column")
2462
+ .as_any()
2463
+ .downcast_ref::<datafusion::arrow::array::StringArray>()
2464
+ .expect("entity id is string")
2465
+ .value(0),
2466
+ "entity-1"
2467
+ );
2468
+ assert_eq!(
2469
+ batch
2470
+ .column_by_name("lixcol_version_id")
2471
+ .expect("version id column")
2472
+ .as_any()
2473
+ .downcast_ref::<datafusion::arrow::array::StringArray>()
2474
+ .expect("version id is string")
2475
+ .value(0),
2476
+ "version-a"
2477
+ );
2478
+ }
2479
+
2480
+ #[tokio::test]
2481
+ async fn provider_registers_as_table_provider() {
2482
+ let spec = Arc::new(
2483
+ derive_entity_surface_spec_from_schema(&json!({
2484
+ "x-lix-key": "project_message",
2485
+ "type": "object",
2486
+ "properties": {
2487
+ "body": { "type": "string" }
2488
+ }
2489
+ }))
2490
+ .expect("schema should derive entity surface spec"),
2491
+ );
2492
+ let provider = super::EntityProvider::by_version(
2493
+ spec,
2494
+ Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>,
2495
+ empty_version_ref(),
2496
+ );
2497
+
2498
+ assert!(provider.schema.field_with_name("lixcol_version_id").is_ok());
2499
+ }
2500
+
2501
/// Decodes a single-row entity insert batch and checks every staged field:
/// identity, schema key/version, version id, metadata, the global flag and
/// the JSON snapshot content.
///
/// NOTE(review): `entity_insert_batch(true, false)` presumably means "with an
/// explicit version id, not global" — confirm against the helper.
#[test]
fn decodes_by_version_entity_insert_into_lix_state_write_row() {
    let spec = entity_insert_spec();
    let batch = entity_insert_batch(true, false);
    let intents = InsertColumnIntents::all_explicit();

    let rows = entity_lix_state_write_rows_from_batch(&spec, &batch, &intents, None)
        .expect("entity batch should decode");

    assert_eq!(rows.len(), 1);
    let row = &rows[0];

    let expected_identity = crate::entity_identity::EntityIdentity::single("entity-1");
    assert_eq!(row.entity_id.as_ref(), Some(&expected_identity));
    assert_eq!(row.schema_key, "project_message");
    assert_eq!(row.schema_version.as_str(), "1");
    assert_eq!(row.version_id, "version-a");

    let expected_metadata = json!({"source": "entity"});
    assert_eq!(row.metadata.as_ref(), Some(&expected_metadata));
    assert!(!row.global);

    // Compare the snapshot as parsed JSON so key order in the serialized
    // string does not matter.
    let raw_snapshot = row.snapshot_content.as_deref().expect("snapshot_content");
    let snapshot: serde_json::Value =
        serde_json::from_str(raw_snapshot).expect("snapshot_content JSON");
    let expected_snapshot = json!({
        "body": "hello",
        "count": 7,
        "enabled": true,
        "meta": {"x": 1},
        "rating": 4.5
    });
    assert_eq!(snapshot, expected_snapshot);
}
2542
+
2543
/// Decodes a primary-key entity insert and checks that the staged row has no
/// entity id yet, while its snapshot carries the `body` and `id` properties.
///
/// NOTE(review): `primary_key_entity_insert_batch(false)` presumably omits the
/// explicit `lixcol_entity_id` column — confirm against the helper.
#[test]
fn primary_key_entity_insert_stages_partial_row_for_normalization() {
    let spec = entity_insert_spec_with_primary_key();
    let batch = primary_key_entity_insert_batch(false);
    let intents = InsertColumnIntents::all_explicit();

    let rows = entity_lix_state_write_rows_from_batch(&spec, &batch, &intents, None)
        .expect("entity batch should decode");

    assert_eq!(rows.len(), 1);
    let row = &rows[0];
    assert_eq!(row.entity_id, None);

    // Compare the snapshot as parsed JSON rather than as a raw string.
    let raw_snapshot = row.snapshot_content.as_deref().expect("snapshot_content");
    let snapshot: serde_json::Value =
        serde_json::from_str(raw_snapshot).expect("snapshot_content JSON");
    let expected_snapshot = json!({
        "body": "hello",
        "id": "message-1"
    });
    assert_eq!(snapshot, expected_snapshot);
}
2570
+
2571
/// Decodes a primary-key entity insert and checks that the staged row carries
/// the single-part identity `message-1`.
///
/// NOTE(review): `primary_key_entity_insert_batch(true)` presumably includes an
/// explicit `lixcol_entity_id` column — confirm against the helper.
#[test]
fn primary_key_entity_insert_preserves_explicit_opaque_projection_for_normalization() {
    let spec = entity_insert_spec_with_primary_key();
    let batch = primary_key_entity_insert_batch(true);
    let intents = InsertColumnIntents::all_explicit();

    let rows = entity_lix_state_write_rows_from_batch(&spec, &batch, &intents, None)
        .expect("primary-key entity insert should stage explicit lixcol_entity_id");

    assert_eq!(rows.len(), 1);

    let expected_identity = crate::entity_identity::EntityIdentity::single("message-1");
    assert_eq!(rows[0].entity_id.as_ref(), Some(&expected_identity));
}
2588
+
2589
/// Decodes an insert batch while passing `Some("version-active")` as the
/// final argument and checks that the staged row uses that version id and is
/// not global.
#[test]
fn active_entity_insert_defaults_version_id() {
    let spec = entity_insert_spec();
    let batch = entity_insert_batch(false, false);
    let intents = InsertColumnIntents::all_explicit();
    let active_version = Some("version-active");

    let rows = entity_lix_state_write_rows_from_batch(&spec, &batch, &intents, active_version)
        .expect("active entity batch should decode");

    assert_eq!(rows.len(), 1);
    let row = &rows[0];
    assert_eq!(row.version_id, "version-active");
    assert!(!row.global);
}
2604
+
2605
/// Checks that decoding `entity_insert_batch(false, false)` with `None` as the
/// final argument fails, and that the error text mentions the missing
/// `lixcol_version_id`.
#[test]
fn by_version_entity_insert_requires_version_id_for_non_global_rows() {
    let spec = entity_insert_spec();
    let batch = entity_insert_batch(false, false);
    let intents = InsertColumnIntents::all_explicit();

    let outcome = entity_lix_state_write_rows_from_batch(&spec, &batch, &intents, None);
    let error = outcome.expect_err("by-version entity insert should require version id");

    let message = error.to_string();
    assert!(
        message.contains("requires lixcol_version_id"),
        "unexpected error: {error}"
    );
}
2621
+
2622
/// Decodes `entity_insert_batch(false, true)` with `None` as the final
/// argument and checks that the staged row is flagged global and is assigned
/// `crate::GLOBAL_VERSION_ID`.
#[test]
fn by_version_entity_insert_global_row_uses_global_version() {
    let spec = entity_insert_spec();
    let batch = entity_insert_batch(false, true);
    let intents = InsertColumnIntents::all_explicit();

    let rows = entity_lix_state_write_rows_from_batch(&spec, &batch, &intents, None)
        .expect("global entity batch should decode");

    assert_eq!(rows.len(), 1);
    let row = &rows[0];
    assert!(row.global);
    assert_eq!(row.version_id, crate::GLOBAL_VERSION_ID);
}
2637
+
2638
/// Checks that decoding `entity_insert_batch(true, true)` fails with an error
/// about combining `lixcol_global=true` with a non-global `lixcol_version_id`.
#[test]
fn entity_insert_rejects_global_with_non_global_version_id() {
    let spec = entity_insert_spec();
    let batch = entity_insert_batch(true, true);
    let intents = InsertColumnIntents::all_explicit();

    let outcome = entity_lix_state_write_rows_from_batch(&spec, &batch, &intents, None);
    let error = outcome.expect_err("global entity write should reject conflicting version id");

    let message = error.to_string();
    assert!(
        message.contains("cannot set lixcol_global=true with non-global lixcol_version_id"),
        "unexpected error: {error}"
    );
}
2656
+
2657
+ #[tokio::test]
2658
+ async fn entity_insert_sink_stages_decoded_lix_state_rows() {
2659
+ let spec = entity_insert_spec();
2660
+ let mut write_context = CapturingWriteContext::default();
2661
+ let write_ctx = SqlWriteContext::new(&mut write_context);
2662
+ let batch = entity_insert_batch(true, false);
2663
+ let sink = EntityInsertSink::new(
2664
+ Arc::clone(&spec),
2665
+ batch.schema(),
2666
+ InsertColumnIntents::all_explicit(),
2667
+ write_ctx,
2668
+ super::VersionBinding::explicit(),
2669
+ );
2670
+ let count = sink
2671
+ .write_batches(vec![batch], &Arc::new(TaskContext::default()))
2672
+ .await
2673
+ .expect("entity sink should stage write");
2674
+
2675
+ assert_eq!(count, 1);
2676
+ assert_eq!(
2677
+ write_context.writes.as_slice(),
2678
+ &[StageWrite::Rows { mode: StageWriteMode::Insert, rows: vec![StageRow {
2679
+ entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2680
+ schema_key: "project_message".to_string(),
2681
+ file_id: None,
2682
+ snapshot_content: Some(
2683
+ "{\"body\":\"hello\",\"count\":7,\"enabled\":true,\"meta\":{\"x\":1},\"rating\":4.5}"
2684
+ .to_string()
2685
+ ),
2686
+ metadata: Some(json!({"source": "entity"})),
2687
+ origin: None,
2688
+ schema_version: "1".to_string(),
2689
+ created_at: None,
2690
+ updated_at: None,
2691
+ global: false,
2692
+ change_id: None,
2693
+ commit_id: None,
2694
+ untracked: false,
2695
+ version_id: "version-a".to_string(),
2696
+ }]
2697
+ }]
2698
+ );
2699
+ }
2700
+ }