@lix-js/sdk 0.6.0-preview.4 → 0.6.0-preview.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. package/README.md +1 -1
  2. package/SKILL.md +65 -64
  3. package/dist/engine-wasm/index.js +4 -4
  4. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -5
  5. package/dist/engine-wasm/wasm/lix_engine.js +130 -118
  6. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  7. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +9 -8
  8. package/dist/generated/builtin-schemas.d.ts +69 -69
  9. package/dist/generated/builtin-schemas.js +94 -94
  10. package/dist/open-lix.d.ts +33 -26
  11. package/dist/open-lix.js +10 -10
  12. package/dist/sqlite/index.js +86 -30
  13. package/dist-engine-src/README.md +3 -3
  14. package/dist-engine-src/src/backend/capabilities.rs +67 -0
  15. package/dist-engine-src/src/backend/conformance/baseline.rs +1127 -0
  16. package/dist-engine-src/src/backend/conformance/factory.rs +93 -0
  17. package/dist-engine-src/src/backend/conformance/failure_tests.rs +608 -0
  18. package/dist-engine-src/src/backend/conformance/fixtures.rs +26 -0
  19. package/dist-engine-src/src/backend/conformance/mod.rs +75 -0
  20. package/dist-engine-src/src/backend/conformance/model.rs +28 -0
  21. package/dist-engine-src/src/backend/conformance/model_based.rs +257 -0
  22. package/dist-engine-src/src/backend/conformance/persistence.rs +204 -0
  23. package/dist-engine-src/src/backend/conformance/projection.rs +21 -0
  24. package/dist-engine-src/src/backend/conformance/pushdown.rs +24 -0
  25. package/dist-engine-src/src/backend/conformance/runner.rs +90 -0
  26. package/dist-engine-src/src/backend/conformance/scan.rs +24 -0
  27. package/dist-engine-src/src/backend/conformance/write.rs +16 -0
  28. package/dist-engine-src/src/backend/error.rs +94 -0
  29. package/dist-engine-src/src/backend/in_memory.rs +670 -0
  30. package/dist-engine-src/src/backend/mod.rs +36 -9
  31. package/dist-engine-src/src/backend/predicate.rs +80 -0
  32. package/dist-engine-src/src/backend/traits.rs +260 -0
  33. package/dist-engine-src/src/backend/types.rs +224 -81
  34. package/dist-engine-src/src/binary_cas/context.rs +8 -8
  35. package/dist-engine-src/src/binary_cas/kv.rs +234 -259
  36. package/dist-engine-src/src/{version → branch}/context.rs +12 -12
  37. package/dist-engine-src/src/branch/lifecycle.rs +221 -0
  38. package/dist-engine-src/src/branch/mod.rs +13 -0
  39. package/dist-engine-src/src/branch/refs.rs +321 -0
  40. package/dist-engine-src/src/branch/stage_rows.rs +67 -0
  41. package/dist-engine-src/src/branch/types.rs +21 -0
  42. package/dist-engine-src/src/catalog/context.rs +18 -18
  43. package/dist-engine-src/src/catalog/snapshot.rs +8 -8
  44. package/dist-engine-src/src/changelog/bench_support.rs +785 -0
  45. package/dist-engine-src/src/changelog/change.rs +1 -0
  46. package/dist-engine-src/src/changelog/codec.rs +497 -0
  47. package/dist-engine-src/src/changelog/commit.rs +1 -0
  48. package/dist-engine-src/src/changelog/context.rs +1614 -0
  49. package/dist-engine-src/src/changelog/mod.rs +29 -0
  50. package/dist-engine-src/src/changelog/store.rs +163 -0
  51. package/dist-engine-src/src/changelog/test_support.rs +54 -0
  52. package/dist-engine-src/src/changelog/types.rs +213 -0
  53. package/dist-engine-src/src/commit_graph/context.rs +317 -274
  54. package/dist-engine-src/src/commit_graph/mod.rs +2 -4
  55. package/dist-engine-src/src/commit_graph/types.rs +22 -42
  56. package/dist-engine-src/src/commit_graph/walker.rs +133 -103
  57. package/dist-engine-src/src/common/error.rs +52 -18
  58. package/dist-engine-src/src/common/identity.rs +2 -2
  59. package/dist-engine-src/src/common/mod.rs +1 -1
  60. package/dist-engine-src/src/domain.rs +42 -46
  61. package/dist-engine-src/src/engine.rs +74 -96
  62. package/dist-engine-src/src/{entity_identity.rs → entity_pk.rs} +89 -92
  63. package/dist-engine-src/src/functions/context.rs +56 -52
  64. package/dist-engine-src/src/functions/state.rs +51 -52
  65. package/dist-engine-src/src/init.rs +288 -154
  66. package/dist-engine-src/src/json_store/context.rs +15 -266
  67. package/dist-engine-src/src/json_store/mod.rs +26 -0
  68. package/dist-engine-src/src/json_store/store.rs +103 -718
  69. package/dist-engine-src/src/json_store/types.rs +4 -9
  70. package/dist-engine-src/src/lib.rs +49 -19
  71. package/dist-engine-src/src/live_state/context.rs +654 -790
  72. package/dist-engine-src/src/live_state/mod.rs +9 -3
  73. package/dist-engine-src/src/live_state/overlay.rs +4 -4
  74. package/dist-engine-src/src/live_state/types.rs +30 -21
  75. package/dist-engine-src/src/live_state/visibility.rs +514 -71
  76. package/dist-engine-src/src/plugin/install.rs +48 -48
  77. package/dist-engine-src/src/plugin/manifest.rs +7 -7
  78. package/dist-engine-src/src/plugin/materializer.rs +0 -275
  79. package/dist-engine-src/src/plugin/plugin_manifest.json +4 -3
  80. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +2 -2
  81. package/dist-engine-src/src/schema/builtin/lix_branch_descriptor.json +34 -0
  82. package/dist-engine-src/src/schema/builtin/lix_branch_ref.json +48 -0
  83. package/dist-engine-src/src/schema/builtin/lix_change.json +3 -3
  84. package/dist-engine-src/src/schema/builtin/lix_commit.json +1 -1
  85. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +6 -6
  86. package/dist-engine-src/src/schema/builtin/mod.rs +18 -20
  87. package/dist-engine-src/src/schema/compatibility.rs +11 -11
  88. package/dist-engine-src/src/schema/definition.json +2 -2
  89. package/dist-engine-src/src/schema/definition.rs +5 -5
  90. package/dist-engine-src/src/schema/key.rs +3 -3
  91. package/dist-engine-src/src/schema/mod.rs +1 -1
  92. package/dist-engine-src/src/schema/tests.rs +18 -18
  93. package/dist-engine-src/src/session/context.rs +803 -148
  94. package/dist-engine-src/src/session/create_branch.rs +94 -0
  95. package/dist-engine-src/src/session/execute.rs +223 -83
  96. package/dist-engine-src/src/session/merge/analysis.rs +9 -3
  97. package/dist-engine-src/src/session/merge/{version.rs → branch.rs} +119 -129
  98. package/dist-engine-src/src/session/merge/conflicts.rs +2 -2
  99. package/dist-engine-src/src/session/merge/mod.rs +5 -6
  100. package/dist-engine-src/src/session/merge/stats.rs +7 -11
  101. package/dist-engine-src/src/session/mod.rs +15 -12
  102. package/dist-engine-src/src/session/switch_branch.rs +113 -0
  103. package/dist-engine-src/src/session/transaction.rs +495 -14
  104. package/dist-engine-src/src/sql2/{classify.rs → bind/classify.rs} +3 -75
  105. package/dist-engine-src/src/sql2/bind/error.rs +5 -0
  106. package/dist-engine-src/src/sql2/bind/expr.rs +29 -0
  107. package/dist-engine-src/src/sql2/bind/mod.rs +12 -0
  108. package/dist-engine-src/src/sql2/{udfs/public_call.rs → bind/public_udf.rs} +71 -3
  109. package/dist-engine-src/src/sql2/bind/read.rs +65 -0
  110. package/dist-engine-src/src/sql2/bind/statement.rs +2236 -0
  111. package/dist-engine-src/src/sql2/bind/table.rs +273 -0
  112. package/dist-engine-src/src/sql2/bind/write.rs +86 -0
  113. package/dist-engine-src/src/sql2/branch_scope.rs +436 -0
  114. package/dist-engine-src/src/sql2/catalog/capability.rs +20 -0
  115. package/dist-engine-src/src/sql2/catalog/entity_surface.rs +296 -0
  116. package/dist-engine-src/src/sql2/catalog/mod.rs +15 -0
  117. package/dist-engine-src/src/sql2/catalog/registry.rs +556 -0
  118. package/dist-engine-src/src/sql2/catalog/schema.rs +88 -0
  119. package/dist-engine-src/src/sql2/catalog/surface.rs +41 -0
  120. package/dist-engine-src/src/sql2/change_materialization.rs +122 -0
  121. package/dist-engine-src/src/sql2/context.rs +36 -30
  122. package/dist-engine-src/src/sql2/error.rs +1 -1
  123. package/dist-engine-src/src/sql2/exec/bound_public_write.rs +1593 -0
  124. package/dist-engine-src/src/sql2/exec/datafusion.rs +5266 -0
  125. package/dist-engine-src/src/sql2/exec/fast_write.rs +82 -0
  126. package/dist-engine-src/src/sql2/exec/mod.rs +24 -0
  127. package/dist-engine-src/src/sql2/exec/write.rs +661 -0
  128. package/dist-engine-src/src/sql2/filesystem_planner.rs +72 -77
  129. package/dist-engine-src/src/sql2/filesystem_visibility.rs +21 -21
  130. package/dist-engine-src/src/sql2/history_projection.rs +8 -8
  131. package/dist-engine-src/src/sql2/history_route.rs +35 -31
  132. package/dist-engine-src/src/sql2/mod.rs +28 -23
  133. package/dist-engine-src/src/sql2/optimize/datafusion.rs +1 -0
  134. package/dist-engine-src/src/sql2/optimize/mod.rs +2 -0
  135. package/dist-engine-src/src/sql2/optimize/simple_write.rs +116 -0
  136. package/dist-engine-src/src/sql2/parse/mod.rs +69 -0
  137. package/dist-engine-src/src/sql2/parse/normalize.rs +1 -0
  138. package/dist-engine-src/src/sql2/plan/branch_scope.rs +24 -0
  139. package/dist-engine-src/src/sql2/plan/mod.rs +5 -0
  140. package/dist-engine-src/src/sql2/plan/predicate.rs +22 -0
  141. package/dist-engine-src/src/sql2/plan/write.rs +147 -0
  142. package/dist-engine-src/src/sql2/predicate_typecheck.rs +258 -0
  143. package/dist-engine-src/src/sql2/{version_provider.rs → providers/branch.rs} +218 -214
  144. package/dist-engine-src/src/sql2/{change_provider.rs → providers/change.rs} +156 -42
  145. package/dist-engine-src/src/sql2/{directory_provider.rs → providers/directory.rs} +291 -322
  146. package/dist-engine-src/src/sql2/{directory_history_provider.rs → providers/directory_history.rs} +56 -42
  147. package/dist-engine-src/src/sql2/providers/entity.rs +1484 -0
  148. package/dist-engine-src/src/sql2/{entity_history_provider.rs → providers/entity_history.rs} +43 -31
  149. package/dist-engine-src/src/sql2/{file_provider.rs → providers/file.rs} +323 -316
  150. package/dist-engine-src/src/sql2/{file_history_provider.rs → providers/file_history.rs} +60 -46
  151. package/dist-engine-src/src/sql2/{history_provider.rs → providers/history.rs} +46 -32
  152. package/dist-engine-src/src/sql2/{lix_state_provider.rs → providers/lix_state.rs} +359 -329
  153. package/dist-engine-src/src/sql2/providers/mod.rs +508 -0
  154. package/dist-engine-src/src/sql2/read_only.rs +2 -2
  155. package/dist-engine-src/src/sql2/session.rs +47 -96
  156. package/dist-engine-src/src/sql2/storage/constraints.rs +1 -0
  157. package/dist-engine-src/src/sql2/storage/mod.rs +1 -0
  158. package/dist-engine-src/src/sql2/test_support/differential.rs +712 -0
  159. package/dist-engine-src/src/sql2/test_support/generators.rs +354 -0
  160. package/dist-engine-src/src/sql2/test_support/mod.rs +2 -0
  161. package/dist-engine-src/src/sql2/udfs/{lix_active_version_commit_id.rs → lix_active_branch_commit_id.rs} +7 -7
  162. package/dist-engine-src/src/sql2/udfs/mod.rs +3 -6
  163. package/dist-engine-src/src/sql2/write_normalization.rs +45 -22
  164. package/dist-engine-src/src/storage/conformance.rs +399 -0
  165. package/dist-engine-src/src/storage/context.rs +552 -288
  166. package/dist-engine-src/src/storage/mod.rs +48 -10
  167. package/dist-engine-src/src/storage/point.rs +440 -0
  168. package/dist-engine-src/src/storage/read_scope.rs +43 -64
  169. package/dist-engine-src/src/storage/reader.rs +867 -0
  170. package/dist-engine-src/src/storage/scan.rs +784 -0
  171. package/dist-engine-src/src/storage/spaces.rs +236 -0
  172. package/dist-engine-src/src/storage/stats.rs +80 -0
  173. package/dist-engine-src/src/storage/write_set.rs +962 -0
  174. package/dist-engine-src/src/storage_bench.rs +136 -4828
  175. package/dist-engine-src/src/test_support.rs +360 -138
  176. package/dist-engine-src/src/tracked_state/bench_support.rs +394 -0
  177. package/dist-engine-src/src/tracked_state/codec.rs +155 -1057
  178. package/dist-engine-src/src/tracked_state/commit_root_rebuild.rs +358 -0
  179. package/dist-engine-src/src/tracked_state/context.rs +1927 -993
  180. package/dist-engine-src/src/tracked_state/diff.rs +1715 -261
  181. package/dist-engine-src/src/tracked_state/merge.rs +74 -88
  182. package/dist-engine-src/src/tracked_state/mod.rs +19 -16
  183. package/dist-engine-src/src/tracked_state/{materialization.rs → row_materialization.rs} +50 -178
  184. package/dist-engine-src/src/tracked_state/storage.rs +243 -191
  185. package/dist-engine-src/src/tracked_state/tree.rs +247 -371
  186. package/dist-engine-src/src/tracked_state/types.rs +49 -42
  187. package/dist-engine-src/src/transaction/bench_support.rs +407 -0
  188. package/dist-engine-src/src/transaction/commit.rs +821 -713
  189. package/dist-engine-src/src/transaction/context.rs +705 -600
  190. package/dist-engine-src/src/transaction/mod.rs +13 -2
  191. package/dist-engine-src/src/transaction/normalization.rs +63 -76
  192. package/dist-engine-src/src/transaction/prep.rs +13 -13
  193. package/dist-engine-src/src/transaction/schema_resolver.rs +19 -5
  194. package/dist-engine-src/src/transaction/staging.rs +228 -434
  195. package/dist-engine-src/src/transaction/types.rs +41 -98
  196. package/dist-engine-src/src/transaction/validation.rs +382 -446
  197. package/dist-engine-src/src/untracked_state/codec.rs +337 -29
  198. package/dist-engine-src/src/untracked_state/context.rs +7 -7
  199. package/dist-engine-src/src/untracked_state/materialization.rs +2 -2
  200. package/dist-engine-src/src/untracked_state/mod.rs +1 -1
  201. package/dist-engine-src/src/untracked_state/storage.rs +659 -157
  202. package/dist-engine-src/src/untracked_state/types.rs +21 -21
  203. package/package.json +71 -68
  204. package/dist-engine-src/src/backend/kv.rs +0 -358
  205. package/dist-engine-src/src/backend/testing.rs +0 -658
  206. package/dist-engine-src/src/commit_store/codec.rs +0 -887
  207. package/dist-engine-src/src/commit_store/context.rs +0 -944
  208. package/dist-engine-src/src/commit_store/materialization.rs +0 -84
  209. package/dist-engine-src/src/commit_store/mod.rs +0 -16
  210. package/dist-engine-src/src/commit_store/storage.rs +0 -600
  211. package/dist-engine-src/src/commit_store/types.rs +0 -215
  212. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -34
  213. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -48
  214. package/dist-engine-src/src/session/create_version.rs +0 -88
  215. package/dist-engine-src/src/session/merge/apply.rs +0 -23
  216. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +0 -100
  217. package/dist-engine-src/src/session/switch_version.rs +0 -110
  218. package/dist-engine-src/src/sql2/entity_provider.rs +0 -3211
  219. package/dist-engine-src/src/sql2/execute.rs +0 -3533
  220. package/dist-engine-src/src/sql2/public_bind/assignment.rs +0 -46
  221. package/dist-engine-src/src/sql2/public_bind/capability.rs +0 -41
  222. package/dist-engine-src/src/sql2/public_bind/dml.rs +0 -172
  223. package/dist-engine-src/src/sql2/public_bind/mod.rs +0 -26
  224. package/dist-engine-src/src/sql2/public_bind/table.rs +0 -168
  225. package/dist-engine-src/src/sql2/version_scope.rs +0 -394
  226. package/dist-engine-src/src/storage/types.rs +0 -501
  227. package/dist-engine-src/src/tracked_state/by_file_index.rs +0 -98
  228. package/dist-engine-src/src/tracked_state/materializer.rs +0 -488
  229. package/dist-engine-src/src/transaction/live_state_overlay.rs +0 -35
  230. package/dist-engine-src/src/version/lifecycle.rs +0 -221
  231. package/dist-engine-src/src/version/mod.rs +0 -13
  232. package/dist-engine-src/src/version/refs.rs +0 -330
  233. package/dist-engine-src/src/version/stage_rows.rs +0 -67
  234. package/dist-engine-src/src/version/types.rs +0 -21
@@ -1,3211 +0,0 @@
1
- use std::any::Any;
2
- use std::collections::{BTreeMap, BTreeSet};
3
- use std::sync::Arc;
4
-
5
- use async_trait::async_trait;
6
- use datafusion::arrow::array::{
7
- ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, UInt64Array,
8
- };
9
- use datafusion::arrow::compute::{and, filter_record_batch};
10
- use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
11
- use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
12
- use datafusion::catalog::{Session, TableProvider};
13
- use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue};
14
- use datafusion::datasource::TableType;
15
- use datafusion::execution::TaskContext;
16
- use datafusion::logical_expr::dml::InsertOp;
17
- use datafusion::logical_expr::expr::InList;
18
- use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
19
- use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
20
- use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
21
- use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
22
- use datafusion::physical_plan::{
23
- DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
24
- };
25
- use datafusion::prelude::SessionContext;
26
- use futures_util::{stream, TryStreamExt};
27
- use serde_json::Value as JsonValue;
28
-
29
- use crate::commit_graph::CommitGraphReader;
30
- use crate::entity_identity::EntityIdentity;
31
- use crate::live_state::MaterializedLiveStateRow;
32
- use crate::live_state::{
33
- LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
34
- };
35
- use crate::sql2::dml::{InsertExec, InsertSink};
36
- use crate::sql2::predicate_typecheck::validate_json_predicate_filters;
37
- use crate::sql2::read_only::reject_read_only_entity_surface;
38
- use crate::sql2::version_scope::{
39
- explicit_version_ids_from_dml_filters, resolve_provider_version_ids,
40
- resolve_write_version_scope, VersionBinding,
41
- };
42
- use crate::sql2::write_normalization::{
43
- InsertCell, InsertColumnIntents, SqlCell, UpdateAssignmentValues, UpdateCell,
44
- };
45
- use crate::transaction::types::{TransactionJson, TransactionWriteRow};
46
- use crate::version::VersionRefReader;
47
- use crate::{parse_row_metadata_value, serialize_row_metadata, LixError};
48
-
49
- use super::entity_history_provider::EntityHistoryProvider;
50
- use super::history_route::{
51
- HISTORY_COL_CHANGE_ID, HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID,
52
- HISTORY_COL_FILE_ID, HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID,
53
- HISTORY_COL_SCHEMA_KEY, HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
54
- };
55
- use super::result_metadata::{json_field, mark_json_field};
56
- use crate::sql2::{
57
- SqlCommitStoreQuerySource, SqlWriteContext, WriteAccess, WriteContextLiveStateReader,
58
- WriteContextVersionRefReader,
59
- };
60
- use crate::transaction::types::{TransactionWrite, TransactionWriteMode};
61
-
62
- pub(crate) async fn register_entity_providers(
63
- ctx: &SessionContext,
64
- active_version_id: &str,
65
- live_state: Arc<dyn LiveStateReader>,
66
- version_ref: Arc<dyn VersionRefReader>,
67
- commit_graph: Arc<tokio::sync::Mutex<Box<dyn CommitGraphReader>>>,
68
- query_source: SqlCommitStoreQuerySource,
69
- schema_definitions: &[JsonValue],
70
- ) -> Result<(), LixError> {
71
- for schema in schema_definitions {
72
- let spec = match derive_entity_surface_spec_from_schema(schema) {
73
- Ok(spec) => Arc::new(spec),
74
- Err(_) => continue,
75
- };
76
-
77
- if !schema_exposed_as_entity_surface(&spec.schema_key) {
78
- continue;
79
- }
80
-
81
- let by_version_name = format!("{}_by_version", spec.schema_key);
82
- ctx.register_table(
83
- &by_version_name,
84
- Arc::new(EntityProvider::by_version(
85
- Arc::clone(&spec),
86
- Arc::clone(&live_state),
87
- Arc::clone(&version_ref),
88
- )),
89
- )
90
- .map_err(datafusion_error_to_lix_error)?;
91
-
92
- ctx.register_table(
93
- &spec.schema_key,
94
- Arc::new(EntityProvider::active(
95
- Arc::clone(&spec),
96
- Arc::clone(&live_state),
97
- Arc::clone(&version_ref),
98
- active_version_id.to_string(),
99
- )),
100
- )
101
- .map_err(datafusion_error_to_lix_error)?;
102
-
103
- if schema_exposed_as_entity_history_surface(&spec.schema_key) {
104
- let history_name = format!("{}_history", spec.schema_key);
105
- ctx.register_table(
106
- &history_name,
107
- Arc::new(EntityHistoryProvider::new(
108
- Arc::clone(&spec),
109
- Arc::clone(&commit_graph),
110
- query_source.clone(),
111
- )),
112
- )
113
- .map_err(datafusion_error_to_lix_error)?;
114
- }
115
- }
116
-
117
- Ok(())
118
- }
119
-
120
- pub(crate) async fn register_entity_write_providers(
121
- ctx: &SessionContext,
122
- write_ctx: SqlWriteContext,
123
- schema_definitions: &[JsonValue],
124
- ) -> Result<(), LixError> {
125
- for schema in schema_definitions {
126
- let spec = match derive_entity_surface_spec_from_schema(schema) {
127
- Ok(spec) => Arc::new(spec),
128
- Err(_) => continue,
129
- };
130
-
131
- if !schema_exposed_as_entity_surface(&spec.schema_key) {
132
- continue;
133
- }
134
-
135
- let by_version_name = format!("{}_by_version", spec.schema_key);
136
- ctx.register_table(
137
- &by_version_name,
138
- Arc::new(EntityProvider::by_version_with_write(
139
- Arc::clone(&spec),
140
- write_ctx.clone(),
141
- )),
142
- )
143
- .map_err(datafusion_error_to_lix_error)?;
144
-
145
- ctx.register_table(
146
- &spec.schema_key,
147
- Arc::new(EntityProvider::active_with_write(
148
- Arc::clone(&spec),
149
- write_ctx.clone(),
150
- )),
151
- )
152
- .map_err(datafusion_error_to_lix_error)?;
153
- }
154
-
155
- Ok(())
156
- }
157
-
158
- #[derive(Debug, Clone, Copy, PartialEq, Eq)]
159
- pub(super) enum EntityProviderVariant {
160
- Active,
161
- ByVersion,
162
- History,
163
- }
164
-
165
- #[derive(Debug, Clone, Copy, PartialEq, Eq)]
166
- pub(super) enum EntityColumnType {
167
- String,
168
- Json,
169
- Integer,
170
- Number,
171
- Boolean,
172
- }
173
-
174
- #[derive(Debug, Clone, PartialEq, Eq)]
175
- pub(super) struct EntitySurfaceColumn {
176
- pub(super) name: String,
177
- pub(super) column_type: EntityColumnType,
178
- }
179
-
180
- #[derive(Debug, Clone, PartialEq, Eq)]
181
- pub(super) struct EntitySurfaceSpec {
182
- pub(super) schema_key: String,
183
- pub(super) primary_key_paths: Vec<Vec<String>>,
184
- pub(super) columns: Vec<EntitySurfaceColumn>,
185
- }
186
-
187
- impl EntitySurfaceSpec {
188
- #[cfg(test)]
189
- fn visible_column_names(&self) -> impl Iterator<Item = &str> {
190
- self.columns.iter().map(|column| column.name.as_str())
191
- }
192
-
193
- pub(super) fn visible_column(&self, column_name: &str) -> Option<&EntitySurfaceColumn> {
194
- self.columns
195
- .iter()
196
- .find(|column| column.name == column_name)
197
- }
198
-
199
- fn is_visible_column(&self, column_name: &str) -> bool {
200
- self.visible_column(column_name).is_some()
201
- }
202
- }
203
-
204
- pub(crate) struct EntityProvider {
205
- spec: Arc<EntitySurfaceSpec>,
206
- live_state: Arc<dyn LiveStateReader>,
207
- version_ref: Arc<dyn VersionRefReader>,
208
- write_access: WriteAccess,
209
- schema: SchemaRef,
210
- variant: EntityProviderVariant,
211
- version_binding: VersionBinding,
212
- }
213
-
214
- impl std::fmt::Debug for EntityProvider {
215
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216
- f.debug_struct("EntityProvider")
217
- .field("schema_key", &self.spec.schema_key)
218
- .field("variant", &self.variant)
219
- .finish()
220
- }
221
- }
222
-
223
- impl EntityProvider {
224
- fn active(
225
- spec: Arc<EntitySurfaceSpec>,
226
- live_state: Arc<dyn LiveStateReader>,
227
- version_ref: Arc<dyn VersionRefReader>,
228
- active_version_id: String,
229
- ) -> Self {
230
- Self {
231
- schema: entity_surface_schema(&spec, EntityProviderVariant::Active),
232
- spec,
233
- live_state,
234
- version_ref,
235
- write_access: WriteAccess::read_only(),
236
- variant: EntityProviderVariant::Active,
237
- version_binding: VersionBinding::active(active_version_id),
238
- }
239
- }
240
-
241
- fn active_with_write(spec: Arc<EntitySurfaceSpec>, write_ctx: SqlWriteContext) -> Self {
242
- let active_version_id = write_ctx.active_version_id();
243
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
244
- let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
245
- Self {
246
- schema: entity_surface_schema(&spec, EntityProviderVariant::Active),
247
- spec,
248
- live_state,
249
- version_ref,
250
- write_access: WriteAccess::write(write_ctx),
251
- variant: EntityProviderVariant::Active,
252
- version_binding: VersionBinding::active(active_version_id),
253
- }
254
- }
255
-
256
- fn by_version(
257
- spec: Arc<EntitySurfaceSpec>,
258
- live_state: Arc<dyn LiveStateReader>,
259
- version_ref: Arc<dyn VersionRefReader>,
260
- ) -> Self {
261
- Self {
262
- schema: entity_surface_schema(&spec, EntityProviderVariant::ByVersion),
263
- spec,
264
- live_state,
265
- version_ref,
266
- write_access: WriteAccess::read_only(),
267
- variant: EntityProviderVariant::ByVersion,
268
- version_binding: VersionBinding::explicit(),
269
- }
270
- }
271
-
272
- fn by_version_with_write(spec: Arc<EntitySurfaceSpec>, write_ctx: SqlWriteContext) -> Self {
273
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
274
- let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
275
- Self {
276
- schema: entity_surface_schema(&spec, EntityProviderVariant::ByVersion),
277
- spec,
278
- live_state,
279
- version_ref,
280
- write_access: WriteAccess::write(write_ctx),
281
- variant: EntityProviderVariant::ByVersion,
282
- version_binding: VersionBinding::explicit(),
283
- }
284
- }
285
- }
286
-
287
- #[async_trait]
288
- impl TableProvider for EntityProvider {
289
- fn as_any(&self) -> &dyn Any {
290
- self
291
- }
292
-
293
- fn schema(&self) -> SchemaRef {
294
- Arc::clone(&self.schema)
295
- }
296
-
297
- fn table_type(&self) -> TableType {
298
- TableType::Base
299
- }
300
-
301
- fn supports_filters_pushdown(
302
- &self,
303
- filters: &[&Expr],
304
- ) -> Result<Vec<TableProviderFilterPushDown>> {
305
- let analyzer = EntityPrimaryKeyFilterAnalyzer::new(&self.spec);
306
- Ok(filters
307
- .iter()
308
- .map(|filter| {
309
- if ExactVersionIdFilterAnalyzer.supports(filter) || analyzer.supports(filter) {
310
- TableProviderFilterPushDown::Exact
311
- } else {
312
- TableProviderFilterPushDown::Unsupported
313
- }
314
- })
315
- .collect())
316
- }
317
-
318
- async fn scan(
319
- &self,
320
- _state: &dyn Session,
321
- projection: Option<&Vec<usize>>,
322
- filters: &[Expr],
323
- limit: Option<usize>,
324
- ) -> Result<Arc<dyn ExecutionPlan>> {
325
- let projected_schema = projected_schema(&self.schema, projection)?;
326
- let mut request = entity_live_state_scan_request(
327
- &self.spec.schema_key,
328
- self.version_binding.active_version_id(),
329
- Some(projected_schema.as_ref()),
330
- limit,
331
- );
332
- if self.write_access.is_write() && matches!(self.version_binding, VersionBinding::Explicit)
333
- {
334
- request.filter.version_ids = explicit_version_ids_from_dml_filters(filters);
335
- if request.filter.version_ids.is_empty() {
336
- return Err(DataFusionError::Plan(format!(
337
- "DELETE FROM {}_by_version requires an explicit lixcol_version_id predicate",
338
- self.spec.schema_key
339
- )));
340
- }
341
- }
342
- request.filter.version_ids = resolve_provider_version_ids(
343
- self.version_ref.as_ref(),
344
- &self.version_binding,
345
- request.filter.version_ids,
346
- )
347
- .await
348
- .map_err(lix_error_to_datafusion_error)?;
349
- apply_exact_version_id_filter(&mut request, exact_version_ids_from_filters(filters)?);
350
- apply_exact_entity_id_filters(&mut request, &self.spec, filters)?;
351
-
352
- Ok(Arc::new(EntityScanExec::new(
353
- Arc::clone(&self.spec),
354
- Arc::clone(&self.live_state),
355
- projected_schema,
356
- request,
357
- )))
358
- }
359
-
360
- async fn insert_into(
361
- &self,
362
- _state: &dyn Session,
363
- input: Arc<dyn ExecutionPlan>,
364
- insert_op: InsertOp,
365
- ) -> Result<Arc<dyn ExecutionPlan>> {
366
- if insert_op != InsertOp::Append {
367
- return not_impl_err!("{insert_op} not implemented for entity surfaces yet");
368
- }
369
- reject_read_only_entity_surface(&self.spec.schema_key, "INSERT")?;
370
-
371
- let write_ctx = self.write_access.require_write(&format!(
372
- "INSERT into {} entity surface",
373
- self.spec.schema_key
374
- ))?;
375
-
376
- let insert_version_binding = match self.variant {
377
- EntityProviderVariant::Active => self.version_binding.clone(),
378
- EntityProviderVariant::ByVersion => VersionBinding::explicit(),
379
- EntityProviderVariant::History => {
380
- return not_impl_err!("INSERT is not implemented for entity history surfaces");
381
- }
382
- };
383
-
384
- let sink = EntityInsertSink::new(
385
- Arc::clone(&self.spec),
386
- input.schema(),
387
- InsertColumnIntents::from_input(&input),
388
- write_ctx.clone(),
389
- insert_version_binding,
390
- );
391
- Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
392
- }
393
-
394
- async fn delete_from(
395
- &self,
396
- state: &dyn Session,
397
- filters: Vec<Expr>,
398
- ) -> Result<Arc<dyn ExecutionPlan>> {
399
- reject_read_only_entity_surface(&self.spec.schema_key, "DELETE")?;
400
-
401
- let write_ctx = self.write_access.require_write(&format!(
402
- "DELETE FROM {} entity surface",
403
- self.spec.schema_key
404
- ))?;
405
-
406
- let version_binding = match self.variant {
407
- EntityProviderVariant::Active => self.version_binding.clone(),
408
- EntityProviderVariant::ByVersion => VersionBinding::explicit(),
409
- EntityProviderVariant::History => {
410
- return not_impl_err!("DELETE is not implemented for entity history surfaces");
411
- }
412
- };
413
-
414
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
415
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
416
- let physical_filters = filters
417
- .iter()
418
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
419
- .collect::<Result<Vec<_>>>()?;
420
- let mut request = entity_live_state_scan_request(
421
- &self.spec.schema_key,
422
- version_binding.active_version_id(),
423
- None,
424
- None,
425
- );
426
- if matches!(version_binding, VersionBinding::Explicit) {
427
- let exact_version_ids = exact_version_ids_from_filters(&filters)?;
428
- if exact_version_ids.is_none() {
429
- return Err(DataFusionError::Plan(format!(
430
- "DELETE FROM {}_by_version requires an explicit lixcol_version_id predicate",
431
- self.spec.schema_key
432
- )));
433
- }
434
- apply_exact_version_id_filter(&mut request, exact_version_ids);
435
- }
436
- apply_exact_entity_id_filters(&mut request, &self.spec, &filters)?;
437
-
438
- Ok(Arc::new(EntityDeleteExec::new(
439
- Arc::clone(&self.spec),
440
- write_ctx.clone(),
441
- Arc::clone(&self.schema),
442
- version_binding,
443
- request,
444
- physical_filters,
445
- )))
446
- }
447
-
448
- async fn update(
449
- &self,
450
- state: &dyn Session,
451
- assignments: Vec<(String, Expr)>,
452
- filters: Vec<Expr>,
453
- ) -> Result<Arc<dyn ExecutionPlan>> {
454
- reject_read_only_entity_surface(&self.spec.schema_key, "UPDATE")?;
455
-
456
- let write_ctx = self
457
- .write_access
458
- .require_write(&format!("UPDATE {} entity surface", self.spec.schema_key))?;
459
-
460
- validate_entity_update_assignments(&self.spec, &self.schema, &assignments)?;
461
-
462
- let version_binding = match self.variant {
463
- EntityProviderVariant::Active => self.version_binding.clone(),
464
- EntityProviderVariant::ByVersion => VersionBinding::explicit(),
465
- EntityProviderVariant::History => {
466
- return not_impl_err!("UPDATE is not implemented for entity history surfaces");
467
- }
468
- };
469
-
470
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
471
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
472
- let physical_assignments = assignments
473
- .iter()
474
- .map(|(column_name, expr)| {
475
- Ok((
476
- column_name.clone(),
477
- create_physical_expr(expr, &df_schema, state.execution_props())?,
478
- ))
479
- })
480
- .collect::<Result<Vec<_>>>()?;
481
- let physical_filters = filters
482
- .iter()
483
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
484
- .collect::<Result<Vec<_>>>()?;
485
- let mut request = entity_live_state_scan_request(
486
- &self.spec.schema_key,
487
- version_binding.active_version_id(),
488
- None,
489
- None,
490
- );
491
- apply_exact_entity_id_filters(&mut request, &self.spec, &filters)?;
492
-
493
- Ok(Arc::new(EntityUpdateExec::new(
494
- Arc::clone(&self.spec),
495
- write_ctx.clone(),
496
- Arc::clone(&self.schema),
497
- version_binding,
498
- request,
499
- physical_assignments,
500
- physical_filters,
501
- )))
502
- }
503
- }
504
-
505
- fn entity_ids_from_primary_key_filters(
506
- spec: &EntitySurfaceSpec,
507
- filters: &[Expr],
508
- ) -> Result<Option<Vec<EntityIdentity>>> {
509
- let analyzer = EntityPrimaryKeyFilterAnalyzer::new(spec);
510
- let mut entity_ids: Option<BTreeSet<EntityIdentity>> = None;
511
- for filter in filters {
512
- let Some(filter_ids) = analyzer.analyze(filter)? else {
513
- continue;
514
- };
515
- entity_ids = Some(match entity_ids {
516
- Some(existing_ids) => existing_ids.intersection(&filter_ids).cloned().collect(),
517
- None => filter_ids,
518
- });
519
- }
520
-
521
- Ok(entity_ids.map(|ids| ids.into_iter().collect()))
522
- }
523
-
524
- fn apply_exact_entity_id_filters(
525
- request: &mut LiveStateScanRequest,
526
- spec: &EntitySurfaceSpec,
527
- filters: &[Expr],
528
- ) -> Result<()> {
529
- if let Some(entity_ids) = entity_ids_from_primary_key_filters(spec, filters)? {
530
- if entity_ids.is_empty() {
531
- request.limit = Some(0);
532
- }
533
- request.filter.entity_ids = entity_ids;
534
- }
535
- Ok(())
536
- }
537
-
538
- fn exact_version_ids_from_filters(filters: &[Expr]) -> Result<Option<Vec<String>>> {
539
- let analyzer = ExactVersionIdFilterAnalyzer;
540
- let mut version_ids: Option<BTreeSet<String>> = None;
541
- for filter in filters {
542
- let Some(filter_ids) = analyzer.analyze(filter)? else {
543
- continue;
544
- };
545
- version_ids = Some(match version_ids {
546
- Some(existing_ids) => existing_ids.intersection(&filter_ids).cloned().collect(),
547
- None => filter_ids,
548
- });
549
- }
550
- Ok(version_ids.map(|ids| ids.into_iter().collect()))
551
- }
552
-
553
- fn apply_exact_version_id_filter(
554
- request: &mut LiveStateScanRequest,
555
- version_ids: Option<Vec<String>>,
556
- ) {
557
- if let Some(version_ids) = version_ids {
558
- if version_ids.is_empty() {
559
- request.limit = Some(0);
560
- }
561
- request.filter.version_ids = version_ids;
562
- }
563
- }
564
-
565
/// Analyzes filter expressions for constraints on an entity's identity.
struct EntityPrimaryKeyFilterAnalyzer<'a> {
    /// Names of the primary-key columns that filters may constrain, in
    /// primary-key order.
    primary_key_columns: Vec<&'a str>,
}
568
-
569
/// Stateless analyzer for filters that pin `lixcol_version_id` to exact values.
struct ExactVersionIdFilterAnalyzer;
570
-
571
- impl ExactVersionIdFilterAnalyzer {
572
- fn supports(&self, expr: &Expr) -> bool {
573
- self.analyze(expr)
574
- .is_ok_and(|constraint| constraint.is_some())
575
- }
576
-
577
- fn analyze(&self, expr: &Expr) -> Result<Option<BTreeSet<String>>> {
578
- match expr {
579
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
580
- let Some(left) = self.analyze(&binary_expr.left)? else {
581
- return Ok(None);
582
- };
583
- let Some(right) = self.analyze(&binary_expr.right)? else {
584
- return Ok(None);
585
- };
586
- Ok(Some(left.intersection(&right).cloned().collect()))
587
- }
588
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
589
- let Some(mut left) = self.analyze(&binary_expr.left)? else {
590
- return Ok(None);
591
- };
592
- let Some(right) = self.analyze(&binary_expr.right)? else {
593
- return Ok(None);
594
- };
595
- left.extend(right);
596
- Ok(Some(left))
597
- }
598
- Expr::BinaryExpr(binary_expr) => {
599
- Ok(version_id_from_binary_filter(binary_expr).map(|value| BTreeSet::from([value])))
600
- }
601
- Expr::InList(in_list) => {
602
- Ok(version_ids_from_in_list_filter(in_list)
603
- .map(|values| values.into_iter().collect()))
604
- }
605
- _ => Ok(None),
606
- }
607
- }
608
- }
609
-
610
- fn version_id_from_binary_filter(binary_expr: &BinaryExpr) -> Option<String> {
611
- if binary_expr.op != Operator::Eq {
612
- return None;
613
- }
614
-
615
- version_id_from_column_literal_filter(&binary_expr.left, &binary_expr.right)
616
- .or_else(|| version_id_from_column_literal_filter(&binary_expr.right, &binary_expr.left))
617
- }
618
-
619
- fn version_ids_from_in_list_filter(in_list: &InList) -> Option<Vec<String>> {
620
- if in_list.negated {
621
- return None;
622
- }
623
- let Expr::Column(column) = in_list.expr.as_ref() else {
624
- return None;
625
- };
626
- if column.name != "lixcol_version_id" {
627
- return None;
628
- }
629
-
630
- let values = in_list
631
- .list
632
- .iter()
633
- .map(string_expr_literal)
634
- .collect::<Option<Vec<_>>>()?;
635
- if values.is_empty() {
636
- return None;
637
- }
638
- Some(values)
639
- }
640
-
641
- fn version_id_from_column_literal_filter(
642
- column_expr: &Expr,
643
- literal_expr: &Expr,
644
- ) -> Option<String> {
645
- let Expr::Column(column) = column_expr else {
646
- return None;
647
- };
648
- if column.name != "lixcol_version_id" {
649
- return None;
650
- }
651
- string_expr_literal(literal_expr)
652
- }
653
-
654
- impl<'a> EntityPrimaryKeyFilterAnalyzer<'a> {
655
- fn new(spec: &'a EntitySurfaceSpec) -> Self {
656
- Self {
657
- primary_key_columns: string_primary_key_columns(spec),
658
- }
659
- }
660
-
661
- fn supports(&self, expr: &Expr) -> bool {
662
- self.analyze(expr)
663
- .is_ok_and(|constraint| constraint.is_some())
664
- }
665
-
666
- fn analyze(&self, expr: &Expr) -> Result<Option<BTreeSet<EntityIdentity>>> {
667
- if self.primary_key_columns.is_empty() {
668
- return Ok(None);
669
- };
670
- let Some(constraint) = self.analyze_constraint(expr)? else {
671
- return Ok(None);
672
- };
673
- Ok(constraint.into_entity_ids(&self.primary_key_columns))
674
- }
675
-
676
- fn analyze_constraint(&self, expr: &Expr) -> Result<Option<EntityIdentityConstraint>> {
677
- match expr {
678
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
679
- let Some(left) = self.analyze_constraint(&binary_expr.left)? else {
680
- return Ok(None);
681
- };
682
- let Some(right) = self.analyze_constraint(&binary_expr.right)? else {
683
- return Ok(None);
684
- };
685
- Ok(Some(left.intersect(right, &self.primary_key_columns)))
686
- }
687
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
688
- let Some(left) = self.analyze_constraint(&binary_expr.left)? else {
689
- return Ok(None);
690
- };
691
- let Some(right) = self.analyze_constraint(&binary_expr.right)? else {
692
- return Ok(None);
693
- };
694
- let Some(left_ids) = left.into_entity_ids(&self.primary_key_columns) else {
695
- return Ok(None);
696
- };
697
- let Some(mut right_ids) = right.into_entity_ids(&self.primary_key_columns) else {
698
- return Ok(None);
699
- };
700
- right_ids.extend(left_ids);
701
- Ok(Some(EntityIdentityConstraint::Full(right_ids)))
702
- }
703
- Expr::BinaryExpr(binary_expr) => Ok(entity_identity_constraint_from_binary_filter(
704
- binary_expr,
705
- &self.primary_key_columns,
706
- )),
707
- Expr::InList(in_list) => Ok(entity_identity_constraint_from_in_list_filter(
708
- in_list,
709
- &self.primary_key_columns,
710
- )),
711
- _ => Ok(None),
712
- }
713
- }
714
- }
715
-
716
- #[derive(Debug, Clone, PartialEq, Eq)]
717
- enum EntityIdentityConstraint {
718
- Full(BTreeSet<EntityIdentity>),
719
- Parts(BTreeMap<String, BTreeSet<String>>),
720
- }
721
-
722
- impl EntityIdentityConstraint {
723
- fn intersect(self, other: Self, primary_key_columns: &[&str]) -> Self {
724
- match (self, other) {
725
- (Self::Full(left), Self::Full(right)) => {
726
- Self::Full(left.intersection(&right).cloned().collect())
727
- }
728
- (Self::Full(ids), Self::Parts(parts)) | (Self::Parts(parts), Self::Full(ids)) => {
729
- Self::Full(
730
- ids.into_iter()
731
- .filter(|identity| {
732
- identity_matches_parts(identity, primary_key_columns, &parts)
733
- })
734
- .collect(),
735
- )
736
- }
737
- (Self::Parts(mut left), Self::Parts(right)) => {
738
- for (column, right_values) in right {
739
- left.entry(column)
740
- .and_modify(|left_values| {
741
- *left_values =
742
- left_values.intersection(&right_values).cloned().collect();
743
- })
744
- .or_insert(right_values);
745
- }
746
- Self::Parts(left)
747
- }
748
- }
749
- }
750
-
751
- fn into_entity_ids(self, primary_key_columns: &[&str]) -> Option<BTreeSet<EntityIdentity>> {
752
- match self {
753
- Self::Full(ids) => Some(ids),
754
- Self::Parts(parts) => entity_ids_from_primary_key_parts(primary_key_columns, parts),
755
- }
756
- }
757
- }
758
-
759
- fn string_primary_key_columns(spec: &EntitySurfaceSpec) -> Vec<&str> {
760
- spec.primary_key_paths
761
- .iter()
762
- .map(|path| {
763
- let [column_name] = path.as_slice() else {
764
- return None;
765
- };
766
- let column = spec.visible_column(column_name)?;
767
- (column.column_type == EntityColumnType::String).then_some(column.name.as_str())
768
- })
769
- .collect::<Option<Vec<_>>>()
770
- .unwrap_or_default()
771
- }
772
-
773
- fn entity_identity_constraint_from_binary_filter(
774
- binary_expr: &BinaryExpr,
775
- primary_key_columns: &[&str],
776
- ) -> Option<EntityIdentityConstraint> {
777
- if binary_expr.op != Operator::Eq {
778
- return None;
779
- }
780
- entity_identity_constraint_from_column_literal_filter(
781
- &binary_expr.left,
782
- &binary_expr.right,
783
- primary_key_columns,
784
- )
785
- .or_else(|| {
786
- entity_identity_constraint_from_column_literal_filter(
787
- &binary_expr.right,
788
- &binary_expr.left,
789
- primary_key_columns,
790
- )
791
- })
792
- }
793
-
794
- fn entity_identity_constraint_from_in_list_filter(
795
- in_list: &InList,
796
- primary_key_columns: &[&str],
797
- ) -> Option<EntityIdentityConstraint> {
798
- if in_list.negated {
799
- return None;
800
- }
801
- let Expr::Column(column) = in_list.expr.as_ref() else {
802
- return None;
803
- };
804
- let values = in_list
805
- .list
806
- .iter()
807
- .map(string_expr_literal)
808
- .collect::<Option<Vec<_>>>()?;
809
- if values.is_empty() {
810
- return None;
811
- }
812
- match column.name.as_str() {
813
- "lixcol_entity_id" => values
814
- .into_iter()
815
- .map(|value| EntityIdentity::from_json_array_text(&value).ok())
816
- .collect::<Option<BTreeSet<_>>>()
817
- .map(EntityIdentityConstraint::Full),
818
- column_name if primary_key_columns.contains(&column_name) => {
819
- Some(EntityIdentityConstraint::Parts(BTreeMap::from([(
820
- column_name.to_string(),
821
- values.into_iter().collect(),
822
- )])))
823
- }
824
- _ => None,
825
- }
826
- }
827
-
828
- fn entity_identity_constraint_from_column_literal_filter(
829
- column_expr: &Expr,
830
- literal_expr: &Expr,
831
- primary_key_columns: &[&str],
832
- ) -> Option<EntityIdentityConstraint> {
833
- let Expr::Column(column) = column_expr else {
834
- return None;
835
- };
836
- let value = string_expr_literal(literal_expr)?;
837
- match column.name.as_str() {
838
- "lixcol_entity_id" => EntityIdentity::from_json_array_text(&value)
839
- .ok()
840
- .map(|identity| EntityIdentityConstraint::Full(BTreeSet::from([identity]))),
841
- column_name if primary_key_columns.contains(&column_name) => {
842
- Some(EntityIdentityConstraint::Parts(BTreeMap::from([(
843
- column_name.to_string(),
844
- BTreeSet::from([value]),
845
- )])))
846
- }
847
- _ => None,
848
- }
849
- }
850
-
851
- fn entity_ids_from_primary_key_parts(
852
- primary_key_columns: &[&str],
853
- parts: BTreeMap<String, BTreeSet<String>>,
854
- ) -> Option<BTreeSet<EntityIdentity>> {
855
- if primary_key_columns
856
- .iter()
857
- .any(|column| !parts.contains_key(*column))
858
- {
859
- return None;
860
- }
861
-
862
- let mut identities = BTreeSet::from([Vec::<String>::new()]);
863
- for column in primary_key_columns {
864
- let values = parts.get(*column)?;
865
- identities = identities
866
- .into_iter()
867
- .flat_map(|prefix| {
868
- values.iter().map(move |value| {
869
- let mut parts = prefix.clone();
870
- parts.push(value.clone());
871
- parts
872
- })
873
- })
874
- .collect();
875
- }
876
- Some(
877
- identities
878
- .into_iter()
879
- .map(|parts| EntityIdentity { parts })
880
- .collect(),
881
- )
882
- }
883
-
884
- fn identity_matches_parts(
885
- identity: &EntityIdentity,
886
- primary_key_columns: &[&str],
887
- parts: &BTreeMap<String, BTreeSet<String>>,
888
- ) -> bool {
889
- let identity_parts = identity.parts.as_slice();
890
- primary_key_columns
891
- .iter()
892
- .zip(identity_parts.iter())
893
- .all(|(column, value)| {
894
- parts
895
- .get(*column)
896
- .is_none_or(|values| values.contains(value))
897
- })
898
- }
899
-
900
- fn string_expr_literal(expr: &Expr) -> Option<String> {
901
- let Expr::Literal(literal, _) = expr else {
902
- return None;
903
- };
904
- match literal {
905
- ScalarValue::Utf8(Some(value))
906
- | ScalarValue::Utf8View(Some(value))
907
- | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
908
- _ => None,
909
- }
910
- }
911
-
912
- struct EntityInsertSink {
913
- spec: Arc<EntitySurfaceSpec>,
914
- insert_column_intents: InsertColumnIntents,
915
- write_ctx: SqlWriteContext,
916
- version_binding: VersionBinding,
917
- }
918
-
919
- impl std::fmt::Debug for EntityInsertSink {
920
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
921
- f.debug_struct("EntityInsertSink")
922
- .field("schema_key", &self.spec.schema_key)
923
- .finish()
924
- }
925
- }
926
-
927
- impl EntityInsertSink {
928
- fn new(
929
- spec: Arc<EntitySurfaceSpec>,
930
- _schema: SchemaRef,
931
- insert_column_intents: InsertColumnIntents,
932
- write_ctx: SqlWriteContext,
933
- version_binding: VersionBinding,
934
- ) -> Self {
935
- Self {
936
- spec,
937
- insert_column_intents,
938
- write_ctx,
939
- version_binding,
940
- }
941
- }
942
- }
943
-
944
- impl DisplayAs for EntityInsertSink {
945
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
946
- match t {
947
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
948
- write!(f, "EntityInsertSink(schema_key={})", self.spec.schema_key)
949
- }
950
- DisplayFormatType::TreeRender => write!(f, "EntityInsertSink"),
951
- }
952
- }
953
- }
954
-
955
- #[async_trait]
956
- impl InsertSink for EntityInsertSink {
957
- async fn write_batches(
958
- &self,
959
- batches: Vec<RecordBatch>,
960
- _context: &Arc<TaskContext>,
961
- ) -> Result<u64> {
962
- let mut rows = Vec::new();
963
- for batch in batches {
964
- rows.extend(entity_lix_state_write_rows_from_batch(
965
- &self.spec,
966
- &batch,
967
- &self.insert_column_intents,
968
- self.version_binding.active_version_id(),
969
- )?);
970
- }
971
- let count = u64::try_from(rows.len())
972
- .map_err(|_| DataFusionError::Execution("entity INSERT row count overflow".into()))?;
973
-
974
- self.write_ctx
975
- .stage_write(TransactionWrite::Rows {
976
- mode: TransactionWriteMode::Insert,
977
- rows,
978
- })
979
- .await
980
- .map_err(lix_error_to_datafusion_error)?;
981
-
982
- Ok(count)
983
- }
984
- }
985
-
986
- #[allow(dead_code)]
987
- struct EntityDeleteExec {
988
- spec: Arc<EntitySurfaceSpec>,
989
- write_ctx: SqlWriteContext,
990
- table_schema: SchemaRef,
991
- version_binding: VersionBinding,
992
- request: LiveStateScanRequest,
993
- filters: Vec<Arc<dyn PhysicalExpr>>,
994
- result_schema: SchemaRef,
995
- properties: Arc<PlanProperties>,
996
- }
997
-
998
- impl std::fmt::Debug for EntityDeleteExec {
999
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1000
- f.debug_struct("EntityDeleteExec")
1001
- .field("schema_key", &self.spec.schema_key)
1002
- .finish()
1003
- }
1004
- }
1005
-
1006
- impl EntityDeleteExec {
1007
- fn new(
1008
- spec: Arc<EntitySurfaceSpec>,
1009
- write_ctx: SqlWriteContext,
1010
- table_schema: SchemaRef,
1011
- version_binding: VersionBinding,
1012
- request: LiveStateScanRequest,
1013
- filters: Vec<Arc<dyn PhysicalExpr>>,
1014
- ) -> Self {
1015
- let result_schema = dml_count_schema();
1016
- let properties = PlanProperties::new(
1017
- EquivalenceProperties::new(Arc::clone(&result_schema)),
1018
- Partitioning::UnknownPartitioning(1),
1019
- EmissionType::Final,
1020
- Boundedness::Bounded,
1021
- );
1022
- Self {
1023
- spec,
1024
- write_ctx,
1025
- table_schema,
1026
- version_binding,
1027
- request,
1028
- filters,
1029
- result_schema,
1030
- properties: Arc::new(properties),
1031
- }
1032
- }
1033
- }
1034
-
1035
- impl DisplayAs for EntityDeleteExec {
1036
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1037
- match t {
1038
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
1039
- write!(f, "EntityDeleteExec(schema_key={})", self.spec.schema_key)
1040
- }
1041
- DisplayFormatType::TreeRender => write!(f, "EntityDeleteExec"),
1042
- }
1043
- }
1044
- }
1045
-
1046
- impl ExecutionPlan for EntityDeleteExec {
1047
- fn name(&self) -> &str {
1048
- "EntityDeleteExec"
1049
- }
1050
-
1051
- fn as_any(&self) -> &dyn Any {
1052
- self
1053
- }
1054
-
1055
- fn properties(&self) -> &Arc<PlanProperties> {
1056
- &self.properties
1057
- }
1058
-
1059
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1060
- Vec::new()
1061
- }
1062
-
1063
- fn with_new_children(
1064
- self: Arc<Self>,
1065
- children: Vec<Arc<dyn ExecutionPlan>>,
1066
- ) -> Result<Arc<dyn ExecutionPlan>> {
1067
- if !children.is_empty() {
1068
- return Err(DataFusionError::Execution(
1069
- "EntityDeleteExec does not accept children".to_string(),
1070
- ));
1071
- }
1072
- Ok(self)
1073
- }
1074
-
1075
- fn execute(
1076
- &self,
1077
- partition: usize,
1078
- _context: Arc<TaskContext>,
1079
- ) -> Result<SendableRecordBatchStream> {
1080
- if partition != 0 {
1081
- return Err(DataFusionError::Execution(format!(
1082
- "EntityDeleteExec only exposes one partition, got {partition}"
1083
- )));
1084
- }
1085
-
1086
- let spec = Arc::clone(&self.spec);
1087
- let write_ctx = self.write_ctx.clone();
1088
- let table_schema = Arc::clone(&self.table_schema);
1089
- let version_binding = self.version_binding.clone();
1090
- let request = self.request.clone();
1091
- let filters = self.filters.clone();
1092
- let result_schema = Arc::clone(&self.result_schema);
1093
- let stream_schema = Arc::clone(&result_schema);
1094
-
1095
- let stream = stream::once(async move {
1096
- let rows = if request.limit == Some(0) {
1097
- Vec::new()
1098
- } else {
1099
- write_ctx
1100
- .scan_live_state(&request)
1101
- .await
1102
- .map_err(lix_error_to_datafusion_error)?
1103
- };
1104
- let source_batch = entity_record_batch(&spec, Arc::clone(&table_schema), &rows)?;
1105
- let matched_batch = filter_entity_batch(source_batch, &filters)?;
1106
- let mut write_rows = entity_existing_lix_state_write_rows_from_batch(
1107
- &spec,
1108
- &matched_batch,
1109
- version_binding.active_version_id(),
1110
- )?;
1111
- for row in &mut write_rows {
1112
- row.snapshot = None;
1113
- }
1114
- let count = u64::try_from(write_rows.len()).map_err(|_| {
1115
- DataFusionError::Execution("entity DELETE row count overflow".to_string())
1116
- })?;
1117
-
1118
- if count > 0 {
1119
- write_ctx
1120
- .stage_write(TransactionWrite::Rows {
1121
- mode: TransactionWriteMode::Replace,
1122
- rows: write_rows,
1123
- })
1124
- .await
1125
- .map_err(lix_error_to_datafusion_error)?;
1126
- }
1127
-
1128
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
1129
- dml_count_batch(Arc::clone(&stream_schema), count)?,
1130
- )]))
1131
- })
1132
- .try_flatten();
1133
-
1134
- Ok(Box::pin(RecordBatchStreamAdapter::new(
1135
- result_schema,
1136
- stream,
1137
- )))
1138
- }
1139
- }
1140
-
1141
- #[allow(dead_code)]
1142
- struct EntityUpdateExec {
1143
- spec: Arc<EntitySurfaceSpec>,
1144
- write_ctx: SqlWriteContext,
1145
- table_schema: SchemaRef,
1146
- version_binding: VersionBinding,
1147
- request: LiveStateScanRequest,
1148
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
1149
- filters: Vec<Arc<dyn PhysicalExpr>>,
1150
- result_schema: SchemaRef,
1151
- properties: Arc<PlanProperties>,
1152
- }
1153
-
1154
- impl std::fmt::Debug for EntityUpdateExec {
1155
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1156
- f.debug_struct("EntityUpdateExec")
1157
- .field("schema_key", &self.spec.schema_key)
1158
- .finish()
1159
- }
1160
- }
1161
-
1162
- impl EntityUpdateExec {
1163
- fn new(
1164
- spec: Arc<EntitySurfaceSpec>,
1165
- write_ctx: SqlWriteContext,
1166
- table_schema: SchemaRef,
1167
- version_binding: VersionBinding,
1168
- request: LiveStateScanRequest,
1169
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
1170
- filters: Vec<Arc<dyn PhysicalExpr>>,
1171
- ) -> Self {
1172
- let result_schema = dml_count_schema();
1173
- let properties = PlanProperties::new(
1174
- EquivalenceProperties::new(Arc::clone(&result_schema)),
1175
- Partitioning::UnknownPartitioning(1),
1176
- EmissionType::Final,
1177
- Boundedness::Bounded,
1178
- );
1179
- Self {
1180
- spec,
1181
- write_ctx,
1182
- table_schema,
1183
- version_binding,
1184
- request,
1185
- assignments,
1186
- filters,
1187
- result_schema,
1188
- properties: Arc::new(properties),
1189
- }
1190
- }
1191
- }
1192
-
1193
- impl DisplayAs for EntityUpdateExec {
1194
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1195
- match t {
1196
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
1197
- write!(
1198
- f,
1199
- "EntityUpdateExec(schema_key={}, assignments={})",
1200
- self.spec.schema_key,
1201
- self.assignments.len()
1202
- )
1203
- }
1204
- DisplayFormatType::TreeRender => write!(f, "EntityUpdateExec"),
1205
- }
1206
- }
1207
- }
1208
-
1209
- impl ExecutionPlan for EntityUpdateExec {
1210
- fn name(&self) -> &str {
1211
- "EntityUpdateExec"
1212
- }
1213
-
1214
- fn as_any(&self) -> &dyn Any {
1215
- self
1216
- }
1217
-
1218
- fn properties(&self) -> &Arc<PlanProperties> {
1219
- &self.properties
1220
- }
1221
-
1222
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1223
- Vec::new()
1224
- }
1225
-
1226
- fn with_new_children(
1227
- self: Arc<Self>,
1228
- children: Vec<Arc<dyn ExecutionPlan>>,
1229
- ) -> Result<Arc<dyn ExecutionPlan>> {
1230
- if !children.is_empty() {
1231
- return Err(DataFusionError::Execution(
1232
- "EntityUpdateExec does not accept children".to_string(),
1233
- ));
1234
- }
1235
- Ok(self)
1236
- }
1237
-
1238
- fn execute(
1239
- &self,
1240
- partition: usize,
1241
- _context: Arc<TaskContext>,
1242
- ) -> Result<SendableRecordBatchStream> {
1243
- if partition != 0 {
1244
- return Err(DataFusionError::Execution(format!(
1245
- "EntityUpdateExec only exposes one partition, got {partition}"
1246
- )));
1247
- }
1248
-
1249
- let spec = Arc::clone(&self.spec);
1250
- let write_ctx = self.write_ctx.clone();
1251
- let table_schema = Arc::clone(&self.table_schema);
1252
- let version_binding = self.version_binding.clone();
1253
- let request = self.request.clone();
1254
- let assignments = self.assignments.clone();
1255
- let filters = self.filters.clone();
1256
- let result_schema = Arc::clone(&self.result_schema);
1257
- let stream_schema = Arc::clone(&result_schema);
1258
-
1259
- let stream = stream::once(async move {
1260
- let rows = if request.limit == Some(0) {
1261
- Vec::new()
1262
- } else {
1263
- write_ctx
1264
- .scan_live_state(&request)
1265
- .await
1266
- .map_err(lix_error_to_datafusion_error)?
1267
- };
1268
- let source_batch = entity_record_batch(&spec, Arc::clone(&table_schema), &rows)?;
1269
- let matched_batch = filter_entity_batch(source_batch, &filters)?;
1270
- let write_rows = entity_update_write_rows_from_batch(
1271
- &spec,
1272
- &matched_batch,
1273
- &assignments,
1274
- version_binding.active_version_id(),
1275
- )?;
1276
- let count = u64::try_from(write_rows.len()).map_err(|_| {
1277
- DataFusionError::Execution("entity UPDATE row count overflow".to_string())
1278
- })?;
1279
-
1280
- if count > 0 {
1281
- write_ctx
1282
- .stage_write(TransactionWrite::Rows {
1283
- mode: TransactionWriteMode::Replace,
1284
- rows: write_rows,
1285
- })
1286
- .await
1287
- .map_err(lix_error_to_datafusion_error)?;
1288
- }
1289
-
1290
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
1291
- dml_count_batch(Arc::clone(&stream_schema), count)?,
1292
- )]))
1293
- })
1294
- .try_flatten();
1295
-
1296
- Ok(Box::pin(RecordBatchStreamAdapter::new(
1297
- result_schema,
1298
- stream,
1299
- )))
1300
- }
1301
- }
1302
-
1303
- fn validate_entity_update_assignments(
1304
- spec: &EntitySurfaceSpec,
1305
- schema: &SchemaRef,
1306
- assignments: &[(String, Expr)],
1307
- ) -> Result<()> {
1308
- for (column_name, _) in assignments {
1309
- schema.field_with_name(column_name).map_err(|_| {
1310
- DataFusionError::Plan(format!(
1311
- "UPDATE entity surface '{}' failed: column '{column_name}' does not exist",
1312
- spec.schema_key
1313
- ))
1314
- })?;
1315
- if !spec.is_visible_column(column_name) && column_name != "lixcol_metadata" {
1316
- return Err(DataFusionError::Execution(format!(
1317
- "UPDATE entity surface '{}' cannot stage read-only column '{column_name}'",
1318
- spec.schema_key
1319
- )));
1320
- }
1321
- }
1322
- Ok(())
1323
- }
1324
-
1325
- fn filter_entity_batch(
1326
- batch: RecordBatch,
1327
- filters: &[Arc<dyn PhysicalExpr>],
1328
- ) -> Result<RecordBatch> {
1329
- let Some(mask) = evaluate_entity_filters(&batch, filters)? else {
1330
- return Ok(batch);
1331
- };
1332
- Ok(filter_record_batch(&batch, &mask)?)
1333
- }
1334
-
1335
- fn evaluate_entity_filters(
1336
- batch: &RecordBatch,
1337
- filters: &[Arc<dyn PhysicalExpr>],
1338
- ) -> Result<Option<BooleanArray>> {
1339
- if filters.is_empty() {
1340
- return Ok(None);
1341
- }
1342
-
1343
- let mut combined_mask: Option<BooleanArray> = None;
1344
- for filter in filters {
1345
- let result = filter.evaluate(batch)?;
1346
- let array = result.into_array(batch.num_rows())?;
1347
- let bool_array = array
1348
- .as_any()
1349
- .downcast_ref::<BooleanArray>()
1350
- .ok_or_else(|| {
1351
- DataFusionError::Execution("entity surface filter was not boolean".to_string())
1352
- })?;
1353
- let normalized = bool_array
1354
- .iter()
1355
- .map(|value| Some(value == Some(true)))
1356
- .collect::<BooleanArray>();
1357
- combined_mask = Some(match combined_mask {
1358
- Some(existing) => and(&existing, &normalized)?,
1359
- None => normalized,
1360
- });
1361
- }
1362
- Ok(combined_mask)
1363
- }
1364
-
1365
- fn entity_update_write_rows_from_batch(
1366
- spec: &EntitySurfaceSpec,
1367
- batch: &RecordBatch,
1368
- assignments: &[(String, Arc<dyn PhysicalExpr>)],
1369
- version_binding: Option<&str>,
1370
- ) -> Result<Vec<TransactionWriteRow>> {
1371
- let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
1372
- (0..batch.num_rows())
1373
- .map(|row_index| {
1374
- let scope = resolve_write_version_scope(
1375
- optional_bool_value(batch, row_index, "lixcol_global")?,
1376
- optional_string_value(batch, row_index, "lixcol_version_id")?,
1377
- version_binding,
1378
- &format!("UPDATE into {}_by_version", spec.schema_key),
1379
- &spec.schema_key,
1380
- )?;
1381
-
1382
- Ok(TransactionWriteRow {
1383
- entity_id: optional_string_value(batch, row_index, "lixcol_entity_id")?
1384
- .map(|entity_id| {
1385
- EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
1386
- DataFusionError::Execution(format!(
1387
- "UPDATE entity surface '{}' has invalid lixcol_entity_id: {error}",
1388
- spec.schema_key
1389
- ))
1390
- })
1391
- })
1392
- .transpose()?,
1393
- schema_key: spec.schema_key.clone(),
1394
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1395
- snapshot: Some(
1396
- TransactionJson::from_value(
1397
- entity_update_snapshot_content_from_batch(
1398
- spec,
1399
- batch,
1400
- &assignment_values,
1401
- row_index,
1402
- )?,
1403
- &format!("{} update snapshot_content", spec.schema_key),
1404
- )
1405
- .map_err(super::error::lix_error_to_datafusion_error)?,
1406
- ),
1407
- metadata: entity_update_optional_metadata_value(
1408
- batch,
1409
- &assignment_values,
1410
- row_index,
1411
- "lixcol_metadata",
1412
- &spec.schema_key,
1413
- )?,
1414
- origin: None,
1415
- created_at: None,
1416
- updated_at: None,
1417
- global: scope.global,
1418
- change_id: None,
1419
- commit_id: None,
1420
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?
1421
- .unwrap_or(false),
1422
- version_id: scope.version_id,
1423
- })
1424
- })
1425
- .collect()
1426
- }
1427
-
1428
- fn entity_update_snapshot_content_from_batch(
1429
- spec: &EntitySurfaceSpec,
1430
- batch: &RecordBatch,
1431
- assignment_values: &UpdateAssignmentValues,
1432
- row_index: usize,
1433
- ) -> Result<JsonValue> {
1434
- let snapshot_content = optional_string_value(batch, row_index, "lixcol_snapshot_content")?
1435
- .ok_or_else(|| {
1436
- DataFusionError::Execution(format!(
1437
- "UPDATE entity surface '{}' requires existing lixcol_snapshot_content",
1438
- spec.schema_key
1439
- ))
1440
- })?;
1441
- let mut object = match serde_json::from_str::<JsonValue>(&snapshot_content).map_err(|error| {
1442
- DataFusionError::Execution(format!(
1443
- "UPDATE entity surface '{}' expected existing snapshot_content to be valid JSON: {error}",
1444
- spec.schema_key
1445
- ))
1446
- })? {
1447
- JsonValue::Object(object) => object,
1448
- other => {
1449
- return Err(DataFusionError::Execution(format!(
1450
- "UPDATE entity surface '{}' expected existing snapshot_content to be a JSON object, got {other}",
1451
- spec.schema_key
1452
- )))
1453
- }
1454
- };
1455
-
1456
- for column in &spec.columns {
1457
- let value = match entity_update_json_value(
1458
- assignment_values,
1459
- row_index,
1460
- &column.name,
1461
- column.column_type,
1462
- )? {
1463
- Some(value) => value,
1464
- None => continue,
1465
- };
1466
- object.insert(column.name.clone(), value);
1467
- }
1468
- Ok(JsonValue::Object(object))
1469
- }
1470
-
1471
- fn entity_update_optional_string_value(
1472
- batch: &RecordBatch,
1473
- assignment_values: &UpdateAssignmentValues,
1474
- row_index: usize,
1475
- column_name: &str,
1476
- ) -> Result<Option<String>> {
1477
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
1478
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
1479
- InsertCell::Provided(SqlCell::Value(
1480
- ScalarValue::Utf8(Some(value))
1481
- | ScalarValue::Utf8View(Some(value))
1482
- | ScalarValue::LargeUtf8(Some(value)),
1483
- )) => Ok(Some(value)),
1484
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
1485
- "UPDATE entity surface expected text-compatible column '{column_name}', got {other:?}"
1486
- ))),
1487
- }
1488
- }
1489
-
1490
- fn entity_update_optional_metadata_value(
1491
- batch: &RecordBatch,
1492
- assignment_values: &UpdateAssignmentValues,
1493
- row_index: usize,
1494
- column_name: &str,
1495
- context: &str,
1496
- ) -> Result<Option<TransactionJson>> {
1497
- entity_update_optional_string_value(batch, assignment_values, row_index, column_name)?
1498
- .map(|value| {
1499
- let metadata = parse_row_metadata_value(&value, context)
1500
- .map_err(super::error::lix_error_to_datafusion_error)?;
1501
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
1502
- .map_err(super::error::lix_error_to_datafusion_error)
1503
- })
1504
- .transpose()
1505
- }
1506
-
1507
- fn entity_update_json_value(
1508
- assignment_values: &UpdateAssignmentValues,
1509
- row_index: usize,
1510
- column_name: &str,
1511
- column_type: EntityColumnType,
1512
- ) -> Result<Option<JsonValue>> {
1513
- match assignment_values.assigned_cell(row_index, column_name)? {
1514
- UpdateCell::Unassigned => Ok(None),
1515
- UpdateCell::Assigned(SqlCell::Null) => Ok(Some(JsonValue::Null)),
1516
- UpdateCell::Assigned(SqlCell::Value(value)) => {
1517
- entity_json_value_from_scalar(Some(value), column_type).map(Some)
1518
- }
1519
- }
1520
- }
1521
-
1522
- fn dml_count_schema() -> SchemaRef {
1523
- Arc::new(Schema::new(vec![Field::new(
1524
- "count",
1525
- DataType::UInt64,
1526
- false,
1527
- )]))
1528
- }
1529
-
1530
- fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
1531
- RecordBatch::try_new(
1532
- schema,
1533
- vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
1534
- )
1535
- .map_err(DataFusionError::from)
1536
- }
1537
-
1538
- fn entity_lix_state_write_rows_from_batch(
1539
- spec: &EntitySurfaceSpec,
1540
- batch: &RecordBatch,
1541
- insert_column_intents: &InsertColumnIntents,
1542
- version_binding: Option<&str>,
1543
- ) -> Result<Vec<TransactionWriteRow>> {
1544
- entity_lix_state_write_rows_from_batch_with_options(
1545
- spec,
1546
- batch,
1547
- insert_column_intents,
1548
- version_binding,
1549
- true,
1550
- )
1551
- }
1552
-
1553
- fn entity_existing_lix_state_write_rows_from_batch(
1554
- spec: &EntitySurfaceSpec,
1555
- batch: &RecordBatch,
1556
- version_binding: Option<&str>,
1557
- ) -> Result<Vec<TransactionWriteRow>> {
1558
- entity_lix_state_write_rows_from_batch_with_options(
1559
- spec,
1560
- batch,
1561
- &InsertColumnIntents::all_explicit(),
1562
- version_binding,
1563
- false,
1564
- )
1565
- }
1566
-
1567
- fn entity_lix_state_write_rows_from_batch_with_options(
1568
- spec: &EntitySurfaceSpec,
1569
- batch: &RecordBatch,
1570
- insert_column_intents: &InsertColumnIntents,
1571
- version_binding: Option<&str>,
1572
- reject_read_only_fields: bool,
1573
- ) -> Result<Vec<TransactionWriteRow>> {
1574
- (0..batch.num_rows())
1575
- .map(|row_index| {
1576
- let scope = resolve_write_version_scope(
1577
- optional_bool_value(batch, row_index, "lixcol_global")?,
1578
- optional_string_value(batch, row_index, "lixcol_version_id")?,
1579
- version_binding,
1580
- &format!(
1581
- "INSERT into {}_by_version",
1582
- spec.schema_key
1583
- ),
1584
- &spec.schema_key,
1585
- )?;
1586
-
1587
- if let Some(schema_key) = optional_string_value(batch, row_index, "lixcol_schema_key")?
1588
- {
1589
- if schema_key != spec.schema_key {
1590
- return Err(DataFusionError::Execution(format!(
1591
- "INSERT into entity surface '{}' cannot set lixcol_schema_key to '{}'",
1592
- spec.schema_key, schema_key
1593
- )));
1594
- }
1595
- }
1596
-
1597
- if reject_read_only_fields {
1598
- reject_present_entity_insert_field(batch, row_index, "lixcol_snapshot_content")?;
1599
- reject_present_entity_insert_field(batch, row_index, "lixcol_created_at")?;
1600
- reject_present_entity_insert_field(batch, row_index, "lixcol_updated_at")?;
1601
- reject_present_entity_insert_field(batch, row_index, "lixcol_change_id")?;
1602
- reject_present_entity_insert_field(batch, row_index, "lixcol_commit_id")?;
1603
- }
1604
-
1605
- let snapshot_content =
1606
- entity_snapshot_content_from_batch(spec, batch, insert_column_intents, row_index)?;
1607
- let explicit_entity_id = optional_string_value(batch, row_index, "lixcol_entity_id")?;
1608
- let entity_id = if spec.primary_key_paths.is_empty() {
1609
- let entity_id = explicit_entity_id.ok_or_else(|| {
1610
- DataFusionError::Execution(format!(
1611
- "INSERT into entity surface '{}' requires lixcol_entity_id because the schema has no x-lix-primary-key",
1612
- spec.schema_key
1613
- ))
1614
- })?;
1615
- Some(EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
1616
- DataFusionError::Execution(format!(
1617
- "INSERT into entity surface '{}' has invalid lixcol_entity_id: {error}",
1618
- spec.schema_key
1619
- ))
1620
- })?)
1621
- } else {
1622
- explicit_entity_id
1623
- .map(|entity_id| {
1624
- EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
1625
- DataFusionError::Execution(format!(
1626
- "INSERT into entity surface '{}' has invalid lixcol_entity_id: {error}",
1627
- spec.schema_key
1628
- ))
1629
- })
1630
- })
1631
- .transpose()?
1632
- };
1633
-
1634
- Ok(TransactionWriteRow {
1635
- entity_id,
1636
- schema_key: spec.schema_key.clone(),
1637
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1638
- snapshot: Some(TransactionJson::from_value(
1639
- snapshot_content,
1640
- &format!("{} insert snapshot_content", spec.schema_key),
1641
- )
1642
- .map_err(super::error::lix_error_to_datafusion_error)?),
1643
- metadata: optional_metadata_value(
1644
- batch,
1645
- row_index,
1646
- "lixcol_metadata",
1647
- &spec.schema_key,
1648
- )?,
1649
- origin: None,
1650
- created_at: None,
1651
- updated_at: None,
1652
- global: scope.global,
1653
- change_id: None,
1654
- commit_id: None,
1655
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?
1656
- .unwrap_or(false),
1657
- version_id: scope.version_id,
1658
- })
1659
- })
1660
- .collect()
1661
- }
1662
-
1663
- fn entity_snapshot_content_from_batch(
1664
- spec: &EntitySurfaceSpec,
1665
- batch: &RecordBatch,
1666
- insert_column_intents: &InsertColumnIntents,
1667
- row_index: usize,
1668
- ) -> Result<JsonValue> {
1669
- let mut object = serde_json::Map::new();
1670
- for column in &spec.columns {
1671
- let value = match insert_column_intents.cell(batch, row_index, &column.name)? {
1672
- InsertCell::Omitted => {
1673
- continue;
1674
- }
1675
- InsertCell::Provided(SqlCell::Null) => JsonValue::Null,
1676
- InsertCell::Provided(SqlCell::Value(value)) => {
1677
- entity_json_value_from_scalar(Some(value), column.column_type)?
1678
- }
1679
- };
1680
- object.insert(column.name.clone(), value);
1681
- }
1682
- Ok(JsonValue::Object(object))
1683
- }
1684
-
1685
- fn entity_json_value_from_scalar(
1686
- value: Option<ScalarValue>,
1687
- column_type: EntityColumnType,
1688
- ) -> Result<JsonValue> {
1689
- let Some(value) = value else {
1690
- return Ok(JsonValue::Null);
1691
- };
1692
- match value {
1693
- ScalarValue::Null
1694
- | ScalarValue::Utf8(None)
1695
- | ScalarValue::Utf8View(None)
1696
- | ScalarValue::LargeUtf8(None)
1697
- | ScalarValue::Boolean(None)
1698
- | ScalarValue::Int64(None)
1699
- | ScalarValue::Int32(None)
1700
- | ScalarValue::UInt64(None)
1701
- | ScalarValue::UInt32(None)
1702
- | ScalarValue::Float64(None)
1703
- | ScalarValue::Float32(None) => Ok(JsonValue::Null),
1704
- ScalarValue::Utf8(Some(value))
1705
- | ScalarValue::Utf8View(Some(value))
1706
- | ScalarValue::LargeUtf8(Some(value)) => match column_type {
1707
- EntityColumnType::Json => {
1708
- // JSON surface columns accept SQL strings as JSON string values,
1709
- // while still allowing callers to pass serialized JSON text for
1710
- // objects, arrays, numbers, booleans, and null.
1711
- Ok(serde_json::from_str(&value).unwrap_or(JsonValue::String(value)))
1712
- }
1713
- EntityColumnType::Integer => {
1714
- value.parse::<i64>().map(JsonValue::from).map_err(|error| {
1715
- DataFusionError::Execution(format!(
1716
- "entity integer column expected integer text, got error: {error}"
1717
- ))
1718
- })
1719
- }
1720
- EntityColumnType::Number => value
1721
- .parse::<f64>()
1722
- .map_err(|error| {
1723
- DataFusionError::Execution(format!(
1724
- "entity number column expected number text, got error: {error}"
1725
- ))
1726
- })
1727
- .and_then(json_number_from_f64),
1728
- EntityColumnType::Boolean => {
1729
- value.parse::<bool>().map(JsonValue::from).map_err(|error| {
1730
- DataFusionError::Execution(format!(
1731
- "entity boolean column expected boolean text, got error: {error}"
1732
- ))
1733
- })
1734
- }
1735
- EntityColumnType::String => Ok(JsonValue::String(value)),
1736
- },
1737
- ScalarValue::Boolean(Some(value)) => Ok(JsonValue::Bool(value)),
1738
- ScalarValue::Int64(Some(value)) => Ok(JsonValue::from(value)),
1739
- ScalarValue::Int32(Some(value)) => Ok(JsonValue::from(value)),
1740
- ScalarValue::UInt64(Some(value)) => Ok(JsonValue::from(value)),
1741
- ScalarValue::UInt32(Some(value)) => Ok(JsonValue::from(value)),
1742
- ScalarValue::Float64(Some(value)) => json_number_from_f64(value),
1743
- ScalarValue::Float32(Some(value)) => json_number_from_f64(value as f64),
1744
- ScalarValue::Binary(Some(_))
1745
- | ScalarValue::LargeBinary(Some(_))
1746
- | ScalarValue::FixedSizeBinary(_, Some(_)) => Err(lix_error_to_datafusion_error(
1747
- LixError::new(
1748
- LixError::CODE_TYPE_MISMATCH,
1749
- "entity JSON columns cannot store blob values directly",
1750
- )
1751
- .with_hint(
1752
- "Encode bytes explicitly as JSON text/object, or store raw bytes in a blob-native surface such as lix_file.data.",
1753
- ),
1754
- )),
1755
- ScalarValue::Binary(None)
1756
- | ScalarValue::LargeBinary(None)
1757
- | ScalarValue::FixedSizeBinary(_, None) => Ok(JsonValue::Null),
1758
- other => Err(DataFusionError::Execution(format!(
1759
- "entity insert does not support scalar value {other:?}"
1760
- ))),
1761
- }
1762
- }
1763
-
1764
- fn json_number_from_f64(value: f64) -> Result<JsonValue> {
1765
- serde_json::Number::from_f64(value)
1766
- .map(JsonValue::Number)
1767
- .ok_or_else(|| {
1768
- DataFusionError::Execution(format!("entity number column cannot store {value}"))
1769
- })
1770
- }
1771
-
1772
- fn reject_present_entity_insert_field(
1773
- batch: &RecordBatch,
1774
- row_index: usize,
1775
- column_name: &str,
1776
- ) -> Result<()> {
1777
- if optional_scalar_value(batch, row_index, column_name)?.is_some_and(|value| !value.is_null()) {
1778
- return Err(DataFusionError::Execution(format!(
1779
- "INSERT into entity surface cannot stage read-only column '{column_name}'"
1780
- )));
1781
- }
1782
- Ok(())
1783
- }
1784
-
1785
- fn optional_string_value(
1786
- batch: &RecordBatch,
1787
- row_index: usize,
1788
- column_name: &str,
1789
- ) -> Result<Option<String>> {
1790
- match optional_scalar_value(batch, row_index, column_name)? {
1791
- None
1792
- | Some(ScalarValue::Null)
1793
- | Some(ScalarValue::Utf8(None))
1794
- | Some(ScalarValue::Utf8View(None))
1795
- | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
1796
- Some(ScalarValue::Utf8(Some(value)))
1797
- | Some(ScalarValue::Utf8View(Some(value)))
1798
- | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
1799
- Some(other) => Err(DataFusionError::Execution(format!(
1800
- "INSERT into entity surface expected text-compatible column '{column_name}', got {other:?}"
1801
- ))),
1802
- }
1803
- }
1804
-
1805
- fn optional_metadata_value(
1806
- batch: &RecordBatch,
1807
- row_index: usize,
1808
- column_name: &str,
1809
- context: &str,
1810
- ) -> Result<Option<TransactionJson>> {
1811
- optional_string_value(batch, row_index, column_name)?
1812
- .map(|value| {
1813
- let metadata = parse_row_metadata_value(&value, context)
1814
- .map_err(super::error::lix_error_to_datafusion_error)?;
1815
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
1816
- .map_err(super::error::lix_error_to_datafusion_error)
1817
- })
1818
- .transpose()
1819
- }
1820
-
1821
- fn optional_bool_value(
1822
- batch: &RecordBatch,
1823
- row_index: usize,
1824
- column_name: &str,
1825
- ) -> Result<Option<bool>> {
1826
- match optional_scalar_value(batch, row_index, column_name)? {
1827
- None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
1828
- Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
1829
- Some(other) => Err(DataFusionError::Execution(format!(
1830
- "INSERT into entity surface expected boolean column '{column_name}', got {other:?}"
1831
- ))),
1832
- }
1833
- }
1834
-
1835
- fn optional_scalar_value(
1836
- batch: &RecordBatch,
1837
- row_index: usize,
1838
- column_name: &str,
1839
- ) -> Result<Option<ScalarValue>> {
1840
- let schema = batch.schema();
1841
- let column_index = match schema.index_of(column_name) {
1842
- Ok(column_index) => column_index,
1843
- Err(_) => return Ok(None),
1844
- };
1845
- if row_index >= batch.num_rows() {
1846
- return Err(DataFusionError::Execution(format!(
1847
- "row index {row_index} out of bounds for entity batch with {} rows",
1848
- batch.num_rows()
1849
- )));
1850
- }
1851
- ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
1852
- .map(Some)
1853
- .map_err(|error| {
1854
- DataFusionError::Execution(format!(
1855
- "failed to decode entity column '{column_name}' at row {row_index}: {error}"
1856
- ))
1857
- })
1858
- }
1859
-
1860
- struct EntityScanExec {
1861
- spec: Arc<EntitySurfaceSpec>,
1862
- live_state: Arc<dyn LiveStateReader>,
1863
- schema: SchemaRef,
1864
- request: LiveStateScanRequest,
1865
- properties: Arc<PlanProperties>,
1866
- }
1867
-
1868
- impl std::fmt::Debug for EntityScanExec {
1869
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1870
- f.debug_struct("EntityScanExec")
1871
- .field("schema_key", &self.spec.schema_key)
1872
- .finish()
1873
- }
1874
- }
1875
-
1876
- impl EntityScanExec {
1877
- fn new(
1878
- spec: Arc<EntitySurfaceSpec>,
1879
- live_state: Arc<dyn LiveStateReader>,
1880
- schema: SchemaRef,
1881
- request: LiveStateScanRequest,
1882
- ) -> Self {
1883
- let properties = PlanProperties::new(
1884
- EquivalenceProperties::new(Arc::clone(&schema)),
1885
- Partitioning::UnknownPartitioning(1),
1886
- EmissionType::Incremental,
1887
- Boundedness::Bounded,
1888
- );
1889
- Self {
1890
- spec,
1891
- live_state,
1892
- schema,
1893
- request,
1894
- properties: Arc::new(properties),
1895
- }
1896
- }
1897
- }
1898
-
1899
- impl DisplayAs for EntityScanExec {
1900
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1901
- match t {
1902
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
1903
- write!(
1904
- f,
1905
- "EntityScanExec(schema_key={}, limit={:?})",
1906
- self.spec.schema_key, self.request.limit
1907
- )
1908
- }
1909
- DisplayFormatType::TreeRender => write!(f, "EntityScanExec"),
1910
- }
1911
- }
1912
- }
1913
-
1914
- impl ExecutionPlan for EntityScanExec {
1915
- fn name(&self) -> &str {
1916
- "EntityScanExec"
1917
- }
1918
-
1919
- fn as_any(&self) -> &dyn Any {
1920
- self
1921
- }
1922
-
1923
- fn properties(&self) -> &Arc<PlanProperties> {
1924
- &self.properties
1925
- }
1926
-
1927
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1928
- Vec::new()
1929
- }
1930
-
1931
- fn with_new_children(
1932
- self: Arc<Self>,
1933
- children: Vec<Arc<dyn ExecutionPlan>>,
1934
- ) -> Result<Arc<dyn ExecutionPlan>> {
1935
- if !children.is_empty() {
1936
- return Err(DataFusionError::Execution(
1937
- "EntityScanExec does not accept children".to_string(),
1938
- ));
1939
- }
1940
- Ok(self)
1941
- }
1942
-
1943
- fn execute(
1944
- &self,
1945
- partition: usize,
1946
- _context: Arc<TaskContext>,
1947
- ) -> Result<SendableRecordBatchStream> {
1948
- if partition != 0 {
1949
- return Err(DataFusionError::Execution(format!(
1950
- "EntityScanExec only exposes one partition, got {partition}"
1951
- )));
1952
- }
1953
-
1954
- let spec = Arc::clone(&self.spec);
1955
- let live_state = Arc::clone(&self.live_state);
1956
- let schema = Arc::clone(&self.schema);
1957
- let request = self.request.clone();
1958
- let stream_schema = Arc::clone(&schema);
1959
- let stream = stream::once(async move {
1960
- let rows = if request.limit == Some(0) {
1961
- Vec::new()
1962
- } else {
1963
- live_state
1964
- .scan_rows(&request)
1965
- .await
1966
- .map_err(lix_error_to_datafusion_error)?
1967
- };
1968
- let batch = entity_record_batch(&spec, Arc::clone(&stream_schema), &rows)?;
1969
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
1970
- batch,
1971
- )]))
1972
- })
1973
- .try_flatten();
1974
-
1975
- Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
1976
- }
1977
- }
1978
-
1979
- fn entity_live_state_scan_request(
1980
- schema_key: &str,
1981
- active_version_id: Option<&str>,
1982
- projected_schema: Option<&Schema>,
1983
- limit: Option<usize>,
1984
- ) -> LiveStateScanRequest {
1985
- LiveStateScanRequest {
1986
- filter: LiveStateFilter {
1987
- schema_keys: vec![schema_key.to_string()],
1988
- version_ids: active_version_id
1989
- .map(|version_id| vec![version_id.to_string()])
1990
- .unwrap_or_default(),
1991
- ..LiveStateFilter::default()
1992
- },
1993
- projection: entity_live_state_projection(projected_schema),
1994
- limit,
1995
- }
1996
- }
1997
-
1998
- fn entity_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
1999
- let Some(schema) = projected_schema else {
2000
- return LiveStateProjection::default();
2001
- };
2002
- let mut columns = projection_column_names(schema);
2003
- if schema
2004
- .fields()
2005
- .iter()
2006
- .any(|field| !field.name().starts_with("lixcol_"))
2007
- && !columns.iter().any(|column| column == "snapshot_content")
2008
- {
2009
- columns.push("snapshot_content".to_string());
2010
- }
2011
- LiveStateProjection { columns }
2012
- }
2013
-
2014
- fn projection_column_names(schema: &Schema) -> Vec<String> {
2015
- schema
2016
- .fields()
2017
- .iter()
2018
- .filter_map(|field| field.name().strip_prefix("lixcol_"))
2019
- .map(str::to_string)
2020
- .collect()
2021
- }
2022
-
2023
- fn entity_record_batch(
2024
- spec: &EntitySurfaceSpec,
2025
- schema: SchemaRef,
2026
- rows: &[MaterializedLiveStateRow],
2027
- ) -> Result<RecordBatch> {
2028
- if schema.fields().is_empty() {
2029
- let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
2030
- return RecordBatch::try_new_with_options(schema, vec![], &options)
2031
- .map_err(DataFusionError::from);
2032
- }
2033
-
2034
- let snapshots = rows
2035
- .iter()
2036
- .map(|row| parse_snapshot(row.snapshot_content.as_deref()))
2037
- .collect::<Result<Vec<_>>>()?;
2038
-
2039
- let columns = schema
2040
- .fields()
2041
- .iter()
2042
- .map(|field| entity_column_array(spec, field.name(), rows, &snapshots))
2043
- .collect::<Result<Vec<_>>>()?;
2044
-
2045
- RecordBatch::try_new(schema, columns).map_err(DataFusionError::from)
2046
- }
2047
-
2048
- fn entity_column_array(
2049
- spec: &EntitySurfaceSpec,
2050
- column_name: &str,
2051
- rows: &[MaterializedLiveStateRow],
2052
- snapshots: &[Option<JsonValue>],
2053
- ) -> Result<ArrayRef> {
2054
- if let Some(property_name) = column_name.strip_prefix("lixcol_") {
2055
- return entity_system_column_array(property_name, rows);
2056
- }
2057
-
2058
- let column_type = spec
2059
- .visible_column(column_name)
2060
- .ok_or_else(|| {
2061
- DataFusionError::Execution(format!(
2062
- "sql2 entity provider '{}' does not expose column '{}'",
2063
- spec.schema_key, column_name
2064
- ))
2065
- })?
2066
- .column_type;
2067
-
2068
- let values = snapshots
2069
- .iter()
2070
- .map(|snapshot| snapshot.as_ref().and_then(|value| value.get(column_name)))
2071
- .collect::<Vec<_>>();
2072
- Ok(match column_type {
2073
- EntityColumnType::String | EntityColumnType::Json => Arc::new(StringArray::from(
2074
- values
2075
- .iter()
2076
- .map(|value| entity_json_text_value(*value, column_type))
2077
- .collect::<Result<Vec<_>>>()?,
2078
- )) as ArrayRef,
2079
- EntityColumnType::Integer => Arc::new(Int64Array::from(
2080
- values
2081
- .iter()
2082
- .map(|value| entity_i64_value(*value))
2083
- .collect::<Vec<_>>(),
2084
- )) as ArrayRef,
2085
- EntityColumnType::Number => Arc::new(Float64Array::from(
2086
- values
2087
- .iter()
2088
- .map(|value| entity_f64_value(*value))
2089
- .collect::<Vec<_>>(),
2090
- )) as ArrayRef,
2091
- EntityColumnType::Boolean => Arc::new(BooleanArray::from(
2092
- values
2093
- .iter()
2094
- .map(|value| value.and_then(JsonValue::as_bool))
2095
- .collect::<Vec<_>>(),
2096
- )) as ArrayRef,
2097
- })
2098
- }
2099
-
2100
- fn entity_system_column_array(
2101
- column_name: &str,
2102
- rows: &[MaterializedLiveStateRow],
2103
- ) -> Result<ArrayRef> {
2104
- Ok(match column_name {
2105
- "entity_id" => Arc::new(StringArray::from(
2106
- rows.iter()
2107
- .map(|row| {
2108
- row.entity_id
2109
- .as_json_array_text()
2110
- .map(Some)
2111
- .map_err(lix_error_to_datafusion_error)
2112
- })
2113
- .collect::<Result<Vec<_>>>()?,
2114
- )) as ArrayRef,
2115
- "schema_key" => string_array(rows.iter().map(|row| Some(row.schema_key.as_str()))),
2116
- "file_id" => string_array(rows.iter().map(|row| row.file_id.as_deref())),
2117
- "snapshot_content" => string_array(rows.iter().map(|row| row.snapshot_content.as_deref())),
2118
- "metadata" => Arc::new(StringArray::from(
2119
- rows.iter()
2120
- .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
2121
- .collect::<Vec<_>>(),
2122
- )) as ArrayRef,
2123
- "created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
2124
- "updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
2125
- "global" => Arc::new(BooleanArray::from(
2126
- rows.iter().map(|row| row.global).collect::<Vec<_>>(),
2127
- )) as ArrayRef,
2128
- "change_id" => string_array(rows.iter().map(|row| row.change_id.as_deref())),
2129
- "commit_id" => string_array(rows.iter().map(|row| row.commit_id.as_deref())),
2130
- "untracked" => Arc::new(BooleanArray::from(
2131
- rows.iter().map(|row| row.untracked).collect::<Vec<_>>(),
2132
- )) as ArrayRef,
2133
- "version_id" => string_array(rows.iter().map(|row| Some(row.version_id.as_str()))),
2134
- other => {
2135
- return Err(DataFusionError::Execution(format!(
2136
- "sql2 entity provider does not support system column 'lixcol_{other}'"
2137
- )))
2138
- }
2139
- })
2140
- }
2141
-
2142
- pub(super) fn parse_snapshot(snapshot_content: Option<&str>) -> Result<Option<JsonValue>> {
2143
- snapshot_content
2144
- .map(|snapshot| {
2145
- serde_json::from_str::<JsonValue>(snapshot).map_err(|error| {
2146
- DataFusionError::Execution(format!(
2147
- "sql2 entity provider expected valid snapshot_content JSON: {error}"
2148
- ))
2149
- })
2150
- })
2151
- .transpose()
2152
- }
2153
-
2154
- pub(super) fn entity_json_text_value(
2155
- value: Option<&JsonValue>,
2156
- column_type: EntityColumnType,
2157
- ) -> Result<Option<String>> {
2158
- Ok(match (column_type, value) {
2159
- (_, None) | (_, Some(JsonValue::Null)) => None,
2160
- (EntityColumnType::String, Some(JsonValue::Bool(value))) => Some(if *value {
2161
- "true".to_string()
2162
- } else {
2163
- "false".to_string()
2164
- }),
2165
- (EntityColumnType::String, Some(JsonValue::String(value))) => Some(value.clone()),
2166
- (EntityColumnType::String, Some(other)) => Some(json_to_string(other)?),
2167
- (EntityColumnType::Json, Some(other)) => Some(json_to_string(other)?),
2168
- _ => None,
2169
- })
2170
- }
2171
-
2172
- pub(super) fn entity_i64_value(value: Option<&JsonValue>) -> Option<i64> {
2173
- match value {
2174
- Some(JsonValue::Number(number)) => number.as_i64(),
2175
- Some(JsonValue::String(value)) => value.parse::<i64>().ok(),
2176
- _ => None,
2177
- }
2178
- }
2179
-
2180
- pub(super) fn entity_f64_value(value: Option<&JsonValue>) -> Option<f64> {
2181
- match value {
2182
- Some(JsonValue::Number(number)) => number.as_f64(),
2183
- Some(JsonValue::String(value)) => value.parse::<f64>().ok(),
2184
- _ => None,
2185
- }
2186
- }
2187
-
2188
- fn json_to_string(value: &JsonValue) -> Result<String> {
2189
- serde_json::to_string(value).map_err(|error| {
2190
- DataFusionError::Execution(format!("failed to render JSON value: {error}"))
2191
- })
2192
- }
2193
-
2194
- pub(super) fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
2195
- let values = values
2196
- .map(|value| value.map(ToOwned::to_owned))
2197
- .collect::<Vec<_>>();
2198
- Arc::new(StringArray::from(values)) as ArrayRef
2199
- }
2200
-
2201
- pub(super) fn entity_surface_schema(
2202
- spec: &EntitySurfaceSpec,
2203
- variant: EntityProviderVariant,
2204
- ) -> SchemaRef {
2205
- let mut fields = spec
2206
- .columns
2207
- .iter()
2208
- .map(|column| {
2209
- let field = Field::new(
2210
- &column.name,
2211
- arrow_data_type_for_entity_column_type(column.column_type),
2212
- true,
2213
- );
2214
- if column.column_type == EntityColumnType::Json {
2215
- mark_json_field(field)
2216
- } else {
2217
- field
2218
- }
2219
- })
2220
- .collect::<Vec<_>>();
2221
-
2222
- fields.extend(entity_system_fields(variant));
2223
- Arc::new(Schema::new(fields))
2224
- }
2225
-
2226
- fn arrow_data_type_for_entity_column_type(column_type: EntityColumnType) -> DataType {
2227
- match column_type {
2228
- EntityColumnType::String | EntityColumnType::Json => DataType::Utf8,
2229
- EntityColumnType::Integer => DataType::Int64,
2230
- EntityColumnType::Number => DataType::Float64,
2231
- EntityColumnType::Boolean => DataType::Boolean,
2232
- }
2233
- }
2234
-
2235
- pub(super) fn entity_system_fields(variant: EntityProviderVariant) -> Vec<Field> {
2236
- if variant == EntityProviderVariant::History {
2237
- return vec![
2238
- json_field(HISTORY_COL_ENTITY_ID, false),
2239
- Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
2240
- Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
2241
- json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
2242
- json_field(HISTORY_COL_METADATA, true),
2243
- Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
2244
- Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
2245
- Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
2246
- Field::new(HISTORY_COL_START_COMMIT_ID, DataType::Utf8, false),
2247
- Field::new(HISTORY_COL_DEPTH, DataType::Int64, false),
2248
- ];
2249
- }
2250
-
2251
- let mut fields = vec![
2252
- json_field("lixcol_entity_id", true),
2253
- Field::new("lixcol_schema_key", DataType::Utf8, false),
2254
- Field::new("lixcol_file_id", DataType::Utf8, true),
2255
- json_field("lixcol_snapshot_content", true),
2256
- json_field("lixcol_metadata", true),
2257
- Field::new("lixcol_created_at", DataType::Utf8, true),
2258
- Field::new("lixcol_updated_at", DataType::Utf8, true),
2259
- Field::new("lixcol_global", DataType::Boolean, true),
2260
- Field::new("lixcol_change_id", DataType::Utf8, true),
2261
- Field::new("lixcol_commit_id", DataType::Utf8, true),
2262
- Field::new("lixcol_untracked", DataType::Boolean, true),
2263
- ];
2264
- if variant == EntityProviderVariant::ByVersion {
2265
- fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
2266
- }
2267
- fields
2268
- }
2269
-
2270
- fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
2271
- let Some(projection) = projection else {
2272
- return Ok(Arc::clone(schema));
2273
- };
2274
- Ok(Arc::new(schema.project(projection)?))
2275
- }
2276
-
2277
- fn derive_entity_surface_spec_from_schema(
2278
- schema: &JsonValue,
2279
- ) -> std::result::Result<EntitySurfaceSpec, LixError> {
2280
- let schema_key = schema
2281
- .get("x-lix-key")
2282
- .and_then(JsonValue::as_str)
2283
- .ok_or_else(|| {
2284
- LixError::new(
2285
- "LIX_ERROR_UNKNOWN",
2286
- "schema is missing string x-lix-key".to_string(),
2287
- )
2288
- })?;
2289
-
2290
- let properties = schema
2291
- .get("properties")
2292
- .and_then(JsonValue::as_object)
2293
- .ok_or_else(|| {
2294
- LixError::new(
2295
- LixError::CODE_SCHEMA_DEFINITION,
2296
- format!("schema '{schema_key}' must define object properties"),
2297
- )
2298
- })?;
2299
-
2300
- let mut columns = properties
2301
- .iter()
2302
- .filter(|(key, _)| !key.starts_with("lixcol_"))
2303
- .map(|(key, property_schema)| {
2304
- let column_type = entity_column_type_from_schema(property_schema).ok_or_else(|| {
2305
- LixError::new(
2306
- LixError::CODE_SCHEMA_DEFINITION,
2307
- format!(
2308
- "schema '{schema_key}' property '/{key}' must declare a SQL-projectable JSON Schema type"
2309
- ),
2310
- )
2311
- .with_hint("Use an explicit type such as string, number, integer, boolean, object, array, or a supported union of those types.")
2312
- })?;
2313
- Ok(EntitySurfaceColumn {
2314
- name: key.clone(),
2315
- column_type,
2316
- })
2317
- })
2318
- .collect::<std::result::Result<Vec<_>, LixError>>()?;
2319
- columns.sort_by(|left, right| left.name.cmp(&right.name));
2320
-
2321
- let primary_key_paths = parse_primary_key_paths(schema)?;
2322
-
2323
- Ok(EntitySurfaceSpec {
2324
- schema_key: schema_key.to_string(),
2325
- primary_key_paths,
2326
- columns,
2327
- })
2328
- }
2329
-
2330
- fn parse_primary_key_paths(schema: &JsonValue) -> std::result::Result<Vec<Vec<String>>, LixError> {
2331
- let Some(primary_key) = schema.get("x-lix-primary-key") else {
2332
- return Ok(Vec::new());
2333
- };
2334
- let primary_key = primary_key.as_array().ok_or_else(|| {
2335
- LixError::new(
2336
- "LIX_ERROR_UNKNOWN",
2337
- "schema x-lix-primary-key must be an array of JSON Pointers".to_string(),
2338
- )
2339
- })?;
2340
-
2341
- primary_key
2342
- .iter()
2343
- .enumerate()
2344
- .map(|(index, pointer)| {
2345
- let pointer = pointer.as_str().ok_or_else(|| {
2346
- LixError::new(
2347
- "LIX_ERROR_UNKNOWN",
2348
- format!("schema x-lix-primary-key entry at index {index} must be a string"),
2349
- )
2350
- })?;
2351
- parse_json_pointer(pointer)
2352
- })
2353
- .collect()
2354
- }
2355
-
2356
- // TODO(engine): share JSON Pointer parsing with schema/canonical validation once
2357
- // those helpers have a clean module boundary for SQL providers.
2358
- fn parse_json_pointer(pointer: &str) -> std::result::Result<Vec<String>, LixError> {
2359
- if pointer.is_empty() {
2360
- return Ok(Vec::new());
2361
- }
2362
- if !pointer.starts_with('/') {
2363
- return Err(LixError::new(
2364
- "LIX_ERROR_UNKNOWN",
2365
- format!("invalid JSON pointer '{pointer}'"),
2366
- ));
2367
- }
2368
- pointer[1..]
2369
- .split('/')
2370
- .map(decode_json_pointer_segment)
2371
- .collect()
2372
- }
2373
-
2374
- fn decode_json_pointer_segment(segment: &str) -> std::result::Result<String, LixError> {
2375
- let mut out = String::new();
2376
- let mut chars = segment.chars();
2377
- while let Some(ch) = chars.next() {
2378
- if ch == '~' {
2379
- match chars.next() {
2380
- Some('0') => out.push('~'),
2381
- Some('1') => out.push('/'),
2382
- _ => {
2383
- return Err(LixError::new(
2384
- "LIX_ERROR_UNKNOWN",
2385
- format!("invalid JSON pointer segment '{segment}'"),
2386
- ))
2387
- }
2388
- }
2389
- } else {
2390
- out.push(ch);
2391
- }
2392
- }
2393
- Ok(out)
2394
- }
2395
-
2396
- fn schema_exposed_as_entity_surface(schema_key: &str) -> bool {
2397
- !matches!(schema_key, "lix_active_account" | "lix_change")
2398
- }
2399
-
2400
- fn schema_exposed_as_entity_history_surface(schema_key: &str) -> bool {
2401
- !matches!(schema_key, "lix_commit" | "lix_commit_edge")
2402
- }
2403
-
2404
- fn entity_column_type_from_schema(schema: &JsonValue) -> Option<EntityColumnType> {
2405
- let mut kinds = BTreeSet::new();
2406
- collect_entity_type_kinds(schema, &mut kinds);
2407
- kinds.remove("null");
2408
-
2409
- if kinds.is_empty() {
2410
- return None;
2411
- }
2412
-
2413
- if kinds.len() == 1 {
2414
- return match kinds.into_iter().next() {
2415
- Some("boolean") => Some(EntityColumnType::Boolean),
2416
- Some("integer") => Some(EntityColumnType::Integer),
2417
- Some("number") => Some(EntityColumnType::Number),
2418
- Some("string") => Some(EntityColumnType::String),
2419
- Some("object" | "array") => Some(EntityColumnType::Json),
2420
- _ => None,
2421
- };
2422
- }
2423
-
2424
- Some(EntityColumnType::Json)
2425
- }
2426
-
2427
- fn collect_entity_type_kinds<'a>(schema: &'a JsonValue, out: &mut BTreeSet<&'a str>) {
2428
- match schema.get("type") {
2429
- Some(JsonValue::String(kind)) => {
2430
- out.insert(kind.as_str());
2431
- }
2432
- Some(JsonValue::Array(kinds)) => {
2433
- for kind in kinds.iter().filter_map(JsonValue::as_str) {
2434
- out.insert(kind);
2435
- }
2436
- }
2437
- _ => {}
2438
- }
2439
-
2440
- for keyword in ["anyOf", "oneOf", "allOf"] {
2441
- if let Some(JsonValue::Array(branches)) = schema.get(keyword) {
2442
- for branch in branches {
2443
- collect_entity_type_kinds(branch, out);
2444
- }
2445
- }
2446
- }
2447
- }
2448
-
2449
- fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
2450
- super::error::datafusion_error_to_lix_error(error)
2451
- }
2452
-
2453
- fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
2454
- DataFusionError::External(Box::new(error))
2455
- }
2456
-
2457
- #[cfg(test)]
2458
- mod tests {
2459
- use std::sync::Arc;
2460
-
2461
- use async_trait::async_trait;
2462
- use datafusion::arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray};
2463
- use datafusion::arrow::datatypes::{DataType, Field, Schema};
2464
- use datafusion::arrow::record_batch::RecordBatch;
2465
- use datafusion::common::{Column, ScalarValue};
2466
- use datafusion::execution::TaskContext;
2467
- use datafusion::logical_expr::expr::InList;
2468
- use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
2469
- use serde_json::json;
2470
-
2471
- use super::{
2472
- derive_entity_surface_spec_from_schema, entity_lix_state_write_rows_from_batch,
2473
- entity_record_batch, entity_surface_schema, schema_exposed_as_entity_surface,
2474
- EntityColumnType, EntityInsertSink, EntityProviderVariant,
2475
- };
2476
- use crate::binary_cas::BlobDataReader;
2477
- use crate::functions::{
2478
- FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
2479
- };
2480
- use crate::live_state::{
2481
- LiveStateReader, LiveStateRowRequest, LiveStateScanRequest, MaterializedLiveStateRow,
2482
- };
2483
- use crate::sql2::dml::InsertSink;
2484
- use crate::sql2::write_normalization::InsertColumnIntents;
2485
- use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
2486
- use crate::transaction::types::{
2487
- TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
2488
- TransactionWriteRow,
2489
- };
2490
- use crate::version::{VersionHead, VersionRefReader};
2491
- use crate::LixError;
2492
-
2493
- struct EmptyLiveStateReader;
2494
- struct EmptyVersionRefReader;
2495
- #[derive(Default)]
2496
- struct CapturingWriteContext {
2497
- rows: Vec<MaterializedLiveStateRow>,
2498
- writes: Vec<TransactionWrite>,
2499
- }
2500
-
2501
- #[async_trait]
2502
- impl LiveStateReader for EmptyLiveStateReader {
2503
- async fn scan_rows(
2504
- &self,
2505
- _request: &LiveStateScanRequest,
2506
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2507
- Ok(vec![])
2508
- }
2509
-
2510
- async fn load_row(
2511
- &self,
2512
- _request: &LiveStateRowRequest,
2513
- ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
2514
- Ok(None)
2515
- }
2516
- }
2517
-
2518
- #[async_trait]
2519
- impl VersionRefReader for EmptyVersionRefReader {
2520
- async fn load_head(&self, _version_id: &str) -> Result<Option<VersionHead>, LixError> {
2521
- Ok(None)
2522
- }
2523
-
2524
- async fn scan_heads(&self) -> Result<Vec<VersionHead>, LixError> {
2525
- Ok(Vec::new())
2526
- }
2527
- }
2528
-
2529
- fn empty_version_ref() -> Arc<dyn VersionRefReader> {
2530
- Arc::new(EmptyVersionRefReader)
2531
- }
2532
-
2533
- fn test_functions() -> FunctionProviderHandle {
2534
- SharedFunctionProvider::new(
2535
- Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
2536
- )
2537
- }
2538
-
2539
- #[async_trait]
2540
- impl BlobDataReader for CapturingWriteContext {
2541
- async fn load_bytes_many(
2542
- &self,
2543
- hashes: &[crate::binary_cas::BlobHash],
2544
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2545
- Ok(crate::binary_cas::BlobBytesBatch::new(vec![
2546
- None;
2547
- hashes.len()
2548
- ]))
2549
- }
2550
- }
2551
-
2552
- #[async_trait]
2553
- impl SqlWriteExecutionContext for CapturingWriteContext {
2554
- fn active_version_id(&self) -> &str {
2555
- "version-a"
2556
- }
2557
-
2558
- fn functions(&self) -> FunctionProviderHandle {
2559
- test_functions()
2560
- }
2561
-
2562
- fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
2563
- Ok(Vec::new())
2564
- }
2565
-
2566
- async fn load_bytes_many(
2567
- &mut self,
2568
- hashes: &[crate::binary_cas::BlobHash],
2569
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2570
- BlobDataReader::load_bytes_many(self, hashes).await
2571
- }
2572
-
2573
- async fn scan_live_state(
2574
- &mut self,
2575
- _request: &LiveStateScanRequest,
2576
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2577
- Ok(self.rows.clone())
2578
- }
2579
-
2580
- async fn load_version_head(
2581
- &mut self,
2582
- version_id: &str,
2583
- ) -> Result<Option<String>, LixError> {
2584
- if version_id == "ghost-version" {
2585
- return Ok(None);
2586
- }
2587
- Ok(Some(format!("commit-{version_id}")))
2588
- }
2589
-
2590
- async fn stage_write(
2591
- &mut self,
2592
- write: TransactionWrite,
2593
- ) -> Result<TransactionWriteOutcome, LixError> {
2594
- self.writes.push(write);
2595
- Ok(TransactionWriteOutcome { count: 0 })
2596
- }
2597
- }
2598
-
2599
- fn live_row() -> MaterializedLiveStateRow {
2600
- MaterializedLiveStateRow {
2601
- entity_id: crate::entity_identity::EntityIdentity::single("entity-1"),
2602
- schema_key: "project_message".to_string(),
2603
- file_id: None,
2604
- snapshot_content: Some(
2605
- "{\"body\":\"hello\",\"rating\":4.5,\"count\":7,\"enabled\":true,\"meta\":{\"x\":1}}"
2606
- .to_string(),
2607
- ),
2608
- metadata: Some(json!({"source": "test"}).to_string()),
2609
- deleted: false,
2610
- version_id: "version-a".to_string(),
2611
- change_id: Some("change-a".to_string()),
2612
- commit_id: Some("commit-a".to_string()),
2613
- global: false,
2614
- untracked: false,
2615
- created_at: "2026-04-23T00:00:00Z".to_string(),
2616
- updated_at: "2026-04-23T01:00:00Z".to_string(),
2617
- }
2618
- }
2619
-
2620
- fn entity_insert_spec() -> Arc<super::EntitySurfaceSpec> {
2621
- Arc::new(
2622
- derive_entity_surface_spec_from_schema(&json!({
2623
- "x-lix-key": "project_message",
2624
- "type": "object",
2625
- "properties": {
2626
- "body": { "type": "string" },
2627
- "count": { "type": "integer" },
2628
- "enabled": { "type": "boolean" },
2629
- "meta": { "type": "object" },
2630
- "rating": { "type": "number" }
2631
- }
2632
- }))
2633
- .expect("schema should derive entity surface spec"),
2634
- )
2635
- }
2636
-
2637
- fn entity_insert_spec_with_primary_key() -> Arc<super::EntitySurfaceSpec> {
2638
- Arc::new(
2639
- derive_entity_surface_spec_from_schema(&json!({
2640
- "x-lix-key": "project_message",
2641
- "x-lix-primary-key": ["/id"],
2642
- "type": "object",
2643
- "properties": {
2644
- "id": { "type": "string" },
2645
- "body": { "type": "string" }
2646
- },
2647
- "required": ["id", "body"]
2648
- }))
2649
- .expect("schema should derive entity surface spec"),
2650
- )
2651
- }
2652
-
2653
- fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
2654
- Arc::new(StringArray::from(values)) as ArrayRef
2655
- }
2656
-
2657
- fn string_literal(value: &str) -> Expr {
2658
- Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
2659
- }
2660
-
2661
- fn column(name: &str) -> Expr {
2662
- Expr::Column(Column::from_name(name))
2663
- }
2664
-
2665
- fn eq_filter(column_name: &str, value: &str) -> Expr {
2666
- Expr::BinaryExpr(BinaryExpr::new(
2667
- Box::new(column(column_name)),
2668
- Operator::Eq,
2669
- Box::new(string_literal(value)),
2670
- ))
2671
- }
2672
-
2673
- fn entity_insert_batch(include_version: bool, global: bool) -> RecordBatch {
2674
- let mut fields = vec![
2675
- Field::new("body", DataType::Utf8, true),
2676
- Field::new("count", DataType::Int64, true),
2677
- Field::new("enabled", DataType::Boolean, true),
2678
- Field::new("meta", DataType::Utf8, true),
2679
- Field::new("rating", DataType::Float64, true),
2680
- Field::new("lixcol_entity_id", DataType::Utf8, false),
2681
- Field::new("lixcol_metadata", DataType::Utf8, true),
2682
- Field::new("lixcol_global", DataType::Boolean, false),
2683
- Field::new("lixcol_untracked", DataType::Boolean, false),
2684
- ];
2685
- let mut columns = vec![
2686
- string_column(vec![Some("hello")]),
2687
- Arc::new(Int64Array::from(vec![7])) as ArrayRef,
2688
- Arc::new(BooleanArray::from(vec![true])) as ArrayRef,
2689
- string_column(vec![Some("{\"x\":1}")]),
2690
- Arc::new(Float64Array::from(vec![4.5])) as ArrayRef,
2691
- string_column(vec![Some("[\"entity-1\"]")]),
2692
- string_column(vec![Some("{\"source\":\"entity\"}")]),
2693
- Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
2694
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2695
- ];
2696
- if include_version {
2697
- fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
2698
- columns.push(string_column(vec![Some("version-a")]));
2699
- }
2700
-
2701
- RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
2702
- .expect("entity insert batch should build")
2703
- }
2704
-
2705
- fn primary_key_entity_insert_batch(include_entity_id: bool) -> RecordBatch {
2706
- let mut fields = vec![
2707
- Field::new("id", DataType::Utf8, false),
2708
- Field::new("body", DataType::Utf8, true),
2709
- Field::new("lixcol_version_id", DataType::Utf8, false),
2710
- ];
2711
- let mut columns = vec![
2712
- string_column(vec![Some("message-1")]),
2713
- string_column(vec![Some("hello")]),
2714
- string_column(vec![Some("version-a")]),
2715
- ];
2716
- if include_entity_id {
2717
- fields.push(Field::new("lixcol_entity_id", DataType::Utf8, false));
2718
- columns.push(string_column(vec![Some("[\"message-1\"]")]));
2719
- }
2720
-
2721
- RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
2722
- .expect("primary-key entity insert batch should build")
2723
- }
2724
-
2725
- #[test]
2726
- fn excludes_non_entity_builtin_session_surfaces() {
2727
- assert!(!schema_exposed_as_entity_surface("lix_active_account"));
2728
- assert!(schema_exposed_as_entity_surface("project_message"));
2729
- }
2730
-
2731
- #[test]
2732
- fn derives_entity_surface_spec_from_schema_definition() {
2733
- let spec = derive_entity_surface_spec_from_schema(&json!({
2734
- "x-lix-key": "project_message",
2735
- "type": "object",
2736
- "properties": {
2737
- "body": { "type": "string" },
2738
- "rating": { "type": "number" },
2739
- "meta": { "type": "object" },
2740
- "lixcol_entity_id": { "type": "string" }
2741
- }
2742
- }))
2743
- .expect("schema should derive entity surface spec");
2744
-
2745
- assert_eq!(spec.schema_key, "project_message");
2746
- assert_eq!(
2747
- spec.visible_column_names().collect::<Vec<_>>(),
2748
- vec!["body", "meta", "rating"]
2749
- );
2750
- assert_eq!(
2751
- spec.visible_column("body").map(|column| column.column_type),
2752
- Some(EntityColumnType::String)
2753
- );
2754
- assert_eq!(
2755
- spec.visible_column("rating")
2756
- .map(|column| column.column_type),
2757
- Some(EntityColumnType::Number)
2758
- );
2759
- assert_eq!(
2760
- spec.visible_column("meta").map(|column| column.column_type),
2761
- Some(EntityColumnType::Json)
2762
- );
2763
- assert!(!spec.is_visible_column("lixcol_entity_id"));
2764
- }
2765
-
2766
- #[test]
2767
- fn entity_surface_spec_rejects_properties_without_projection_type() {
2768
- let error = derive_entity_surface_spec_from_schema(&json!({
2769
- "x-lix-key": "project_message",
2770
- "x-lix-primary-key": ["/id"],
2771
- "type": "object",
2772
- "properties": {
2773
- "id": { "type": "string" },
2774
- "kind": {}
2775
- },
2776
- "required": ["id", "kind"],
2777
- "additionalProperties": false
2778
- }))
2779
- .expect_err("unprojectable property should be rejected");
2780
-
2781
- assert_eq!(error.code, LixError::CODE_SCHEMA_DEFINITION);
2782
- assert!(
2783
- error.message.contains("property '/kind'"),
2784
- "error should identify the property: {error:?}"
2785
- );
2786
- }
2787
-
2788
- #[test]
2789
- fn by_version_schema_includes_version_system_column() {
2790
- let spec = derive_entity_surface_spec_from_schema(&json!({
2791
- "x-lix-key": "project_message",
2792
- "type": "object",
2793
- "properties": {
2794
- "body": { "type": "string" }
2795
- }
2796
- }))
2797
- .expect("schema should derive entity surface spec");
2798
-
2799
- let schema = entity_surface_schema(&spec, EntityProviderVariant::ByVersion);
2800
- assert!(schema.field_with_name("body").is_ok());
2801
- assert!(schema.field_with_name("lixcol_entity_id").is_ok());
2802
- assert!(schema.field_with_name("lixcol_version_id").is_ok());
2803
- }
2804
-
2805
- #[test]
2806
- fn active_schema_excludes_version_system_column() {
2807
- let spec = derive_entity_surface_spec_from_schema(&json!({
2808
- "x-lix-key": "project_message",
2809
- "type": "object",
2810
- "properties": {
2811
- "body": { "type": "string" }
2812
- }
2813
- }))
2814
- .expect("schema should derive entity surface spec");
2815
-
2816
- let schema = entity_surface_schema(&spec, EntityProviderVariant::Active);
2817
- assert!(schema.field_with_name("body").is_ok());
2818
- assert!(schema.field_with_name("lixcol_entity_id").is_ok());
2819
- assert!(schema.field_with_name("lixcol_version_id").is_err());
2820
- }
2821
-
2822
- #[test]
2823
- fn insert_schema_allows_defaulted_identity_columns_to_be_omitted() {
2824
- let spec = derive_entity_surface_spec_from_schema(&json!({
2825
- "x-lix-key": "project_message",
2826
- "x-lix-primary-key": ["/id"],
2827
- "type": "object",
2828
- "properties": {
2829
- "id": { "type": "string", "x-lix-default": "lix_uuid_v7()" },
2830
- "body": { "type": "string" }
2831
- }
2832
- }))
2833
- .expect("schema should derive entity surface spec");
2834
-
2835
- let schema = entity_surface_schema(&spec, EntityProviderVariant::Active);
2836
- assert!(
2837
- schema
2838
- .field_with_name("id")
2839
- .expect("id field")
2840
- .is_nullable(),
2841
- "defaulted primary-key property should be nullable at SQL input"
2842
- );
2843
- assert!(
2844
- schema
2845
- .field_with_name("lixcol_entity_id")
2846
- .expect("entity id field")
2847
- .is_nullable(),
2848
- "opaque identity projection should be nullable for normal primary-key inserts"
2849
- );
2850
- }
2851
-
2852
- #[test]
2853
- fn record_batch_projects_payload_and_system_columns() {
2854
- let spec = Arc::new(
2855
- derive_entity_surface_spec_from_schema(&json!({
2856
- "x-lix-key": "project_message",
2857
- "type": "object",
2858
- "properties": {
2859
- "body": { "type": "string" },
2860
- "rating": { "type": "number" },
2861
- "count": { "type": "integer" },
2862
- "enabled": { "type": "boolean" },
2863
- "meta": { "type": "object" }
2864
- }
2865
- }))
2866
- .expect("schema should derive entity surface spec"),
2867
- );
2868
- let schema = entity_surface_schema(&spec, EntityProviderVariant::ByVersion);
2869
-
2870
- let batch =
2871
- entity_record_batch(&spec, schema, &[live_row()]).expect("entity batch should build");
2872
-
2873
- assert_eq!(batch.num_rows(), 1);
2874
- assert_eq!(
2875
- batch
2876
- .column_by_name("body")
2877
- .expect("body column")
2878
- .as_any()
2879
- .downcast_ref::<datafusion::arrow::array::StringArray>()
2880
- .expect("body is string")
2881
- .value(0),
2882
- "hello"
2883
- );
2884
- assert_eq!(
2885
- batch
2886
- .column_by_name("rating")
2887
- .expect("rating column")
2888
- .as_any()
2889
- .downcast_ref::<Float64Array>()
2890
- .expect("rating is f64")
2891
- .value(0),
2892
- 4.5
2893
- );
2894
- assert_eq!(
2895
- batch
2896
- .column_by_name("count")
2897
- .expect("count column")
2898
- .as_any()
2899
- .downcast_ref::<Int64Array>()
2900
- .expect("count is i64")
2901
- .value(0),
2902
- 7
2903
- );
2904
- assert_eq!(
2905
- batch
2906
- .column_by_name("lixcol_entity_id")
2907
- .expect("entity id column")
2908
- .as_any()
2909
- .downcast_ref::<datafusion::arrow::array::StringArray>()
2910
- .expect("entity id is string")
2911
- .value(0),
2912
- "[\"entity-1\"]"
2913
- );
2914
- assert_eq!(
2915
- batch
2916
- .column_by_name("lixcol_version_id")
2917
- .expect("version id column")
2918
- .as_any()
2919
- .downcast_ref::<datafusion::arrow::array::StringArray>()
2920
- .expect("version id is string")
2921
- .value(0),
2922
- "version-a"
2923
- );
2924
- }
2925
-
2926
- #[tokio::test]
2927
- async fn provider_registers_as_table_provider() {
2928
- let spec = Arc::new(
2929
- derive_entity_surface_spec_from_schema(&json!({
2930
- "x-lix-key": "project_message",
2931
- "type": "object",
2932
- "properties": {
2933
- "body": { "type": "string" }
2934
- }
2935
- }))
2936
- .expect("schema should derive entity surface spec"),
2937
- );
2938
- let provider = super::EntityProvider::by_version(
2939
- spec,
2940
- Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>,
2941
- empty_version_ref(),
2942
- );
2943
-
2944
- assert!(provider.schema.field_with_name("lixcol_version_id").is_ok());
2945
- }
2946
-
2947
- #[test]
2948
- fn primary_key_filters_route_entity_ids_for_string_primary_key() {
2949
- let spec = entity_insert_spec_with_primary_key();
2950
- let filters = vec![
2951
- eq_filter("id", "entity-a"),
2952
- Expr::InList(InList::new(
2953
- Box::new(column("id")),
2954
- vec![string_literal("entity-b"), string_literal("entity-a")],
2955
- false,
2956
- )),
2957
- ];
2958
-
2959
- let entity_ids = super::entity_ids_from_primary_key_filters(&spec, &filters)
2960
- .expect("primary-key filters should analyze")
2961
- .expect("primary-key filters should produce a constraint");
2962
-
2963
- assert_eq!(
2964
- entity_ids,
2965
- vec![crate::entity_identity::EntityIdentity::single("entity-a")]
2966
- );
2967
- }
2968
-
2969
- #[test]
2970
- fn primary_key_filter_analyzer_models_boolean_predicates() {
2971
- let spec = entity_insert_spec_with_primary_key();
2972
- let analyzer = super::EntityPrimaryKeyFilterAnalyzer::new(&spec);
2973
- let disjunction = Expr::BinaryExpr(BinaryExpr::new(
2974
- Box::new(eq_filter("id", "entity-a")),
2975
- Operator::Or,
2976
- Box::new(eq_filter("id", "entity-b")),
2977
- ));
2978
- let contradiction = Expr::BinaryExpr(BinaryExpr::new(
2979
- Box::new(eq_filter("id", "entity-a")),
2980
- Operator::And,
2981
- Box::new(eq_filter("id", "entity-b")),
2982
- ));
2983
-
2984
- let disjunction_ids = analyzer
2985
- .analyze(&disjunction)
2986
- .expect("OR should analyze")
2987
- .expect("OR should produce an entity-id set");
2988
- let contradiction_ids = analyzer
2989
- .analyze(&contradiction)
2990
- .expect("AND should analyze")
2991
- .expect("AND should produce an entity-id set");
2992
-
2993
- assert_eq!(
2994
- disjunction_ids.into_iter().collect::<Vec<_>>(),
2995
- vec![
2996
- crate::entity_identity::EntityIdentity::single("entity-a"),
2997
- crate::entity_identity::EntityIdentity::single("entity-b"),
2998
- ]
2999
- );
3000
- assert!(contradiction_ids.is_empty());
3001
- }
3002
-
3003
- #[test]
3004
- fn primary_key_filters_ignore_non_key_and_negated_predicates() {
3005
- let spec = entity_insert_spec_with_primary_key();
3006
- let filters = vec![
3007
- eq_filter("body", "hello"),
3008
- Expr::InList(InList::new(
3009
- Box::new(column("id")),
3010
- vec![string_literal("entity-a")],
3011
- true,
3012
- )),
3013
- ];
3014
-
3015
- assert!(super::entity_ids_from_primary_key_filters(&spec, &filters)
3016
- .expect("ignored filters should analyze")
3017
- .unwrap_or_default()
3018
- .is_empty());
3019
- }
3020
-
3021
- #[test]
3022
- fn decodes_by_version_entity_insert_into_lix_state_write_row() {
3023
- let spec = entity_insert_spec();
3024
- let rows = entity_lix_state_write_rows_from_batch(
3025
- &spec,
3026
- &entity_insert_batch(true, false),
3027
- &InsertColumnIntents::all_explicit(),
3028
- None,
3029
- )
3030
- .expect("entity batch should decode");
3031
-
3032
- assert_eq!(rows.len(), 1);
3033
- assert_eq!(
3034
- rows[0].entity_id.as_ref(),
3035
- Some(&crate::entity_identity::EntityIdentity::single("entity-1"))
3036
- );
3037
- assert_eq!(rows[0].schema_key, "project_message");
3038
- assert_eq!(rows[0].version_id, "version-a");
3039
- assert_eq!(
3040
- rows[0].metadata.as_ref(),
3041
- Some(&TransactionJson::from_value_for_test(
3042
- json!({"source": "entity"})
3043
- ))
3044
- );
3045
- assert!(!rows[0].global);
3046
- assert_eq!(
3047
- rows[0].snapshot.as_ref().expect("snapshot_content"),
3048
- &json!({
3049
- "body": "hello",
3050
- "count": 7,
3051
- "enabled": true,
3052
- "meta": {"x": 1},
3053
- "rating": 4.5
3054
- })
3055
- );
3056
- }
3057
-
3058
- #[test]
3059
- fn primary_key_entity_insert_stages_partial_row_for_normalization() {
3060
- let spec = entity_insert_spec_with_primary_key();
3061
- let rows = entity_lix_state_write_rows_from_batch(
3062
- &spec,
3063
- &primary_key_entity_insert_batch(false),
3064
- &InsertColumnIntents::all_explicit(),
3065
- None,
3066
- )
3067
- .expect("entity batch should decode");
3068
-
3069
- assert_eq!(rows.len(), 1);
3070
- assert_eq!(rows[0].entity_id, None);
3071
- assert_eq!(
3072
- rows[0].snapshot.as_ref().expect("snapshot_content"),
3073
- &json!({
3074
- "body": "hello",
3075
- "id": "message-1"
3076
- })
3077
- );
3078
- }
3079
-
3080
- #[test]
3081
- fn primary_key_entity_insert_preserves_explicit_opaque_projection_for_normalization() {
3082
- let spec = entity_insert_spec_with_primary_key();
3083
- let rows = entity_lix_state_write_rows_from_batch(
3084
- &spec,
3085
- &primary_key_entity_insert_batch(true),
3086
- &InsertColumnIntents::all_explicit(),
3087
- None,
3088
- )
3089
- .expect("primary-key entity insert should stage explicit lixcol_entity_id");
3090
-
3091
- assert_eq!(rows.len(), 1);
3092
- assert_eq!(
3093
- rows[0].entity_id.as_ref(),
3094
- Some(&crate::entity_identity::EntityIdentity::single("message-1"))
3095
- );
3096
- }
3097
-
3098
- #[test]
3099
- fn active_entity_insert_defaults_version_id() {
3100
- let spec = entity_insert_spec();
3101
- let rows = entity_lix_state_write_rows_from_batch(
3102
- &spec,
3103
- &entity_insert_batch(false, false),
3104
- &InsertColumnIntents::all_explicit(),
3105
- Some("version-active"),
3106
- )
3107
- .expect("active entity batch should decode");
3108
-
3109
- assert_eq!(rows.len(), 1);
3110
- assert_eq!(rows[0].version_id, "version-active");
3111
- assert!(!rows[0].global);
3112
- }
3113
-
3114
- #[test]
3115
- fn by_version_entity_insert_requires_version_id_for_non_global_rows() {
3116
- let spec = entity_insert_spec();
3117
- let error = entity_lix_state_write_rows_from_batch(
3118
- &spec,
3119
- &entity_insert_batch(false, false),
3120
- &InsertColumnIntents::all_explicit(),
3121
- None,
3122
- )
3123
- .expect_err("by-version entity insert should require version id");
3124
-
3125
- assert!(
3126
- error.to_string().contains("requires lixcol_version_id"),
3127
- "unexpected error: {error}"
3128
- );
3129
- }
3130
-
3131
- #[test]
3132
- fn by_version_entity_insert_global_row_uses_global_version() {
3133
- let spec = entity_insert_spec();
3134
- let rows = entity_lix_state_write_rows_from_batch(
3135
- &spec,
3136
- &entity_insert_batch(false, true),
3137
- &InsertColumnIntents::all_explicit(),
3138
- None,
3139
- )
3140
- .expect("global entity batch should decode");
3141
-
3142
- assert_eq!(rows.len(), 1);
3143
- assert!(rows[0].global);
3144
- assert_eq!(rows[0].version_id, crate::GLOBAL_VERSION_ID);
3145
- }
3146
-
3147
- #[test]
3148
- fn entity_insert_rejects_global_with_non_global_version_id() {
3149
- let spec = entity_insert_spec();
3150
- let error = entity_lix_state_write_rows_from_batch(
3151
- &spec,
3152
- &entity_insert_batch(true, true),
3153
- &InsertColumnIntents::all_explicit(),
3154
- None,
3155
- )
3156
- .expect_err("global entity write should reject conflicting version id");
3157
-
3158
- assert!(
3159
- error
3160
- .to_string()
3161
- .contains("cannot set lixcol_global=true with non-global lixcol_version_id"),
3162
- "unexpected error: {error}"
3163
- );
3164
- }
3165
-
3166
- #[tokio::test]
3167
- async fn entity_insert_sink_stages_decoded_lix_state_rows() {
3168
- let spec = entity_insert_spec();
3169
- let mut write_context = CapturingWriteContext::default();
3170
- let write_ctx = SqlWriteContext::new(&mut write_context);
3171
- let batch = entity_insert_batch(true, false);
3172
- let sink = EntityInsertSink::new(
3173
- Arc::clone(&spec),
3174
- batch.schema(),
3175
- InsertColumnIntents::all_explicit(),
3176
- write_ctx,
3177
- super::VersionBinding::explicit(),
3178
- );
3179
- let count = sink
3180
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
3181
- .await
3182
- .expect("entity sink should stage write");
3183
-
3184
- assert_eq!(count, 1);
3185
- assert_eq!(
3186
- write_context.writes.as_slice(),
3187
- &[TransactionWrite::Rows {
3188
- mode: TransactionWriteMode::Insert,
3189
- rows: vec![TransactionWriteRow {
3190
- entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
3191
- schema_key: "project_message".to_string(),
3192
- file_id: None,
3193
- snapshot: Some(TransactionJson::from_value_for_test(
3194
- json!({"body":"hello","count":7,"enabled":true,"meta":{"x":1},"rating":4.5})
3195
- )),
3196
- metadata: Some(TransactionJson::from_value_for_test(
3197
- json!({"source": "entity"})
3198
- )),
3199
- origin: None,
3200
- created_at: None,
3201
- updated_at: None,
3202
- global: false,
3203
- change_id: None,
3204
- commit_id: None,
3205
- untracked: false,
3206
- version_id: "version-a".to_string(),
3207
- }]
3208
- }]
3209
- );
3210
- }
3211
- }