@lix-js/sdk 0.6.0-preview.4 → 0.6.0-preview.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. package/README.md +1 -1
  2. package/SKILL.md +65 -64
  3. package/dist/engine-wasm/index.js +4 -4
  4. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -5
  5. package/dist/engine-wasm/wasm/lix_engine.js +130 -118
  6. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  7. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +9 -8
  8. package/dist/generated/builtin-schemas.d.ts +69 -69
  9. package/dist/generated/builtin-schemas.js +94 -94
  10. package/dist/open-lix.d.ts +33 -26
  11. package/dist/open-lix.js +10 -10
  12. package/dist/sqlite/index.js +86 -30
  13. package/dist-engine-src/README.md +3 -3
  14. package/dist-engine-src/src/backend/capabilities.rs +67 -0
  15. package/dist-engine-src/src/backend/conformance/baseline.rs +1127 -0
  16. package/dist-engine-src/src/backend/conformance/factory.rs +93 -0
  17. package/dist-engine-src/src/backend/conformance/failure_tests.rs +608 -0
  18. package/dist-engine-src/src/backend/conformance/fixtures.rs +26 -0
  19. package/dist-engine-src/src/backend/conformance/mod.rs +75 -0
  20. package/dist-engine-src/src/backend/conformance/model.rs +28 -0
  21. package/dist-engine-src/src/backend/conformance/model_based.rs +257 -0
  22. package/dist-engine-src/src/backend/conformance/persistence.rs +204 -0
  23. package/dist-engine-src/src/backend/conformance/projection.rs +21 -0
  24. package/dist-engine-src/src/backend/conformance/pushdown.rs +24 -0
  25. package/dist-engine-src/src/backend/conformance/runner.rs +90 -0
  26. package/dist-engine-src/src/backend/conformance/scan.rs +24 -0
  27. package/dist-engine-src/src/backend/conformance/write.rs +16 -0
  28. package/dist-engine-src/src/backend/error.rs +94 -0
  29. package/dist-engine-src/src/backend/in_memory.rs +670 -0
  30. package/dist-engine-src/src/backend/mod.rs +36 -9
  31. package/dist-engine-src/src/backend/predicate.rs +80 -0
  32. package/dist-engine-src/src/backend/traits.rs +260 -0
  33. package/dist-engine-src/src/backend/types.rs +224 -81
  34. package/dist-engine-src/src/binary_cas/context.rs +8 -8
  35. package/dist-engine-src/src/binary_cas/kv.rs +234 -259
  36. package/dist-engine-src/src/{version → branch}/context.rs +12 -12
  37. package/dist-engine-src/src/branch/lifecycle.rs +221 -0
  38. package/dist-engine-src/src/branch/mod.rs +13 -0
  39. package/dist-engine-src/src/branch/refs.rs +321 -0
  40. package/dist-engine-src/src/branch/stage_rows.rs +67 -0
  41. package/dist-engine-src/src/branch/types.rs +21 -0
  42. package/dist-engine-src/src/catalog/context.rs +18 -18
  43. package/dist-engine-src/src/catalog/snapshot.rs +8 -8
  44. package/dist-engine-src/src/changelog/bench_support.rs +785 -0
  45. package/dist-engine-src/src/changelog/change.rs +1 -0
  46. package/dist-engine-src/src/changelog/codec.rs +497 -0
  47. package/dist-engine-src/src/changelog/commit.rs +1 -0
  48. package/dist-engine-src/src/changelog/context.rs +1614 -0
  49. package/dist-engine-src/src/changelog/mod.rs +29 -0
  50. package/dist-engine-src/src/changelog/store.rs +163 -0
  51. package/dist-engine-src/src/changelog/test_support.rs +54 -0
  52. package/dist-engine-src/src/changelog/types.rs +213 -0
  53. package/dist-engine-src/src/commit_graph/context.rs +317 -274
  54. package/dist-engine-src/src/commit_graph/mod.rs +2 -4
  55. package/dist-engine-src/src/commit_graph/types.rs +22 -42
  56. package/dist-engine-src/src/commit_graph/walker.rs +133 -103
  57. package/dist-engine-src/src/common/error.rs +52 -18
  58. package/dist-engine-src/src/common/identity.rs +2 -2
  59. package/dist-engine-src/src/common/mod.rs +1 -1
  60. package/dist-engine-src/src/domain.rs +42 -46
  61. package/dist-engine-src/src/engine.rs +74 -96
  62. package/dist-engine-src/src/{entity_identity.rs → entity_pk.rs} +89 -92
  63. package/dist-engine-src/src/functions/context.rs +56 -52
  64. package/dist-engine-src/src/functions/state.rs +51 -52
  65. package/dist-engine-src/src/init.rs +288 -154
  66. package/dist-engine-src/src/json_store/context.rs +15 -266
  67. package/dist-engine-src/src/json_store/mod.rs +26 -0
  68. package/dist-engine-src/src/json_store/store.rs +103 -718
  69. package/dist-engine-src/src/json_store/types.rs +4 -9
  70. package/dist-engine-src/src/lib.rs +49 -19
  71. package/dist-engine-src/src/live_state/context.rs +654 -790
  72. package/dist-engine-src/src/live_state/mod.rs +9 -3
  73. package/dist-engine-src/src/live_state/overlay.rs +4 -4
  74. package/dist-engine-src/src/live_state/types.rs +30 -21
  75. package/dist-engine-src/src/live_state/visibility.rs +514 -71
  76. package/dist-engine-src/src/plugin/install.rs +48 -48
  77. package/dist-engine-src/src/plugin/manifest.rs +7 -7
  78. package/dist-engine-src/src/plugin/materializer.rs +0 -275
  79. package/dist-engine-src/src/plugin/plugin_manifest.json +4 -3
  80. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +2 -2
  81. package/dist-engine-src/src/schema/builtin/lix_branch_descriptor.json +34 -0
  82. package/dist-engine-src/src/schema/builtin/lix_branch_ref.json +48 -0
  83. package/dist-engine-src/src/schema/builtin/lix_change.json +3 -3
  84. package/dist-engine-src/src/schema/builtin/lix_commit.json +1 -1
  85. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +6 -6
  86. package/dist-engine-src/src/schema/builtin/mod.rs +18 -20
  87. package/dist-engine-src/src/schema/compatibility.rs +11 -11
  88. package/dist-engine-src/src/schema/definition.json +2 -2
  89. package/dist-engine-src/src/schema/definition.rs +5 -5
  90. package/dist-engine-src/src/schema/key.rs +3 -3
  91. package/dist-engine-src/src/schema/mod.rs +1 -1
  92. package/dist-engine-src/src/schema/tests.rs +18 -18
  93. package/dist-engine-src/src/session/context.rs +803 -148
  94. package/dist-engine-src/src/session/create_branch.rs +94 -0
  95. package/dist-engine-src/src/session/execute.rs +223 -83
  96. package/dist-engine-src/src/session/merge/analysis.rs +9 -3
  97. package/dist-engine-src/src/session/merge/{version.rs → branch.rs} +119 -129
  98. package/dist-engine-src/src/session/merge/conflicts.rs +2 -2
  99. package/dist-engine-src/src/session/merge/mod.rs +5 -6
  100. package/dist-engine-src/src/session/merge/stats.rs +7 -11
  101. package/dist-engine-src/src/session/mod.rs +15 -12
  102. package/dist-engine-src/src/session/switch_branch.rs +113 -0
  103. package/dist-engine-src/src/session/transaction.rs +495 -14
  104. package/dist-engine-src/src/sql2/{classify.rs → bind/classify.rs} +3 -75
  105. package/dist-engine-src/src/sql2/bind/error.rs +5 -0
  106. package/dist-engine-src/src/sql2/bind/expr.rs +29 -0
  107. package/dist-engine-src/src/sql2/bind/mod.rs +12 -0
  108. package/dist-engine-src/src/sql2/{udfs/public_call.rs → bind/public_udf.rs} +71 -3
  109. package/dist-engine-src/src/sql2/bind/read.rs +65 -0
  110. package/dist-engine-src/src/sql2/bind/statement.rs +2236 -0
  111. package/dist-engine-src/src/sql2/bind/table.rs +273 -0
  112. package/dist-engine-src/src/sql2/bind/write.rs +86 -0
  113. package/dist-engine-src/src/sql2/branch_scope.rs +436 -0
  114. package/dist-engine-src/src/sql2/catalog/capability.rs +20 -0
  115. package/dist-engine-src/src/sql2/catalog/entity_surface.rs +296 -0
  116. package/dist-engine-src/src/sql2/catalog/mod.rs +15 -0
  117. package/dist-engine-src/src/sql2/catalog/registry.rs +556 -0
  118. package/dist-engine-src/src/sql2/catalog/schema.rs +88 -0
  119. package/dist-engine-src/src/sql2/catalog/surface.rs +41 -0
  120. package/dist-engine-src/src/sql2/change_materialization.rs +122 -0
  121. package/dist-engine-src/src/sql2/context.rs +36 -30
  122. package/dist-engine-src/src/sql2/error.rs +1 -1
  123. package/dist-engine-src/src/sql2/exec/bound_public_write.rs +1593 -0
  124. package/dist-engine-src/src/sql2/exec/datafusion.rs +5266 -0
  125. package/dist-engine-src/src/sql2/exec/fast_write.rs +82 -0
  126. package/dist-engine-src/src/sql2/exec/mod.rs +24 -0
  127. package/dist-engine-src/src/sql2/exec/write.rs +661 -0
  128. package/dist-engine-src/src/sql2/filesystem_planner.rs +72 -77
  129. package/dist-engine-src/src/sql2/filesystem_visibility.rs +21 -21
  130. package/dist-engine-src/src/sql2/history_projection.rs +8 -8
  131. package/dist-engine-src/src/sql2/history_route.rs +35 -31
  132. package/dist-engine-src/src/sql2/mod.rs +28 -23
  133. package/dist-engine-src/src/sql2/optimize/datafusion.rs +1 -0
  134. package/dist-engine-src/src/sql2/optimize/mod.rs +2 -0
  135. package/dist-engine-src/src/sql2/optimize/simple_write.rs +116 -0
  136. package/dist-engine-src/src/sql2/parse/mod.rs +69 -0
  137. package/dist-engine-src/src/sql2/parse/normalize.rs +1 -0
  138. package/dist-engine-src/src/sql2/plan/branch_scope.rs +24 -0
  139. package/dist-engine-src/src/sql2/plan/mod.rs +5 -0
  140. package/dist-engine-src/src/sql2/plan/predicate.rs +22 -0
  141. package/dist-engine-src/src/sql2/plan/write.rs +147 -0
  142. package/dist-engine-src/src/sql2/predicate_typecheck.rs +258 -0
  143. package/dist-engine-src/src/sql2/{version_provider.rs → providers/branch.rs} +218 -214
  144. package/dist-engine-src/src/sql2/{change_provider.rs → providers/change.rs} +156 -42
  145. package/dist-engine-src/src/sql2/{directory_provider.rs → providers/directory.rs} +291 -322
  146. package/dist-engine-src/src/sql2/{directory_history_provider.rs → providers/directory_history.rs} +56 -42
  147. package/dist-engine-src/src/sql2/providers/entity.rs +1484 -0
  148. package/dist-engine-src/src/sql2/{entity_history_provider.rs → providers/entity_history.rs} +43 -31
  149. package/dist-engine-src/src/sql2/{file_provider.rs → providers/file.rs} +323 -316
  150. package/dist-engine-src/src/sql2/{file_history_provider.rs → providers/file_history.rs} +60 -46
  151. package/dist-engine-src/src/sql2/{history_provider.rs → providers/history.rs} +46 -32
  152. package/dist-engine-src/src/sql2/{lix_state_provider.rs → providers/lix_state.rs} +359 -329
  153. package/dist-engine-src/src/sql2/providers/mod.rs +508 -0
  154. package/dist-engine-src/src/sql2/read_only.rs +2 -2
  155. package/dist-engine-src/src/sql2/session.rs +47 -96
  156. package/dist-engine-src/src/sql2/storage/constraints.rs +1 -0
  157. package/dist-engine-src/src/sql2/storage/mod.rs +1 -0
  158. package/dist-engine-src/src/sql2/test_support/differential.rs +712 -0
  159. package/dist-engine-src/src/sql2/test_support/generators.rs +354 -0
  160. package/dist-engine-src/src/sql2/test_support/mod.rs +2 -0
  161. package/dist-engine-src/src/sql2/udfs/{lix_active_version_commit_id.rs → lix_active_branch_commit_id.rs} +7 -7
  162. package/dist-engine-src/src/sql2/udfs/mod.rs +3 -6
  163. package/dist-engine-src/src/sql2/write_normalization.rs +45 -22
  164. package/dist-engine-src/src/storage/conformance.rs +399 -0
  165. package/dist-engine-src/src/storage/context.rs +552 -288
  166. package/dist-engine-src/src/storage/mod.rs +48 -10
  167. package/dist-engine-src/src/storage/point.rs +440 -0
  168. package/dist-engine-src/src/storage/read_scope.rs +43 -64
  169. package/dist-engine-src/src/storage/reader.rs +867 -0
  170. package/dist-engine-src/src/storage/scan.rs +784 -0
  171. package/dist-engine-src/src/storage/spaces.rs +236 -0
  172. package/dist-engine-src/src/storage/stats.rs +80 -0
  173. package/dist-engine-src/src/storage/write_set.rs +962 -0
  174. package/dist-engine-src/src/storage_bench.rs +136 -4828
  175. package/dist-engine-src/src/test_support.rs +360 -138
  176. package/dist-engine-src/src/tracked_state/bench_support.rs +394 -0
  177. package/dist-engine-src/src/tracked_state/codec.rs +155 -1057
  178. package/dist-engine-src/src/tracked_state/commit_root_rebuild.rs +358 -0
  179. package/dist-engine-src/src/tracked_state/context.rs +1927 -993
  180. package/dist-engine-src/src/tracked_state/diff.rs +1715 -261
  181. package/dist-engine-src/src/tracked_state/merge.rs +74 -88
  182. package/dist-engine-src/src/tracked_state/mod.rs +19 -16
  183. package/dist-engine-src/src/tracked_state/{materialization.rs → row_materialization.rs} +50 -178
  184. package/dist-engine-src/src/tracked_state/storage.rs +243 -191
  185. package/dist-engine-src/src/tracked_state/tree.rs +247 -371
  186. package/dist-engine-src/src/tracked_state/types.rs +49 -42
  187. package/dist-engine-src/src/transaction/bench_support.rs +407 -0
  188. package/dist-engine-src/src/transaction/commit.rs +821 -713
  189. package/dist-engine-src/src/transaction/context.rs +705 -600
  190. package/dist-engine-src/src/transaction/mod.rs +13 -2
  191. package/dist-engine-src/src/transaction/normalization.rs +63 -76
  192. package/dist-engine-src/src/transaction/prep.rs +13 -13
  193. package/dist-engine-src/src/transaction/schema_resolver.rs +19 -5
  194. package/dist-engine-src/src/transaction/staging.rs +228 -434
  195. package/dist-engine-src/src/transaction/types.rs +41 -98
  196. package/dist-engine-src/src/transaction/validation.rs +382 -446
  197. package/dist-engine-src/src/untracked_state/codec.rs +337 -29
  198. package/dist-engine-src/src/untracked_state/context.rs +7 -7
  199. package/dist-engine-src/src/untracked_state/materialization.rs +2 -2
  200. package/dist-engine-src/src/untracked_state/mod.rs +1 -1
  201. package/dist-engine-src/src/untracked_state/storage.rs +659 -157
  202. package/dist-engine-src/src/untracked_state/types.rs +21 -21
  203. package/package.json +71 -68
  204. package/dist-engine-src/src/backend/kv.rs +0 -358
  205. package/dist-engine-src/src/backend/testing.rs +0 -658
  206. package/dist-engine-src/src/commit_store/codec.rs +0 -887
  207. package/dist-engine-src/src/commit_store/context.rs +0 -944
  208. package/dist-engine-src/src/commit_store/materialization.rs +0 -84
  209. package/dist-engine-src/src/commit_store/mod.rs +0 -16
  210. package/dist-engine-src/src/commit_store/storage.rs +0 -600
  211. package/dist-engine-src/src/commit_store/types.rs +0 -215
  212. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -34
  213. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -48
  214. package/dist-engine-src/src/session/create_version.rs +0 -88
  215. package/dist-engine-src/src/session/merge/apply.rs +0 -23
  216. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +0 -100
  217. package/dist-engine-src/src/session/switch_version.rs +0 -110
  218. package/dist-engine-src/src/sql2/entity_provider.rs +0 -3211
  219. package/dist-engine-src/src/sql2/execute.rs +0 -3533
  220. package/dist-engine-src/src/sql2/public_bind/assignment.rs +0 -46
  221. package/dist-engine-src/src/sql2/public_bind/capability.rs +0 -41
  222. package/dist-engine-src/src/sql2/public_bind/dml.rs +0 -172
  223. package/dist-engine-src/src/sql2/public_bind/mod.rs +0 -26
  224. package/dist-engine-src/src/sql2/public_bind/table.rs +0 -168
  225. package/dist-engine-src/src/sql2/version_scope.rs +0 -394
  226. package/dist-engine-src/src/storage/types.rs +0 -501
  227. package/dist-engine-src/src/tracked_state/by_file_index.rs +0 -98
  228. package/dist-engine-src/src/tracked_state/materializer.rs +0 -488
  229. package/dist-engine-src/src/transaction/live_state_overlay.rs +0 -35
  230. package/dist-engine-src/src/version/lifecycle.rs +0 -221
  231. package/dist-engine-src/src/version/mod.rs +0 -13
  232. package/dist-engine-src/src/version/refs.rs +0 -330
  233. package/dist-engine-src/src/version/stage_rows.rs +0 -67
  234. package/dist-engine-src/src/version/types.rs +0 -21
@@ -0,0 +1,1484 @@
1
+ use std::any::Any;
2
+ use std::collections::{BTreeMap, BTreeSet};
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray};
7
+ use datafusion::arrow::datatypes::{Schema, SchemaRef};
8
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
9
+ use datafusion::catalog::{Session, TableProvider};
10
+ use datafusion::common::{not_impl_err, DataFusionError, Result, ScalarValue};
11
+ use datafusion::datasource::TableType;
12
+ use datafusion::execution::TaskContext;
13
+ use datafusion::logical_expr::dml::InsertOp;
14
+ use datafusion::logical_expr::expr::InList;
15
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
16
+ use datafusion::physical_expr::EquivalenceProperties;
17
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
18
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
19
+ use datafusion::physical_plan::{
20
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
21
+ };
22
+ use datafusion::prelude::SessionContext;
23
+ use futures_util::{stream, TryStreamExt};
24
+ use serde_json::Value as JsonValue;
25
+
26
+ use crate::branch::BranchRefReader;
27
+ use crate::commit_graph::CommitGraphReader;
28
+ use crate::entity_pk::EntityPk;
29
+ use crate::live_state::MaterializedLiveStateRow;
30
+ use crate::live_state::{
31
+ LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateRowFilter, LiveStateScanRequest,
32
+ };
33
+ use crate::sql2::branch_scope::{resolve_provider_branch_ids, BranchBinding};
34
+ use crate::sql2::catalog::{
35
+ entity_surface_schema, EntityColumnType, EntitySurfaceShape, EntitySurfaceSpec, PublicCatalog,
36
+ PublicSurfaceKind,
37
+ };
38
+ use crate::{serialize_row_metadata, LixError};
39
+
40
+ use crate::sql2::{
41
+ SqlHistoryQuerySource, SqlWriteContext, WriteContextBranchRefReader,
42
+ WriteContextLiveStateReader,
43
+ };
44
+
45
+ use super::entity_history::EntityHistoryProvider;
46
+ use crate::storage::StorageRead;
47
+
48
+ pub(crate) async fn register_entity_providers<S>(
49
+ ctx: &SessionContext,
50
+ active_branch_id: &str,
51
+ live_state: Arc<dyn LiveStateReader>,
52
+ branch_ref: Arc<dyn BranchRefReader>,
53
+ commit_graph: Arc<tokio::sync::Mutex<Box<dyn CommitGraphReader>>>,
54
+ query_source: SqlHistoryQuerySource<S>,
55
+ catalog: &PublicCatalog,
56
+ ) -> Result<(), LixError>
57
+ where
58
+ S: StorageRead + Clone + Send + Sync + 'static,
59
+ {
60
+ for surface in catalog.surfaces() {
61
+ match &surface.kind {
62
+ PublicSurfaceKind::EntityBase { schema_key } => {
63
+ let spec = catalog_entity_spec(catalog, schema_key)?;
64
+ ctx.register_table(
65
+ &surface.name,
66
+ Arc::new(EntityProvider::active(
67
+ spec,
68
+ Arc::clone(&live_state),
69
+ Arc::clone(&branch_ref),
70
+ active_branch_id.to_string(),
71
+ )),
72
+ )
73
+ .map_err(datafusion_error_to_lix_error)?;
74
+ }
75
+ PublicSurfaceKind::EntityByBranch { schema_key } => {
76
+ let spec = catalog_entity_spec(catalog, schema_key)?;
77
+ ctx.register_table(
78
+ &surface.name,
79
+ Arc::new(EntityProvider::by_branch(
80
+ spec,
81
+ Arc::clone(&live_state),
82
+ Arc::clone(&branch_ref),
83
+ )),
84
+ )
85
+ .map_err(datafusion_error_to_lix_error)?;
86
+ }
87
+ PublicSurfaceKind::EntityHistory { schema_key } => {
88
+ let spec = catalog_entity_spec(catalog, schema_key)?;
89
+ ctx.register_table(
90
+ &surface.name,
91
+ Arc::new(EntityHistoryProvider::new(
92
+ spec,
93
+ Arc::clone(&commit_graph),
94
+ query_source.clone(),
95
+ )),
96
+ )
97
+ .map_err(datafusion_error_to_lix_error)?;
98
+ }
99
+ _ => {}
100
+ }
101
+ }
102
+
103
+ Ok(())
104
+ }
105
+
106
+ pub(crate) async fn register_entity_write_providers(
107
+ ctx: &SessionContext,
108
+ write_ctx: SqlWriteContext,
109
+ catalog: &PublicCatalog,
110
+ ) -> Result<(), LixError> {
111
+ for surface in catalog.surfaces() {
112
+ match &surface.kind {
113
+ PublicSurfaceKind::EntityBase { schema_key } => {
114
+ let spec = catalog_entity_spec(catalog, schema_key)?;
115
+ ctx.register_table(
116
+ &surface.name,
117
+ Arc::new(EntityProvider::active_with_write(spec, write_ctx.clone())),
118
+ )
119
+ .map_err(datafusion_error_to_lix_error)?;
120
+ }
121
+ PublicSurfaceKind::EntityByBranch { schema_key } => {
122
+ let spec = catalog_entity_spec(catalog, schema_key)?;
123
+ ctx.register_table(
124
+ &surface.name,
125
+ Arc::new(EntityProvider::by_branch_with_write(
126
+ spec,
127
+ write_ctx.clone(),
128
+ )),
129
+ )
130
+ .map_err(datafusion_error_to_lix_error)?;
131
+ }
132
+ _ => {}
133
+ }
134
+ }
135
+
136
+ Ok(())
137
+ }
138
+
139
+ fn catalog_entity_spec(
140
+ catalog: &PublicCatalog,
141
+ schema_key: &str,
142
+ ) -> Result<Arc<EntitySurfaceSpec>, LixError> {
143
+ catalog
144
+ .entity_spec(schema_key)
145
+ .cloned()
146
+ .map(Arc::new)
147
+ .ok_or_else(|| {
148
+ LixError::new(
149
+ LixError::CODE_SCHEMA_DEFINITION,
150
+ format!("catalog entity surface '{schema_key}' is missing its surface spec"),
151
+ )
152
+ })
153
+ }
154
+
155
+ pub(crate) struct EntityProvider {
156
+ spec: Arc<EntitySurfaceSpec>,
157
+ live_state: Arc<dyn LiveStateReader>,
158
+ branch_ref: Arc<dyn BranchRefReader>,
159
+ schema: SchemaRef,
160
+ variant: EntitySurfaceShape,
161
+ branch_binding: BranchBinding,
162
+ }
163
+
164
+ impl std::fmt::Debug for EntityProvider {
165
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166
+ f.debug_struct("EntityProvider")
167
+ .field("schema_key", &self.spec.schema_key)
168
+ .field("variant", &self.variant)
169
+ .finish()
170
+ }
171
+ }
172
+
173
+ impl EntityProvider {
174
+ fn active(
175
+ spec: Arc<EntitySurfaceSpec>,
176
+ live_state: Arc<dyn LiveStateReader>,
177
+ branch_ref: Arc<dyn BranchRefReader>,
178
+ active_branch_id: String,
179
+ ) -> Self {
180
+ Self {
181
+ schema: entity_surface_schema(&spec, EntitySurfaceShape::Active),
182
+ spec,
183
+ live_state,
184
+ branch_ref,
185
+ variant: EntitySurfaceShape::Active,
186
+ branch_binding: BranchBinding::active(active_branch_id),
187
+ }
188
+ }
189
+
190
+ fn active_with_write(spec: Arc<EntitySurfaceSpec>, write_ctx: SqlWriteContext) -> Self {
191
+ let active_branch_id = write_ctx.active_branch_id();
192
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
193
+ let branch_ref = Arc::new(WriteContextBranchRefReader::new(write_ctx.clone()));
194
+ Self {
195
+ schema: entity_surface_schema(&spec, EntitySurfaceShape::Active),
196
+ spec,
197
+ live_state,
198
+ branch_ref,
199
+ variant: EntitySurfaceShape::Active,
200
+ branch_binding: BranchBinding::active(active_branch_id),
201
+ }
202
+ }
203
+
204
+ fn by_branch(
205
+ spec: Arc<EntitySurfaceSpec>,
206
+ live_state: Arc<dyn LiveStateReader>,
207
+ branch_ref: Arc<dyn BranchRefReader>,
208
+ ) -> Self {
209
+ Self {
210
+ schema: entity_surface_schema(&spec, EntitySurfaceShape::ByBranch),
211
+ spec,
212
+ live_state,
213
+ branch_ref,
214
+ variant: EntitySurfaceShape::ByBranch,
215
+ branch_binding: BranchBinding::explicit(),
216
+ }
217
+ }
218
+
219
+ fn by_branch_with_write(spec: Arc<EntitySurfaceSpec>, write_ctx: SqlWriteContext) -> Self {
220
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
221
+ let branch_ref = Arc::new(WriteContextBranchRefReader::new(write_ctx.clone()));
222
+ Self {
223
+ schema: entity_surface_schema(&spec, EntitySurfaceShape::ByBranch),
224
+ spec,
225
+ live_state,
226
+ branch_ref,
227
+ variant: EntitySurfaceShape::ByBranch,
228
+ branch_binding: BranchBinding::explicit(),
229
+ }
230
+ }
231
+ }
232
+
233
+ #[async_trait]
234
+ impl TableProvider for EntityProvider {
235
+ fn as_any(&self) -> &dyn Any {
236
+ self
237
+ }
238
+
239
+ fn schema(&self) -> SchemaRef {
240
+ Arc::clone(&self.schema)
241
+ }
242
+
243
+ fn table_type(&self) -> TableType {
244
+ TableType::Base
245
+ }
246
+
247
+ fn supports_filters_pushdown(
248
+ &self,
249
+ filters: &[&Expr],
250
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
251
+ let analyzer = EntityPrimaryKeyFilterAnalyzer::new(&self.spec);
252
+ Ok(filters
253
+ .iter()
254
+ .map(|filter| {
255
+ if ExactBranchIdFilterAnalyzer.supports(filter) || analyzer.supports(filter) {
256
+ TableProviderFilterPushDown::Exact
257
+ } else {
258
+ TableProviderFilterPushDown::Unsupported
259
+ }
260
+ })
261
+ .collect())
262
+ }
263
+
264
+ async fn scan(
265
+ &self,
266
+ _state: &dyn Session,
267
+ projection: Option<&Vec<usize>>,
268
+ filters: &[Expr],
269
+ limit: Option<usize>,
270
+ ) -> Result<Arc<dyn ExecutionPlan>> {
271
+ let projected_schema = projected_schema(&self.schema, projection)?;
272
+ let mut request = entity_live_state_scan_request(
273
+ &self.spec.schema_key,
274
+ self.branch_binding.active_branch_id(),
275
+ Some(projected_schema.as_ref()),
276
+ limit,
277
+ );
278
+ request.filter.branch_ids = resolve_provider_branch_ids(
279
+ self.branch_ref.as_ref(),
280
+ &self.branch_binding,
281
+ request.filter.branch_ids,
282
+ )
283
+ .await
284
+ .map_err(lix_error_to_datafusion_error)?;
285
+ apply_exact_branch_id_filter(&mut request, exact_branch_ids_from_filters(filters)?);
286
+ apply_exact_entity_pk_filters(&mut request, &self.spec, filters)?;
287
+
288
+ Ok(Arc::new(EntityScanExec::new(
289
+ Arc::clone(&self.spec),
290
+ Arc::clone(&self.live_state),
291
+ projected_schema,
292
+ request,
293
+ )))
294
+ }
295
+
296
+ async fn insert_into(
297
+ &self,
298
+ _state: &dyn Session,
299
+ _input: Arc<dyn ExecutionPlan>,
300
+ _insert_op: InsertOp,
301
+ ) -> Result<Arc<dyn ExecutionPlan>> {
302
+ not_impl_err!("raw DataFusion INSERT is disabled; use the sql2 bound write pipeline")
303
+ }
304
+
305
+ async fn delete_from(
306
+ &self,
307
+ _state: &dyn Session,
308
+ _filters: Vec<Expr>,
309
+ ) -> Result<Arc<dyn ExecutionPlan>> {
310
+ not_impl_err!("raw DataFusion DELETE is disabled; use the sql2 bound write pipeline")
311
+ }
312
+
313
+ async fn update(
314
+ &self,
315
+ _state: &dyn Session,
316
+ _assignments: Vec<(String, Expr)>,
317
+ _filters: Vec<Expr>,
318
+ ) -> Result<Arc<dyn ExecutionPlan>> {
319
+ not_impl_err!("raw DataFusion UPDATE is disabled; use the sql2 bound write pipeline")
320
+ }
321
+ }
322
+
323
+ fn entity_pks_from_primary_key_filters(
324
+ spec: &EntitySurfaceSpec,
325
+ filters: &[Expr],
326
+ ) -> Result<Option<Vec<EntityPk>>> {
327
+ let analyzer = EntityPrimaryKeyFilterAnalyzer::new(spec);
328
+ let mut entity_pks: Option<BTreeSet<EntityPk>> = None;
329
+ for filter in filters {
330
+ let Some(filter_ids) = analyzer.analyze(filter)? else {
331
+ continue;
332
+ };
333
+ entity_pks = Some(match entity_pks {
334
+ Some(existing_ids) => existing_ids.intersection(&filter_ids).cloned().collect(),
335
+ None => filter_ids,
336
+ });
337
+ }
338
+
339
+ Ok(entity_pks.map(|ids| ids.into_iter().collect()))
340
+ }
341
+
342
+ fn apply_exact_entity_pk_filters(
343
+ request: &mut LiveStateScanRequest,
344
+ spec: &EntitySurfaceSpec,
345
+ filters: &[Expr],
346
+ ) -> Result<()> {
347
+ if let Some(entity_pks) = entity_pks_from_primary_key_filters(spec, filters)? {
348
+ if entity_pks.is_empty() {
349
+ request.filter.rows = LiveStateRowFilter::None;
350
+ }
351
+ request.filter.entity_pks = entity_pks;
352
+ }
353
+ Ok(())
354
+ }
355
+
356
+ fn exact_branch_ids_from_filters(filters: &[Expr]) -> Result<Option<Vec<String>>> {
357
+ let analyzer = ExactBranchIdFilterAnalyzer;
358
+ let mut branch_ids: Option<BTreeSet<String>> = None;
359
+ for filter in filters {
360
+ let Some(filter_ids) = analyzer.analyze(filter)? else {
361
+ continue;
362
+ };
363
+ branch_ids = Some(match branch_ids {
364
+ Some(existing_ids) => existing_ids.intersection(&filter_ids).cloned().collect(),
365
+ None => filter_ids,
366
+ });
367
+ }
368
+ Ok(branch_ids.map(|ids| ids.into_iter().collect()))
369
+ }
370
+
371
+ fn apply_exact_branch_id_filter(
372
+ request: &mut LiveStateScanRequest,
373
+ branch_ids: Option<Vec<String>>,
374
+ ) {
375
+ if let Some(branch_ids) = branch_ids {
376
+ if branch_ids.is_empty() {
377
+ request.filter.rows = LiveStateRowFilter::None;
378
+ }
379
+ request.filter.branch_ids = branch_ids;
380
+ }
381
+ }
382
+
383
/// Interprets filter expressions as constraints on an entity's primary key.
struct EntityPrimaryKeyFilterAnalyzer<'a> {
    /// Primary-key column names borrowed from the surface spec (as returned by
    /// `string_primary_key_columns`). When empty, `analyze` reports no constraint.
    primary_key_columns: Vec<&'a str>,
}
386
+
387
+ struct ExactBranchIdFilterAnalyzer;
388
+
389
+ impl ExactBranchIdFilterAnalyzer {
390
+ fn supports(&self, expr: &Expr) -> bool {
391
+ self.analyze(expr)
392
+ .is_ok_and(|constraint| constraint.is_some())
393
+ }
394
+
395
+ fn analyze(&self, expr: &Expr) -> Result<Option<BTreeSet<String>>> {
396
+ match expr {
397
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
398
+ let Some(left) = self.analyze(&binary_expr.left)? else {
399
+ return Ok(None);
400
+ };
401
+ let Some(right) = self.analyze(&binary_expr.right)? else {
402
+ return Ok(None);
403
+ };
404
+ Ok(Some(left.intersection(&right).cloned().collect()))
405
+ }
406
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
407
+ let Some(mut left) = self.analyze(&binary_expr.left)? else {
408
+ return Ok(None);
409
+ };
410
+ let Some(right) = self.analyze(&binary_expr.right)? else {
411
+ return Ok(None);
412
+ };
413
+ left.extend(right);
414
+ Ok(Some(left))
415
+ }
416
+ Expr::BinaryExpr(binary_expr) => {
417
+ Ok(branch_id_from_binary_filter(binary_expr).map(|value| BTreeSet::from([value])))
418
+ }
419
+ Expr::InList(in_list) => {
420
+ Ok(branch_ids_from_in_list_filter(in_list)
421
+ .map(|values| values.into_iter().collect()))
422
+ }
423
+ _ => Ok(None),
424
+ }
425
+ }
426
+ }
427
+
428
+ fn branch_id_from_binary_filter(binary_expr: &BinaryExpr) -> Option<String> {
429
+ if binary_expr.op != Operator::Eq {
430
+ return None;
431
+ }
432
+
433
+ branch_id_from_column_literal_filter(&binary_expr.left, &binary_expr.right)
434
+ .or_else(|| branch_id_from_column_literal_filter(&binary_expr.right, &binary_expr.left))
435
+ }
436
+
437
+ fn branch_ids_from_in_list_filter(in_list: &InList) -> Option<Vec<String>> {
438
+ if in_list.negated {
439
+ return None;
440
+ }
441
+ let Expr::Column(column) = in_list.expr.as_ref() else {
442
+ return None;
443
+ };
444
+ if column.name != "lixcol_branch_id" {
445
+ return None;
446
+ }
447
+
448
+ let values = in_list
449
+ .list
450
+ .iter()
451
+ .map(string_expr_literal)
452
+ .collect::<Option<Vec<_>>>()?;
453
+ if values.is_empty() {
454
+ return None;
455
+ }
456
+ Some(values)
457
+ }
458
+
459
+ fn branch_id_from_column_literal_filter(column_expr: &Expr, literal_expr: &Expr) -> Option<String> {
460
+ let Expr::Column(column) = column_expr else {
461
+ return None;
462
+ };
463
+ if column.name != "lixcol_branch_id" {
464
+ return None;
465
+ }
466
+ string_expr_literal(literal_expr)
467
+ }
468
+
469
+ impl<'a> EntityPrimaryKeyFilterAnalyzer<'a> {
470
+ fn new(spec: &'a EntitySurfaceSpec) -> Self {
471
+ Self {
472
+ primary_key_columns: string_primary_key_columns(spec),
473
+ }
474
+ }
475
+
476
+ fn supports(&self, expr: &Expr) -> bool {
477
+ self.analyze(expr)
478
+ .is_ok_and(|constraint| constraint.is_some())
479
+ }
480
+
481
+ fn analyze(&self, expr: &Expr) -> Result<Option<BTreeSet<EntityPk>>> {
482
+ if self.primary_key_columns.is_empty() {
483
+ return Ok(None);
484
+ };
485
+ let Some(constraint) = self.analyze_constraint(expr)? else {
486
+ return Ok(None);
487
+ };
488
+ Ok(constraint.into_entity_pks(&self.primary_key_columns))
489
+ }
490
+
491
+ fn analyze_constraint(&self, expr: &Expr) -> Result<Option<EntityPkConstraint>> {
492
+ match expr {
493
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
494
+ let Some(left) = self.analyze_constraint(&binary_expr.left)? else {
495
+ return Ok(None);
496
+ };
497
+ let Some(right) = self.analyze_constraint(&binary_expr.right)? else {
498
+ return Ok(None);
499
+ };
500
+ Ok(Some(left.intersect(right, &self.primary_key_columns)))
501
+ }
502
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
503
+ let Some(left) = self.analyze_constraint(&binary_expr.left)? else {
504
+ return Ok(None);
505
+ };
506
+ let Some(right) = self.analyze_constraint(&binary_expr.right)? else {
507
+ return Ok(None);
508
+ };
509
+ let Some(left_ids) = left.into_entity_pks(&self.primary_key_columns) else {
510
+ return Ok(None);
511
+ };
512
+ let Some(mut right_ids) = right.into_entity_pks(&self.primary_key_columns) else {
513
+ return Ok(None);
514
+ };
515
+ right_ids.extend(left_ids);
516
+ Ok(Some(EntityPkConstraint::Full(right_ids)))
517
+ }
518
+ Expr::BinaryExpr(binary_expr) => Ok(entity_pk_constraint_from_binary_filter(
519
+ binary_expr,
520
+ &self.primary_key_columns,
521
+ )),
522
+ Expr::InList(in_list) => Ok(entity_pk_constraint_from_in_list_filter(
523
+ in_list,
524
+ &self.primary_key_columns,
525
+ )),
526
+ _ => Ok(None),
527
+ }
528
+ }
529
+ }
530
+
531
+ #[derive(Debug, Clone, PartialEq, Eq)]
532
+ enum EntityPkConstraint {
533
+ Full(BTreeSet<EntityPk>),
534
+ Parts(BTreeMap<String, BTreeSet<String>>),
535
+ }
536
+
537
+ impl EntityPkConstraint {
538
+ fn intersect(self, other: Self, primary_key_columns: &[&str]) -> Self {
539
+ match (self, other) {
540
+ (Self::Full(left), Self::Full(right)) => {
541
+ Self::Full(left.intersection(&right).cloned().collect())
542
+ }
543
+ (Self::Full(ids), Self::Parts(parts)) | (Self::Parts(parts), Self::Full(ids)) => {
544
+ Self::Full(
545
+ ids.into_iter()
546
+ .filter(|identity| {
547
+ identity_matches_parts(identity, primary_key_columns, &parts)
548
+ })
549
+ .collect(),
550
+ )
551
+ }
552
+ (Self::Parts(mut left), Self::Parts(right)) => {
553
+ for (column, right_values) in right {
554
+ left.entry(column)
555
+ .and_modify(|left_values| {
556
+ *left_values =
557
+ left_values.intersection(&right_values).cloned().collect();
558
+ })
559
+ .or_insert(right_values);
560
+ }
561
+ Self::Parts(left)
562
+ }
563
+ }
564
+ }
565
+
566
+ fn into_entity_pks(self, primary_key_columns: &[&str]) -> Option<BTreeSet<EntityPk>> {
567
+ match self {
568
+ Self::Full(ids) => Some(ids),
569
+ Self::Parts(parts) => entity_pks_from_primary_key_parts(primary_key_columns, parts),
570
+ }
571
+ }
572
+ }
573
+
574
+ fn string_primary_key_columns(spec: &EntitySurfaceSpec) -> Vec<&str> {
575
+ spec.primary_key_paths
576
+ .iter()
577
+ .map(|path| {
578
+ let [column_name] = path.as_slice() else {
579
+ return None;
580
+ };
581
+ let column = spec.visible_column(column_name)?;
582
+ (column.column_type == EntityColumnType::String).then_some(column.name.as_str())
583
+ })
584
+ .collect::<Option<Vec<_>>>()
585
+ .unwrap_or_default()
586
+ }
587
+
588
+ fn entity_pk_constraint_from_binary_filter(
589
+ binary_expr: &BinaryExpr,
590
+ primary_key_columns: &[&str],
591
+ ) -> Option<EntityPkConstraint> {
592
+ if binary_expr.op != Operator::Eq {
593
+ return None;
594
+ }
595
+ entity_pk_constraint_from_column_literal_filter(
596
+ &binary_expr.left,
597
+ &binary_expr.right,
598
+ primary_key_columns,
599
+ )
600
+ .or_else(|| {
601
+ entity_pk_constraint_from_column_literal_filter(
602
+ &binary_expr.right,
603
+ &binary_expr.left,
604
+ primary_key_columns,
605
+ )
606
+ })
607
+ }
608
+
609
+ fn entity_pk_constraint_from_in_list_filter(
610
+ in_list: &InList,
611
+ primary_key_columns: &[&str],
612
+ ) -> Option<EntityPkConstraint> {
613
+ if in_list.negated {
614
+ return None;
615
+ }
616
+ let Expr::Column(column) = in_list.expr.as_ref() else {
617
+ return None;
618
+ };
619
+ let values = in_list
620
+ .list
621
+ .iter()
622
+ .map(string_expr_literal)
623
+ .collect::<Option<Vec<_>>>()?;
624
+ if values.is_empty() {
625
+ return None;
626
+ }
627
+ match column.name.as_str() {
628
+ "lixcol_entity_pk" => values
629
+ .into_iter()
630
+ .map(|value| EntityPk::from_json_array_text(&value).ok())
631
+ .collect::<Option<BTreeSet<_>>>()
632
+ .map(EntityPkConstraint::Full),
633
+ column_name if primary_key_columns.contains(&column_name) => {
634
+ Some(EntityPkConstraint::Parts(BTreeMap::from([(
635
+ column_name.to_string(),
636
+ values.into_iter().collect(),
637
+ )])))
638
+ }
639
+ _ => None,
640
+ }
641
+ }
642
+
643
+ fn entity_pk_constraint_from_column_literal_filter(
644
+ column_expr: &Expr,
645
+ literal_expr: &Expr,
646
+ primary_key_columns: &[&str],
647
+ ) -> Option<EntityPkConstraint> {
648
+ let Expr::Column(column) = column_expr else {
649
+ return None;
650
+ };
651
+ let value = string_expr_literal(literal_expr)?;
652
+ match column.name.as_str() {
653
+ "lixcol_entity_pk" => EntityPk::from_json_array_text(&value)
654
+ .ok()
655
+ .map(|identity| EntityPkConstraint::Full(BTreeSet::from([identity]))),
656
+ column_name if primary_key_columns.contains(&column_name) => {
657
+ Some(EntityPkConstraint::Parts(BTreeMap::from([(
658
+ column_name.to_string(),
659
+ BTreeSet::from([value]),
660
+ )])))
661
+ }
662
+ _ => None,
663
+ }
664
+ }
665
+
666
+ fn entity_pks_from_primary_key_parts(
667
+ primary_key_columns: &[&str],
668
+ parts: BTreeMap<String, BTreeSet<String>>,
669
+ ) -> Option<BTreeSet<EntityPk>> {
670
+ if primary_key_columns
671
+ .iter()
672
+ .any(|column| !parts.contains_key(*column))
673
+ {
674
+ return None;
675
+ }
676
+
677
+ let mut identities = BTreeSet::from([Vec::<String>::new()]);
678
+ for column in primary_key_columns {
679
+ let values = parts.get(*column)?;
680
+ identities = identities
681
+ .into_iter()
682
+ .flat_map(|prefix| {
683
+ values.iter().map(move |value| {
684
+ let mut parts = prefix.clone();
685
+ parts.push(value.clone());
686
+ parts
687
+ })
688
+ })
689
+ .collect();
690
+ }
691
+ Some(
692
+ identities
693
+ .into_iter()
694
+ .map(|parts| EntityPk { parts })
695
+ .collect(),
696
+ )
697
+ }
698
+
699
+ fn identity_matches_parts(
700
+ identity: &EntityPk,
701
+ primary_key_columns: &[&str],
702
+ parts: &BTreeMap<String, BTreeSet<String>>,
703
+ ) -> bool {
704
+ let identity_parts = identity.parts.as_slice();
705
+ primary_key_columns
706
+ .iter()
707
+ .zip(identity_parts.iter())
708
+ .all(|(column, value)| {
709
+ parts
710
+ .get(*column)
711
+ .is_none_or(|values| values.contains(value))
712
+ })
713
+ }
714
+
715
+ fn string_expr_literal(expr: &Expr) -> Option<String> {
716
+ let Expr::Literal(literal, _) = expr else {
717
+ return None;
718
+ };
719
+ match literal {
720
+ ScalarValue::Utf8(Some(value))
721
+ | ScalarValue::Utf8View(Some(value))
722
+ | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
723
+ _ => None,
724
+ }
725
+ }
726
+
727
+ struct EntityScanExec {
728
+ spec: Arc<EntitySurfaceSpec>,
729
+ live_state: Arc<dyn LiveStateReader>,
730
+ schema: SchemaRef,
731
+ request: LiveStateScanRequest,
732
+ properties: Arc<PlanProperties>,
733
+ }
734
+
735
+ impl std::fmt::Debug for EntityScanExec {
736
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
737
+ f.debug_struct("EntityScanExec")
738
+ .field("schema_key", &self.spec.schema_key)
739
+ .finish()
740
+ }
741
+ }
742
+
743
+ impl EntityScanExec {
744
+ fn new(
745
+ spec: Arc<EntitySurfaceSpec>,
746
+ live_state: Arc<dyn LiveStateReader>,
747
+ schema: SchemaRef,
748
+ request: LiveStateScanRequest,
749
+ ) -> Self {
750
+ let properties = PlanProperties::new(
751
+ EquivalenceProperties::new(Arc::clone(&schema)),
752
+ Partitioning::UnknownPartitioning(1),
753
+ EmissionType::Incremental,
754
+ Boundedness::Bounded,
755
+ );
756
+ Self {
757
+ spec,
758
+ live_state,
759
+ schema,
760
+ request,
761
+ properties: Arc::new(properties),
762
+ }
763
+ }
764
+ }
765
+
766
+ impl DisplayAs for EntityScanExec {
767
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
768
+ match t {
769
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
770
+ write!(
771
+ f,
772
+ "EntityScanExec(schema_key={}, limit={:?})",
773
+ self.spec.schema_key, self.request.limit
774
+ )
775
+ }
776
+ DisplayFormatType::TreeRender => write!(f, "EntityScanExec"),
777
+ }
778
+ }
779
+ }
780
+
781
+ impl ExecutionPlan for EntityScanExec {
782
+ fn name(&self) -> &str {
783
+ "EntityScanExec"
784
+ }
785
+
786
+ fn as_any(&self) -> &dyn Any {
787
+ self
788
+ }
789
+
790
+ fn properties(&self) -> &Arc<PlanProperties> {
791
+ &self.properties
792
+ }
793
+
794
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
795
+ Vec::new()
796
+ }
797
+
798
+ fn with_new_children(
799
+ self: Arc<Self>,
800
+ children: Vec<Arc<dyn ExecutionPlan>>,
801
+ ) -> Result<Arc<dyn ExecutionPlan>> {
802
+ if !children.is_empty() {
803
+ return Err(DataFusionError::Execution(
804
+ "EntityScanExec does not accept children".to_string(),
805
+ ));
806
+ }
807
+ Ok(self)
808
+ }
809
+
810
+ fn execute(
811
+ &self,
812
+ partition: usize,
813
+ _context: Arc<TaskContext>,
814
+ ) -> Result<SendableRecordBatchStream> {
815
+ if partition != 0 {
816
+ return Err(DataFusionError::Execution(format!(
817
+ "EntityScanExec only exposes one partition, got {partition}"
818
+ )));
819
+ }
820
+
821
+ let spec = Arc::clone(&self.spec);
822
+ let live_state = Arc::clone(&self.live_state);
823
+ let schema = Arc::clone(&self.schema);
824
+ let request = self.request.clone();
825
+ let stream_schema = Arc::clone(&schema);
826
+ let stream = stream::once(async move {
827
+ let rows = live_state
828
+ .scan_rows(&request)
829
+ .await
830
+ .map_err(lix_error_to_datafusion_error)?;
831
+ let batch = entity_record_batch(&spec, Arc::clone(&stream_schema), &rows)?;
832
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
833
+ batch,
834
+ )]))
835
+ })
836
+ .try_flatten();
837
+
838
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
839
+ }
840
+ }
841
+
842
+ fn entity_live_state_scan_request(
843
+ schema_key: &str,
844
+ active_branch_id: Option<&str>,
845
+ projected_schema: Option<&Schema>,
846
+ limit: Option<usize>,
847
+ ) -> LiveStateScanRequest {
848
+ LiveStateScanRequest {
849
+ filter: LiveStateFilter {
850
+ schema_keys: vec![schema_key.to_string()],
851
+ branch_ids: active_branch_id
852
+ .map(|branch_id| vec![branch_id.to_string()])
853
+ .unwrap_or_default(),
854
+ ..LiveStateFilter::default()
855
+ },
856
+ projection: entity_live_state_projection(projected_schema),
857
+ limit,
858
+ }
859
+ }
860
+
861
+ fn entity_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
862
+ let Some(schema) = projected_schema else {
863
+ return LiveStateProjection::default();
864
+ };
865
+ let mut columns = projection_column_names(schema);
866
+ if schema
867
+ .fields()
868
+ .iter()
869
+ .any(|field| !field.name().starts_with("lixcol_"))
870
+ && !columns.iter().any(|column| column == "snapshot_content")
871
+ {
872
+ columns.push("snapshot_content".to_string());
873
+ }
874
+ LiveStateProjection { columns }
875
+ }
876
+
877
+ fn projection_column_names(schema: &Schema) -> Vec<String> {
878
+ schema
879
+ .fields()
880
+ .iter()
881
+ .filter_map(|field| field.name().strip_prefix("lixcol_"))
882
+ .map(str::to_string)
883
+ .collect()
884
+ }
885
+
886
+ fn entity_record_batch(
887
+ spec: &EntitySurfaceSpec,
888
+ schema: SchemaRef,
889
+ rows: &[MaterializedLiveStateRow],
890
+ ) -> Result<RecordBatch> {
891
+ if schema.fields().is_empty() {
892
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
893
+ return RecordBatch::try_new_with_options(schema, vec![], &options)
894
+ .map_err(DataFusionError::from);
895
+ }
896
+
897
+ let snapshots = rows
898
+ .iter()
899
+ .map(|row| parse_snapshot(row.snapshot_content.as_deref()))
900
+ .collect::<Result<Vec<_>>>()?;
901
+
902
+ let columns = schema
903
+ .fields()
904
+ .iter()
905
+ .map(|field| entity_column_array(spec, field.name(), rows, &snapshots))
906
+ .collect::<Result<Vec<_>>>()?;
907
+
908
+ RecordBatch::try_new(schema, columns).map_err(DataFusionError::from)
909
+ }
910
+
911
+ fn entity_column_array(
912
+ spec: &EntitySurfaceSpec,
913
+ column_name: &str,
914
+ rows: &[MaterializedLiveStateRow],
915
+ snapshots: &[Option<JsonValue>],
916
+ ) -> Result<ArrayRef> {
917
+ if let Some(property_name) = column_name.strip_prefix("lixcol_") {
918
+ return entity_system_column_array(property_name, rows);
919
+ }
920
+
921
+ let column_type = spec
922
+ .visible_column(column_name)
923
+ .ok_or_else(|| {
924
+ DataFusionError::Execution(format!(
925
+ "sql2 entity provider '{}' does not expose column '{}'",
926
+ spec.schema_key, column_name
927
+ ))
928
+ })?
929
+ .column_type;
930
+
931
+ let values = snapshots
932
+ .iter()
933
+ .map(|snapshot| snapshot.as_ref().and_then(|value| value.get(column_name)))
934
+ .collect::<Vec<_>>();
935
+ Ok(match column_type {
936
+ EntityColumnType::String | EntityColumnType::Json => Arc::new(StringArray::from(
937
+ values
938
+ .iter()
939
+ .map(|value| entity_json_text_value(*value, column_type))
940
+ .collect::<Result<Vec<_>>>()?,
941
+ )) as ArrayRef,
942
+ EntityColumnType::Integer => Arc::new(Int64Array::from(
943
+ values
944
+ .iter()
945
+ .map(|value| entity_i64_value(*value))
946
+ .collect::<Vec<_>>(),
947
+ )) as ArrayRef,
948
+ EntityColumnType::Number => Arc::new(Float64Array::from(
949
+ values
950
+ .iter()
951
+ .map(|value| entity_f64_value(*value))
952
+ .collect::<Vec<_>>(),
953
+ )) as ArrayRef,
954
+ EntityColumnType::Boolean => Arc::new(BooleanArray::from(
955
+ values
956
+ .iter()
957
+ .map(|value| value.and_then(JsonValue::as_bool))
958
+ .collect::<Vec<_>>(),
959
+ )) as ArrayRef,
960
+ })
961
+ }
962
+
963
+ fn entity_system_column_array(
964
+ column_name: &str,
965
+ rows: &[MaterializedLiveStateRow],
966
+ ) -> Result<ArrayRef> {
967
+ Ok(match column_name {
968
+ "entity_pk" => Arc::new(StringArray::from(
969
+ rows.iter()
970
+ .map(|row| {
971
+ row.entity_pk
972
+ .as_json_array_text()
973
+ .map(Some)
974
+ .map_err(lix_error_to_datafusion_error)
975
+ })
976
+ .collect::<Result<Vec<_>>>()?,
977
+ )) as ArrayRef,
978
+ "schema_key" => string_array(rows.iter().map(|row| Some(row.schema_key.as_str()))),
979
+ "file_id" => string_array(rows.iter().map(|row| row.file_id.as_deref())),
980
+ "snapshot_content" => string_array(rows.iter().map(|row| row.snapshot_content.as_deref())),
981
+ "metadata" => Arc::new(StringArray::from(
982
+ rows.iter()
983
+ .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
984
+ .collect::<Vec<_>>(),
985
+ )) as ArrayRef,
986
+ "created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
987
+ "updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
988
+ "global" => Arc::new(BooleanArray::from(
989
+ rows.iter().map(|row| row.global).collect::<Vec<_>>(),
990
+ )) as ArrayRef,
991
+ "change_id" => string_array(rows.iter().map(|row| row.change_id.as_deref())),
992
+ "commit_id" => string_array(rows.iter().map(|row| row.commit_id.as_deref())),
993
+ "untracked" => Arc::new(BooleanArray::from(
994
+ rows.iter().map(|row| row.untracked).collect::<Vec<_>>(),
995
+ )) as ArrayRef,
996
+ "branch_id" => string_array(rows.iter().map(|row| Some(row.branch_id.as_str()))),
997
+ other => {
998
+ return Err(DataFusionError::Execution(format!(
999
+ "sql2 entity provider does not support system column 'lixcol_{other}'"
1000
+ )))
1001
+ }
1002
+ })
1003
+ }
1004
+
1005
+ pub(super) fn parse_snapshot(snapshot_content: Option<&str>) -> Result<Option<JsonValue>> {
1006
+ snapshot_content
1007
+ .map(|snapshot| {
1008
+ serde_json::from_str::<JsonValue>(snapshot).map_err(|error| {
1009
+ DataFusionError::Execution(format!(
1010
+ "sql2 entity provider expected valid snapshot_content JSON: {error}"
1011
+ ))
1012
+ })
1013
+ })
1014
+ .transpose()
1015
+ }
1016
+
1017
+ pub(super) fn entity_json_text_value(
1018
+ value: Option<&JsonValue>,
1019
+ column_type: EntityColumnType,
1020
+ ) -> Result<Option<String>> {
1021
+ Ok(match (column_type, value) {
1022
+ (_, None) | (_, Some(JsonValue::Null)) => None,
1023
+ (EntityColumnType::String, Some(JsonValue::Bool(value))) => Some(if *value {
1024
+ "true".to_string()
1025
+ } else {
1026
+ "false".to_string()
1027
+ }),
1028
+ (EntityColumnType::String, Some(JsonValue::String(value))) => Some(value.clone()),
1029
+ (EntityColumnType::String, Some(other)) => Some(json_to_string(other)?),
1030
+ (EntityColumnType::Json, Some(other)) => Some(json_to_string(other)?),
1031
+ _ => None,
1032
+ })
1033
+ }
1034
+
1035
+ pub(super) fn entity_i64_value(value: Option<&JsonValue>) -> Option<i64> {
1036
+ match value {
1037
+ Some(JsonValue::Number(number)) => number.as_i64(),
1038
+ Some(JsonValue::String(value)) => value.parse::<i64>().ok(),
1039
+ _ => None,
1040
+ }
1041
+ }
1042
+
1043
+ pub(super) fn entity_f64_value(value: Option<&JsonValue>) -> Option<f64> {
1044
+ match value {
1045
+ Some(JsonValue::Number(number)) => number.as_f64(),
1046
+ Some(JsonValue::String(value)) => value.parse::<f64>().ok(),
1047
+ _ => None,
1048
+ }
1049
+ }
1050
+
1051
+ fn json_to_string(value: &JsonValue) -> Result<String> {
1052
+ serde_json::to_string(value).map_err(|error| {
1053
+ DataFusionError::Execution(format!("failed to render JSON value: {error}"))
1054
+ })
1055
+ }
1056
+
1057
+ pub(super) fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
1058
+ let values = values
1059
+ .map(|value| value.map(ToOwned::to_owned))
1060
+ .collect::<Vec<_>>();
1061
+ Arc::new(StringArray::from(values)) as ArrayRef
1062
+ }
1063
+
1064
+ fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1065
+ let Some(projection) = projection else {
1066
+ return Ok(Arc::clone(schema));
1067
+ };
1068
+ Ok(Arc::new(schema.project(projection)?))
1069
+ }
1070
+
1071
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
1072
+ crate::sql2::error::datafusion_error_to_lix_error(error)
1073
+ }
1074
+
1075
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
1076
+ DataFusionError::External(Box::new(error))
1077
+ }
1078
+
1079
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use async_trait::async_trait;
    use datafusion::arrow::array::{Float64Array, Int64Array, StringArray};
    use datafusion::common::{Column, ScalarValue};
    use datafusion::logical_expr::expr::InList;
    use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
    use serde_json::json;

    use super::entity_record_batch;
    use crate::branch::{BranchHead, BranchRefReader};
    use crate::entity_pk::EntityPk;
    use crate::live_state::{
        LiveStateReader, LiveStateRowRequest, LiveStateScanRequest, MaterializedLiveStateRow,
    };
    use crate::sql2::catalog::{
        derive_entity_surface_spec_from_schema, entity_surface_schema,
        schema_exposed_as_entity_surface, EntityColumnType, EntitySurfaceShape,
    };
    use crate::LixError;

    /// Live-state reader that never returns any rows.
    struct EmptyLiveStateReader;

    /// Branch-ref reader that knows no branch heads.
    struct EmptyBranchRefReader;

    #[async_trait]
    impl LiveStateReader for EmptyLiveStateReader {
        async fn scan_rows(
            &self,
            _request: &LiveStateScanRequest,
        ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
            Ok(Vec::new())
        }

        async fn load_row(
            &self,
            _request: &LiveStateRowRequest,
        ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
            Ok(None)
        }
    }

    #[async_trait]
    impl BranchRefReader for EmptyBranchRefReader {
        async fn load_head(&self, _branch_id: &str) -> Result<Option<BranchHead>, LixError> {
            Ok(None)
        }

        async fn scan_heads(&self) -> Result<Vec<BranchHead>, LixError> {
            Ok(vec![])
        }
    }

    fn empty_branch_ref() -> Arc<dyn BranchRefReader> {
        Arc::new(EmptyBranchRefReader)
    }

    /// Derives a surface spec from a schema definition that is expected to be valid.
    fn derive_spec(definition: serde_json::Value) -> super::EntitySurfaceSpec {
        derive_entity_surface_spec_from_schema(&definition)
            .expect("schema should derive entity surface spec")
    }

    /// One tracked `project_message` row on `branch-a` with a payload that
    /// covers every column type.
    fn live_row() -> MaterializedLiveStateRow {
        let snapshot =
            r#"{"body":"hello","rating":4.5,"count":7,"enabled":true,"meta":{"x":1}}"#;
        MaterializedLiveStateRow {
            entity_pk: EntityPk::single("entity-1"),
            schema_key: "project_message".to_string(),
            file_id: None,
            snapshot_content: Some(snapshot.to_string()),
            metadata: Some(json!({"source": "test"}).to_string()),
            deleted: false,
            branch_id: "branch-a".to_string(),
            change_id: Some("change-a".to_string()),
            commit_id: Some("commit-a".to_string()),
            global: false,
            untracked: false,
            created_at: "2026-04-23T00:00:00Z".to_string(),
            updated_at: "2026-04-23T01:00:00Z".to_string(),
        }
    }

    /// Spec with a single string primary key `id` and a `body` column.
    fn entity_insert_spec_with_primary_key() -> Arc<super::EntitySurfaceSpec> {
        Arc::new(derive_spec(json!({
            "x-lix-key": "project_message",
            "x-lix-primary-key": ["/id"],
            "type": "object",
            "properties": {
                "id": { "type": "string" },
                "body": { "type": "string" }
            },
            "required": ["id", "body"]
        })))
    }

    fn string_literal(value: &str) -> Expr {
        Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
    }

    fn column(name: &str) -> Expr {
        Expr::Column(Column::from_name(name))
    }

    /// Builds `left <op> right`.
    fn binary(left: Expr, op: Operator, right: Expr) -> Expr {
        Expr::BinaryExpr(BinaryExpr::new(Box::new(left), op, Box::new(right)))
    }

    fn eq_filter(column_name: &str, value: &str) -> Expr {
        binary(column(column_name), Operator::Eq, string_literal(value))
    }

    /// Builds `column_name [NOT] IN (values...)` over string literals.
    fn in_list_filter(column_name: &str, values: &[&str], negated: bool) -> Expr {
        let list = values.iter().map(|value| string_literal(value)).collect();
        Expr::InList(InList::new(Box::new(column(column_name)), list, negated))
    }

    #[test]
    fn excludes_non_entity_builtin_session_surfaces() {
        let session_surface = schema_exposed_as_entity_surface("lix_active_account");
        let user_surface = schema_exposed_as_entity_surface("project_message");

        assert!(!session_surface);
        assert!(user_surface);
    }

    #[test]
    fn derives_entity_surface_spec_from_schema_definition() {
        let spec = derive_spec(json!({
            "x-lix-key": "project_message",
            "type": "object",
            "properties": {
                "body": { "type": "string" },
                "rating": { "type": "number" },
                "meta": { "type": "object" },
                "lixcol_entity_pk": { "type": "string" }
            }
        }));
        let column_type = |name: &str| spec.visible_column(name).map(|column| column.column_type);

        assert_eq!(spec.schema_key, "project_message");
        assert_eq!(
            spec.visible_column_names().collect::<Vec<_>>(),
            vec!["body", "meta", "rating"]
        );
        assert_eq!(column_type("body"), Some(EntityColumnType::String));
        assert_eq!(column_type("rating"), Some(EntityColumnType::Number));
        assert_eq!(column_type("meta"), Some(EntityColumnType::Json));
        assert!(spec.visible_column("lixcol_entity_pk").is_none());
    }

    #[test]
    fn entity_surface_spec_rejects_properties_without_projection_type() {
        let definition = json!({
            "x-lix-key": "project_message",
            "x-lix-primary-key": ["/id"],
            "type": "object",
            "properties": {
                "id": { "type": "string" },
                "kind": {}
            },
            "required": ["id", "kind"],
            "additionalProperties": false
        });

        let error = derive_entity_surface_spec_from_schema(&definition)
            .expect_err("unprojectable property should be rejected");

        assert_eq!(error.code, LixError::CODE_SCHEMA_DEFINITION);
        assert!(
            error.message.contains("property '/kind'"),
            "error should identify the property: {error:?}"
        );
    }

    #[test]
    fn by_branch_schema_includes_branch_system_column() {
        let spec = derive_spec(json!({
            "x-lix-key": "project_message",
            "type": "object",
            "properties": {
                "body": { "type": "string" }
            }
        }));

        let schema = entity_surface_schema(&spec, EntitySurfaceShape::ByBranch);

        for present in ["body", "lixcol_entity_pk", "lixcol_branch_id"] {
            assert!(schema.field_with_name(present).is_ok());
        }
    }

    #[test]
    fn active_schema_excludes_branch_system_column() {
        let spec = derive_spec(json!({
            "x-lix-key": "project_message",
            "type": "object",
            "properties": {
                "body": { "type": "string" }
            }
        }));

        let schema = entity_surface_schema(&spec, EntitySurfaceShape::Active);

        for present in ["body", "lixcol_entity_pk"] {
            assert!(schema.field_with_name(present).is_ok());
        }
        assert!(schema.field_with_name("lixcol_branch_id").is_err());
    }

    #[test]
    fn insert_schema_allows_defaulted_identity_columns_to_be_omitted() {
        let spec = derive_spec(json!({
            "x-lix-key": "project_message",
            "x-lix-primary-key": ["/id"],
            "type": "object",
            "properties": {
                "id": { "type": "string", "x-lix-default": "lix_uuid_v7()" },
                "body": { "type": "string" }
            }
        }));

        let schema = entity_surface_schema(&spec, EntitySurfaceShape::Active);
        let id_field = schema.field_with_name("id").expect("id field");
        let entity_pk_field = schema
            .field_with_name("lixcol_entity_pk")
            .expect("entity pk field");

        assert!(
            id_field.is_nullable(),
            "defaulted primary-key property should be nullable at SQL input"
        );
        assert!(
            entity_pk_field.is_nullable(),
            "opaque identity projection should be nullable for normal primary-key inserts"
        );
    }

    #[test]
    fn record_batch_projects_payload_and_system_columns() {
        let spec = Arc::new(derive_spec(json!({
            "x-lix-key": "project_message",
            "type": "object",
            "properties": {
                "body": { "type": "string" },
                "rating": { "type": "number" },
                "count": { "type": "integer" },
                "enabled": { "type": "boolean" },
                "meta": { "type": "object" }
            }
        })));
        let schema = entity_surface_schema(&spec, EntitySurfaceShape::ByBranch);

        let batch =
            entity_record_batch(&spec, schema, &[live_row()]).expect("entity batch should build");

        assert_eq!(batch.num_rows(), 1);

        let body = batch.column_by_name("body").expect("body column");
        let body = body
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("body is string");
        assert_eq!(body.value(0), "hello");

        let rating = batch.column_by_name("rating").expect("rating column");
        let rating = rating
            .as_any()
            .downcast_ref::<Float64Array>()
            .expect("rating is f64");
        assert_eq!(rating.value(0), 4.5);

        let count = batch.column_by_name("count").expect("count column");
        let count = count
            .as_any()
            .downcast_ref::<Int64Array>()
            .expect("count is i64");
        assert_eq!(count.value(0), 7);

        let entity_pk = batch
            .column_by_name("lixcol_entity_pk")
            .expect("entity pk column");
        let entity_pk = entity_pk
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("entity pk is string");
        assert_eq!(entity_pk.value(0), "[\"entity-1\"]");

        let branch_id = batch
            .column_by_name("lixcol_branch_id")
            .expect("branch id column");
        let branch_id = branch_id
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("branch id is string");
        assert_eq!(branch_id.value(0), "branch-a");
    }

    #[tokio::test]
    async fn provider_registers_as_table_provider() {
        let spec = Arc::new(derive_spec(json!({
            "x-lix-key": "project_message",
            "type": "object",
            "properties": {
                "body": { "type": "string" }
            }
        })));
        let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;

        let provider = super::EntityProvider::by_branch(spec, live_state, empty_branch_ref());

        assert!(provider.schema.field_with_name("lixcol_branch_id").is_ok());
    }

    #[test]
    fn primary_key_filters_route_entity_pks_for_string_primary_key() {
        let spec = entity_insert_spec_with_primary_key();
        let filters = vec![
            eq_filter("id", "entity-a"),
            in_list_filter("id", &["entity-b", "entity-a"], false),
        ];

        let entity_pks = super::entity_pks_from_primary_key_filters(&spec, &filters)
            .expect("primary-key filters should analyze")
            .expect("primary-key filters should produce a constraint");

        assert_eq!(entity_pks, vec![EntityPk::single("entity-a")]);
    }

    #[test]
    fn primary_key_filter_analyzer_models_boolean_predicates() {
        let spec = entity_insert_spec_with_primary_key();
        let analyzer = super::EntityPrimaryKeyFilterAnalyzer::new(&spec);
        let disjunction = binary(
            eq_filter("id", "entity-a"),
            Operator::Or,
            eq_filter("id", "entity-b"),
        );
        let contradiction = binary(
            eq_filter("id", "entity-a"),
            Operator::And,
            eq_filter("id", "entity-b"),
        );

        let disjunction_ids = analyzer
            .analyze(&disjunction)
            .expect("OR should analyze")
            .expect("OR should produce an entity-pk set");
        let contradiction_ids = analyzer
            .analyze(&contradiction)
            .expect("AND should analyze")
            .expect("AND should produce an entity-pk set");

        assert_eq!(
            disjunction_ids.into_iter().collect::<Vec<_>>(),
            vec![EntityPk::single("entity-a"), EntityPk::single("entity-b")]
        );
        assert!(contradiction_ids.is_empty());
    }

    #[test]
    fn primary_key_filters_ignore_non_key_and_negated_predicates() {
        let spec = entity_insert_spec_with_primary_key();
        let filters = vec![
            eq_filter("body", "hello"),
            in_list_filter("id", &["entity-a"], true),
        ];

        let routed = super::entity_pks_from_primary_key_filters(&spec, &filters)
            .expect("ignored filters should analyze")
            .unwrap_or_default();

        assert!(routed.is_empty());
    }
}