@lix-js/sdk 0.6.0-preview.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/README.md +76 -4
  2. package/dist/errors.d.ts +7 -0
  3. package/dist/errors.js +19 -0
  4. package/dist/index.d.ts +4 -5
  5. package/dist/index.js +3 -3
  6. package/dist/native.d.ts +1 -0
  7. package/dist/native.js +47 -0
  8. package/dist/open-lix.d.ts +38 -207
  9. package/dist/open-lix.js +59 -284
  10. package/dist/result.d.ts +18 -0
  11. package/dist/result.js +48 -0
  12. package/dist/types.d.ts +114 -1
  13. package/dist/value.d.ts +28 -0
  14. package/dist/value.js +245 -0
  15. package/package.json +38 -71
  16. package/SKILL.md +0 -507
  17. package/dist/builtin-schemas.d.ts +0 -1
  18. package/dist/builtin-schemas.js +0 -1
  19. package/dist/engine-wasm/index.d.ts +0 -87
  20. package/dist/engine-wasm/index.js +0 -339
  21. package/dist/engine-wasm/wasm/lix_engine.d.ts +0 -79
  22. package/dist/engine-wasm/wasm/lix_engine.js +0 -833
  23. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  24. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +0 -27
  25. package/dist/generated/builtin-schemas.d.ts +0 -427
  26. package/dist/generated/builtin-schemas.js +0 -643
  27. package/dist/sqlite/index.d.ts +0 -12
  28. package/dist/sqlite/index.js +0 -359
  29. package/dist-engine-src/README.md +0 -18
  30. package/dist-engine-src/src/backend/capabilities.rs +0 -67
  31. package/dist-engine-src/src/backend/conformance/baseline.rs +0 -1127
  32. package/dist-engine-src/src/backend/conformance/factory.rs +0 -93
  33. package/dist-engine-src/src/backend/conformance/failure_tests.rs +0 -608
  34. package/dist-engine-src/src/backend/conformance/fixtures.rs +0 -26
  35. package/dist-engine-src/src/backend/conformance/mod.rs +0 -75
  36. package/dist-engine-src/src/backend/conformance/model.rs +0 -28
  37. package/dist-engine-src/src/backend/conformance/model_based.rs +0 -257
  38. package/dist-engine-src/src/backend/conformance/persistence.rs +0 -204
  39. package/dist-engine-src/src/backend/conformance/projection.rs +0 -21
  40. package/dist-engine-src/src/backend/conformance/pushdown.rs +0 -24
  41. package/dist-engine-src/src/backend/conformance/runner.rs +0 -90
  42. package/dist-engine-src/src/backend/conformance/scan.rs +0 -24
  43. package/dist-engine-src/src/backend/conformance/write.rs +0 -16
  44. package/dist-engine-src/src/backend/error.rs +0 -94
  45. package/dist-engine-src/src/backend/in_memory.rs +0 -670
  46. package/dist-engine-src/src/backend/mod.rs +0 -39
  47. package/dist-engine-src/src/backend/predicate.rs +0 -80
  48. package/dist-engine-src/src/backend/traits.rs +0 -260
  49. package/dist-engine-src/src/backend/types.rs +0 -239
  50. package/dist-engine-src/src/binary_cas/chunking.rs +0 -31
  51. package/dist-engine-src/src/binary_cas/codec.rs +0 -346
  52. package/dist-engine-src/src/binary_cas/context.rs +0 -139
  53. package/dist-engine-src/src/binary_cas/kv.rs +0 -1038
  54. package/dist-engine-src/src/binary_cas/mod.rs +0 -11
  55. package/dist-engine-src/src/binary_cas/types.rs +0 -121
  56. package/dist-engine-src/src/branch/context.rs +0 -40
  57. package/dist-engine-src/src/branch/lifecycle.rs +0 -221
  58. package/dist-engine-src/src/branch/mod.rs +0 -13
  59. package/dist-engine-src/src/branch/refs.rs +0 -321
  60. package/dist-engine-src/src/branch/stage_rows.rs +0 -67
  61. package/dist-engine-src/src/branch/types.rs +0 -21
  62. package/dist-engine-src/src/catalog/context.rs +0 -412
  63. package/dist-engine-src/src/catalog/mod.rs +0 -10
  64. package/dist-engine-src/src/catalog/schema.rs +0 -4
  65. package/dist-engine-src/src/catalog/snapshot.rs +0 -1114
  66. package/dist-engine-src/src/cel/context.rs +0 -86
  67. package/dist-engine-src/src/cel/error.rs +0 -19
  68. package/dist-engine-src/src/cel/mod.rs +0 -8
  69. package/dist-engine-src/src/cel/provider.rs +0 -9
  70. package/dist-engine-src/src/cel/runtime.rs +0 -167
  71. package/dist-engine-src/src/cel/value.rs +0 -50
  72. package/dist-engine-src/src/changelog/bench_support.rs +0 -785
  73. package/dist-engine-src/src/changelog/change.rs +0 -1
  74. package/dist-engine-src/src/changelog/codec.rs +0 -497
  75. package/dist-engine-src/src/changelog/commit.rs +0 -1
  76. package/dist-engine-src/src/changelog/context.rs +0 -1614
  77. package/dist-engine-src/src/changelog/mod.rs +0 -29
  78. package/dist-engine-src/src/changelog/store.rs +0 -163
  79. package/dist-engine-src/src/changelog/test_support.rs +0 -54
  80. package/dist-engine-src/src/changelog/types.rs +0 -213
  81. package/dist-engine-src/src/commit_graph/context.rs +0 -944
  82. package/dist-engine-src/src/commit_graph/mod.rs +0 -9
  83. package/dist-engine-src/src/commit_graph/types.rs +0 -89
  84. package/dist-engine-src/src/commit_graph/walker.rs +0 -786
  85. package/dist-engine-src/src/common/error.rs +0 -347
  86. package/dist-engine-src/src/common/fingerprint.rs +0 -3
  87. package/dist-engine-src/src/common/fs_path.rs +0 -1336
  88. package/dist-engine-src/src/common/identity.rs +0 -145
  89. package/dist-engine-src/src/common/json_pointer.rs +0 -67
  90. package/dist-engine-src/src/common/metadata.rs +0 -40
  91. package/dist-engine-src/src/common/mod.rs +0 -23
  92. package/dist-engine-src/src/common/types.rs +0 -105
  93. package/dist-engine-src/src/common/wire.rs +0 -222
  94. package/dist-engine-src/src/domain.rs +0 -320
  95. package/dist-engine-src/src/engine.rs +0 -203
  96. package/dist-engine-src/src/entity_pk.rs +0 -402
  97. package/dist-engine-src/src/functions/context.rs +0 -296
  98. package/dist-engine-src/src/functions/deterministic.rs +0 -113
  99. package/dist-engine-src/src/functions/mod.rs +0 -18
  100. package/dist-engine-src/src/functions/provider.rs +0 -130
  101. package/dist-engine-src/src/functions/state.rs +0 -335
  102. package/dist-engine-src/src/functions/types.rs +0 -37
  103. package/dist-engine-src/src/init.rs +0 -692
  104. package/dist-engine-src/src/json_store/compression.rs +0 -77
  105. package/dist-engine-src/src/json_store/context.rs +0 -172
  106. package/dist-engine-src/src/json_store/encoded.rs +0 -15
  107. package/dist-engine-src/src/json_store/mod.rs +0 -38
  108. package/dist-engine-src/src/json_store/store.rs +0 -494
  109. package/dist-engine-src/src/json_store/types.rs +0 -212
  110. package/dist-engine-src/src/lib.rs +0 -92
  111. package/dist-engine-src/src/live_state/context.rs +0 -1883
  112. package/dist-engine-src/src/live_state/mod.rs +0 -21
  113. package/dist-engine-src/src/live_state/overlay.rs +0 -75
  114. package/dist-engine-src/src/live_state/reader.rs +0 -23
  115. package/dist-engine-src/src/live_state/types.rs +0 -231
  116. package/dist-engine-src/src/live_state/visibility.rs +0 -666
  117. package/dist-engine-src/src/plugin/archive.rs +0 -438
  118. package/dist-engine-src/src/plugin/component.rs +0 -183
  119. package/dist-engine-src/src/plugin/install.rs +0 -619
  120. package/dist-engine-src/src/plugin/manifest.rs +0 -516
  121. package/dist-engine-src/src/plugin/materializer.rs +0 -202
  122. package/dist-engine-src/src/plugin/mod.rs +0 -33
  123. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -119
  124. package/dist-engine-src/src/plugin/storage.rs +0 -74
  125. package/dist-engine-src/src/schema/annotations/defaults.rs +0 -275
  126. package/dist-engine-src/src/schema/annotations/mod.rs +0 -1
  127. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -21
  128. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -29
  129. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -29
  130. package/dist-engine-src/src/schema/builtin/lix_branch_descriptor.json +0 -34
  131. package/dist-engine-src/src/schema/builtin/lix_branch_ref.json +0 -48
  132. package/dist-engine-src/src/schema/builtin/lix_change.json +0 -63
  133. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -45
  134. package/dist-engine-src/src/schema/builtin/lix_commit.json +0 -24
  135. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +0 -53
  136. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -52
  137. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -52
  138. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -40
  139. package/dist-engine-src/src/schema/builtin/lix_label.json +0 -29
  140. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +0 -74
  141. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +0 -25
  142. package/dist-engine-src/src/schema/builtin/mod.rs +0 -220
  143. package/dist-engine-src/src/schema/compatibility.rs +0 -787
  144. package/dist-engine-src/src/schema/definition.json +0 -187
  145. package/dist-engine-src/src/schema/definition.rs +0 -742
  146. package/dist-engine-src/src/schema/key.rs +0 -138
  147. package/dist-engine-src/src/schema/mod.rs +0 -20
  148. package/dist-engine-src/src/schema/seed.rs +0 -14
  149. package/dist-engine-src/src/schema/tests.rs +0 -780
  150. package/dist-engine-src/src/session/context.rs +0 -1059
  151. package/dist-engine-src/src/session/create_branch.rs +0 -94
  152. package/dist-engine-src/src/session/execute.rs +0 -681
  153. package/dist-engine-src/src/session/merge/analysis.rs +0 -108
  154. package/dist-engine-src/src/session/merge/branch.rs +0 -417
  155. package/dist-engine-src/src/session/merge/conflicts.rs +0 -63
  156. package/dist-engine-src/src/session/merge/mod.rs +0 -10
  157. package/dist-engine-src/src/session/merge/stats.rs +0 -61
  158. package/dist-engine-src/src/session/mod.rs +0 -30
  159. package/dist-engine-src/src/session/switch_branch.rs +0 -113
  160. package/dist-engine-src/src/session/transaction.rs +0 -557
  161. package/dist-engine-src/src/sql2/bind/classify.rs +0 -102
  162. package/dist-engine-src/src/sql2/bind/error.rs +0 -5
  163. package/dist-engine-src/src/sql2/bind/expr.rs +0 -29
  164. package/dist-engine-src/src/sql2/bind/mod.rs +0 -12
  165. package/dist-engine-src/src/sql2/bind/public_udf.rs +0 -306
  166. package/dist-engine-src/src/sql2/bind/read.rs +0 -65
  167. package/dist-engine-src/src/sql2/bind/statement.rs +0 -2236
  168. package/dist-engine-src/src/sql2/bind/table.rs +0 -273
  169. package/dist-engine-src/src/sql2/bind/write.rs +0 -86
  170. package/dist-engine-src/src/sql2/branch_scope.rs +0 -436
  171. package/dist-engine-src/src/sql2/catalog/capability.rs +0 -20
  172. package/dist-engine-src/src/sql2/catalog/entity_surface.rs +0 -296
  173. package/dist-engine-src/src/sql2/catalog/mod.rs +0 -15
  174. package/dist-engine-src/src/sql2/catalog/registry.rs +0 -556
  175. package/dist-engine-src/src/sql2/catalog/schema.rs +0 -88
  176. package/dist-engine-src/src/sql2/catalog/surface.rs +0 -41
  177. package/dist-engine-src/src/sql2/change_materialization.rs +0 -122
  178. package/dist-engine-src/src/sql2/context.rs +0 -317
  179. package/dist-engine-src/src/sql2/dml.rs +0 -148
  180. package/dist-engine-src/src/sql2/error.rs +0 -215
  181. package/dist-engine-src/src/sql2/exec/bound_public_write.rs +0 -1593
  182. package/dist-engine-src/src/sql2/exec/datafusion.rs +0 -5266
  183. package/dist-engine-src/src/sql2/exec/fast_write.rs +0 -82
  184. package/dist-engine-src/src/sql2/exec/mod.rs +0 -24
  185. package/dist-engine-src/src/sql2/exec/write.rs +0 -661
  186. package/dist-engine-src/src/sql2/filesystem_planner.rs +0 -1485
  187. package/dist-engine-src/src/sql2/filesystem_predicates.rs +0 -159
  188. package/dist-engine-src/src/sql2/filesystem_visibility.rs +0 -383
  189. package/dist-engine-src/src/sql2/history_projection.rs +0 -56
  190. package/dist-engine-src/src/sql2/history_route.rs +0 -661
  191. package/dist-engine-src/src/sql2/mod.rs +0 -52
  192. package/dist-engine-src/src/sql2/optimize/datafusion.rs +0 -1
  193. package/dist-engine-src/src/sql2/optimize/mod.rs +0 -2
  194. package/dist-engine-src/src/sql2/optimize/simple_write.rs +0 -116
  195. package/dist-engine-src/src/sql2/parse/mod.rs +0 -69
  196. package/dist-engine-src/src/sql2/parse/normalize.rs +0 -1
  197. package/dist-engine-src/src/sql2/plan/branch_scope.rs +0 -24
  198. package/dist-engine-src/src/sql2/plan/mod.rs +0 -5
  199. package/dist-engine-src/src/sql2/plan/predicate.rs +0 -22
  200. package/dist-engine-src/src/sql2/plan/write.rs +0 -147
  201. package/dist-engine-src/src/sql2/predicate_typecheck.rs +0 -504
  202. package/dist-engine-src/src/sql2/providers/branch.rs +0 -1206
  203. package/dist-engine-src/src/sql2/providers/change.rs +0 -445
  204. package/dist-engine-src/src/sql2/providers/directory.rs +0 -2422
  205. package/dist-engine-src/src/sql2/providers/directory_history.rs +0 -645
  206. package/dist-engine-src/src/sql2/providers/entity.rs +0 -1484
  207. package/dist-engine-src/src/sql2/providers/entity_history.rs +0 -452
  208. package/dist-engine-src/src/sql2/providers/file.rs +0 -3686
  209. package/dist-engine-src/src/sql2/providers/file_history.rs +0 -924
  210. package/dist-engine-src/src/sql2/providers/history.rs +0 -426
  211. package/dist-engine-src/src/sql2/providers/lix_state.rs +0 -2542
  212. package/dist-engine-src/src/sql2/providers/mod.rs +0 -508
  213. package/dist-engine-src/src/sql2/read_only.rs +0 -63
  214. package/dist-engine-src/src/sql2/record_batch.rs +0 -17
  215. package/dist-engine-src/src/sql2/result_metadata.rs +0 -29
  216. package/dist-engine-src/src/sql2/runtime.rs +0 -60
  217. package/dist-engine-src/src/sql2/session.rs +0 -83
  218. package/dist-engine-src/src/sql2/storage/constraints.rs +0 -1
  219. package/dist-engine-src/src/sql2/storage/mod.rs +0 -1
  220. package/dist-engine-src/src/sql2/test_support/differential.rs +0 -712
  221. package/dist-engine-src/src/sql2/test_support/generators.rs +0 -354
  222. package/dist-engine-src/src/sql2/test_support/mod.rs +0 -2
  223. package/dist-engine-src/src/sql2/udfs/common.rs +0 -295
  224. package/dist-engine-src/src/sql2/udfs/lix_active_branch_commit_id.rs +0 -53
  225. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +0 -47
  226. package/dist-engine-src/src/sql2/udfs/lix_json.rs +0 -100
  227. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +0 -99
  228. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +0 -99
  229. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +0 -82
  230. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +0 -85
  231. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +0 -76
  232. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +0 -76
  233. package/dist-engine-src/src/sql2/udfs/mod.rs +0 -86
  234. package/dist-engine-src/src/sql2/write_normalization.rs +0 -368
  235. package/dist-engine-src/src/storage/conformance.rs +0 -399
  236. package/dist-engine-src/src/storage/context.rs +0 -620
  237. package/dist-engine-src/src/storage/mod.rs +0 -52
  238. package/dist-engine-src/src/storage/point.rs +0 -440
  239. package/dist-engine-src/src/storage/read_scope.rs +0 -67
  240. package/dist-engine-src/src/storage/reader.rs +0 -867
  241. package/dist-engine-src/src/storage/scan.rs +0 -784
  242. package/dist-engine-src/src/storage/spaces.rs +0 -236
  243. package/dist-engine-src/src/storage/stats.rs +0 -80
  244. package/dist-engine-src/src/storage/write_set.rs +0 -962
  245. package/dist-engine-src/src/storage_bench.rs +0 -171
  246. package/dist-engine-src/src/test_support.rs +0 -450
  247. package/dist-engine-src/src/tracked_state/bench_support.rs +0 -394
  248. package/dist-engine-src/src/tracked_state/codec.rs +0 -1183
  249. package/dist-engine-src/src/tracked_state/commit_root_rebuild.rs +0 -358
  250. package/dist-engine-src/src/tracked_state/context.rs +0 -2801
  251. package/dist-engine-src/src/tracked_state/diff.rs +0 -2140
  252. package/dist-engine-src/src/tracked_state/merge.rs +0 -478
  253. package/dist-engine-src/src/tracked_state/mod.rs +0 -35
  254. package/dist-engine-src/src/tracked_state/row_materialization.rs +0 -275
  255. package/dist-engine-src/src/tracked_state/storage.rs +0 -427
  256. package/dist-engine-src/src/tracked_state/tree.rs +0 -3063
  257. package/dist-engine-src/src/tracked_state/types.rs +0 -238
  258. package/dist-engine-src/src/transaction/bench_support.rs +0 -407
  259. package/dist-engine-src/src/transaction/commit.rs +0 -1592
  260. package/dist-engine-src/src/transaction/context.rs +0 -1653
  261. package/dist-engine-src/src/transaction/mod.rs +0 -24
  262. package/dist-engine-src/src/transaction/normalization.rs +0 -877
  263. package/dist-engine-src/src/transaction/prep.rs +0 -37
  264. package/dist-engine-src/src/transaction/schema_resolver.rs +0 -163
  265. package/dist-engine-src/src/transaction/staging.rs +0 -1525
  266. package/dist-engine-src/src/transaction/types.rs +0 -403
  267. package/dist-engine-src/src/transaction/validation.rs +0 -5766
  268. package/dist-engine-src/src/untracked_state/codec.rs +0 -615
  269. package/dist-engine-src/src/untracked_state/context.rs +0 -98
  270. package/dist-engine-src/src/untracked_state/materialization.rs +0 -63
  271. package/dist-engine-src/src/untracked_state/mod.rs +0 -15
  272. package/dist-engine-src/src/untracked_state/storage.rs +0 -898
  273. package/dist-engine-src/src/untracked_state/types.rs +0 -146
  274. package/dist-engine-src/src/wasm/mod.rs +0 -60
@@ -1,2542 +0,0 @@
1
- use std::any::Any;
2
- use std::collections::BTreeSet;
3
- use std::sync::Arc;
4
-
5
- use async_trait::async_trait;
6
- use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray, UInt64Array};
7
- use datafusion::arrow::compute::{and, filter_record_batch};
8
- use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
9
- use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
10
- use datafusion::catalog::{Session, TableProvider};
11
- use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, SchemaExt};
12
- use datafusion::datasource::TableType;
13
- use datafusion::execution::TaskContext;
14
- use datafusion::logical_expr::dml::InsertOp;
15
- use datafusion::logical_expr::expr::InList;
16
- use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
17
- use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
18
- use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
19
- use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
20
- use datafusion::physical_plan::{
21
- DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
22
- };
23
- use datafusion::prelude::SessionContext;
24
- use datafusion::scalar::ScalarValue;
25
- use futures_util::{stream, TryStreamExt};
26
- use serde_json::Value as JsonValue;
27
-
28
- use crate::branch::BranchRefReader;
29
- use crate::entity_pk::EntityPk;
30
- use crate::live_state::MaterializedLiveStateRow;
31
- use crate::live_state::{
32
- LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateRowFilter, LiveStateScanRequest,
33
- };
34
- use crate::sql2::branch_scope::{resolve_provider_branch_ids, BranchBinding};
35
- use crate::sql2::dml::{InsertExec, InsertSink};
36
- use crate::sql2::read_only::reject_read_only_stage_rows;
37
- use crate::sql2::write_normalization::{InsertCell, SqlCell, UpdateAssignmentValues};
38
- use crate::transaction::types::{TransactionJson, TransactionWriteRow};
39
- use crate::GLOBAL_BRANCH_ID;
40
- use crate::{parse_row_metadata_value, serialize_row_metadata, LixError, NullableKeyFilter};
41
-
42
- use crate::sql2::{
43
- SqlWriteContext, WriteAccess, WriteContextBranchRefReader, WriteContextLiveStateReader,
44
- };
45
- use crate::transaction::types::{TransactionWrite, TransactionWriteMode};
46
-
47
- use crate::sql2::predicate_typecheck::{
48
- canonicalize_json_identity_text_filters, validate_json_predicate_filters,
49
- };
50
- use crate::sql2::result_metadata::json_field;
51
-
52
- pub(super) async fn register_lix_state_active_provider(
53
- session: &SessionContext,
54
- surface_name: &str,
55
- active_branch_id: &str,
56
- live_state: Arc<dyn LiveStateReader>,
57
- branch_ref: Arc<dyn BranchRefReader>,
58
- ) -> Result<(), LixError> {
59
- session
60
- .register_table(
61
- surface_name,
62
- Arc::new(LixStateProvider::active_branch(
63
- active_branch_id,
64
- live_state,
65
- branch_ref,
66
- )),
67
- )
68
- .map_err(datafusion_error_to_lix_error)?;
69
- Ok(())
70
- }
71
-
72
- pub(super) async fn register_lix_state_by_branch_provider(
73
- session: &SessionContext,
74
- surface_name: &str,
75
- live_state: Arc<dyn LiveStateReader>,
76
- branch_ref: Arc<dyn BranchRefReader>,
77
- ) -> Result<(), LixError> {
78
- session
79
- .register_table(
80
- surface_name,
81
- Arc::new(LixStateProvider::by_branch(live_state, branch_ref)),
82
- )
83
- .map_err(datafusion_error_to_lix_error)?;
84
- Ok(())
85
- }
86
-
87
- pub(super) async fn register_lix_state_by_branch_write_provider(
88
- session: &SessionContext,
89
- surface_name: &str,
90
- write_ctx: SqlWriteContext,
91
- ) -> Result<(), LixError> {
92
- session
93
- .register_table(
94
- surface_name,
95
- Arc::new(LixStateProvider::by_branch_with_write(write_ctx)),
96
- )
97
- .map_err(datafusion_error_to_lix_error)?;
98
- Ok(())
99
- }
100
-
101
- pub(super) async fn register_lix_state_active_write_provider(
102
- session: &SessionContext,
103
- surface_name: &str,
104
- write_ctx: SqlWriteContext,
105
- ) -> Result<(), LixError> {
106
- session
107
- .register_table(
108
- surface_name,
109
- Arc::new(LixStateProvider::active_branch_with_write(write_ctx)),
110
- )
111
- .map_err(datafusion_error_to_lix_error)?;
112
- Ok(())
113
- }
114
-
115
- pub(crate) struct LixStateProvider {
116
- schema: SchemaRef,
117
- live_state: Arc<dyn LiveStateReader>,
118
- branch_ref: Arc<dyn BranchRefReader>,
119
- write_access: WriteAccess,
120
- branch_binding: BranchBinding,
121
- }
122
-
123
- impl std::fmt::Debug for LixStateProvider {
124
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125
- f.debug_struct("LixStateProvider")
126
- .field("write_access", &self.write_access.is_write())
127
- .finish()
128
- }
129
- }
130
-
131
- impl LixStateProvider {
132
- pub(crate) fn active_branch(
133
- active_branch_id: impl Into<String>,
134
- live_state: Arc<dyn LiveStateReader>,
135
- branch_ref: Arc<dyn BranchRefReader>,
136
- ) -> Self {
137
- Self {
138
- schema: lix_state_schema(),
139
- live_state,
140
- branch_ref,
141
- write_access: WriteAccess::read_only(),
142
- branch_binding: BranchBinding::active(active_branch_id),
143
- }
144
- }
145
-
146
- pub(crate) fn active_branch_with_write(write_ctx: SqlWriteContext) -> Self {
147
- let active_branch_id = write_ctx.active_branch_id();
148
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
149
- let branch_ref = Arc::new(WriteContextBranchRefReader::new(write_ctx.clone()));
150
- Self {
151
- schema: lix_state_schema(),
152
- live_state,
153
- branch_ref,
154
- write_access: WriteAccess::write(write_ctx),
155
- branch_binding: BranchBinding::active(active_branch_id),
156
- }
157
- }
158
-
159
- pub(crate) fn by_branch(
160
- live_state: Arc<dyn LiveStateReader>,
161
- branch_ref: Arc<dyn BranchRefReader>,
162
- ) -> Self {
163
- Self {
164
- schema: lix_state_by_branch_schema(),
165
- live_state,
166
- branch_ref,
167
- write_access: WriteAccess::read_only(),
168
- branch_binding: BranchBinding::explicit(),
169
- }
170
- }
171
-
172
- pub(crate) fn by_branch_with_write(write_ctx: SqlWriteContext) -> Self {
173
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
174
- let branch_ref = Arc::new(WriteContextBranchRefReader::new(write_ctx.clone()));
175
- Self {
176
- schema: lix_state_by_branch_schema(),
177
- live_state,
178
- branch_ref,
179
- write_access: WriteAccess::write(write_ctx),
180
- branch_binding: BranchBinding::explicit(),
181
- }
182
- }
183
- }
184
-
185
- #[async_trait]
186
- impl TableProvider for LixStateProvider {
187
- fn as_any(&self) -> &dyn Any {
188
- self
189
- }
190
-
191
- fn schema(&self) -> SchemaRef {
192
- Arc::clone(&self.schema)
193
- }
194
-
195
- fn table_type(&self) -> TableType {
196
- TableType::Base
197
- }
198
-
199
- fn supports_filters_pushdown(
200
- &self,
201
- filters: &[&Expr],
202
- ) -> Result<Vec<TableProviderFilterPushDown>> {
203
- Ok(filters
204
- .iter()
205
- .map(|filter| {
206
- if parse_lix_state_filter(filter).is_some() {
207
- TableProviderFilterPushDown::Exact
208
- } else {
209
- TableProviderFilterPushDown::Unsupported
210
- }
211
- })
212
- .collect())
213
- }
214
-
215
- async fn scan(
216
- &self,
217
- _state: &dyn Session,
218
- projection: Option<&Vec<usize>>,
219
- filters: &[Expr],
220
- limit: Option<usize>,
221
- ) -> Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>> {
222
- let route = LixStateByBranchRoute::from_filters(filters);
223
- let projected_schema = projected_schema(&self.schema, projection)?;
224
- let mut request = lix_state_scan_request(
225
- &self.schema,
226
- self.branch_binding.active_branch_id(),
227
- projection,
228
- &route,
229
- limit,
230
- );
231
- request.filter.branch_ids = resolve_provider_branch_ids(
232
- self.branch_ref.as_ref(),
233
- &self.branch_binding,
234
- request.filter.branch_ids,
235
- )
236
- .await
237
- .map_err(lix_error_to_datafusion_error)?;
238
- Ok(Arc::new(LixStateScanExec::new(
239
- Arc::clone(&self.live_state),
240
- projected_schema,
241
- request,
242
- )))
243
- }
244
-
245
- async fn insert_into(
246
- &self,
247
- _state: &dyn Session,
248
- input: Arc<dyn ExecutionPlan>,
249
- insert_op: InsertOp,
250
- ) -> Result<Arc<dyn ExecutionPlan>> {
251
- if insert_op != InsertOp::Append {
252
- return not_impl_err!("{insert_op} not implemented for lix_state yet");
253
- }
254
-
255
- let write_ctx = self.write_access.require_write("INSERT into lix_state")?;
256
- let branch_binding = self.branch_binding.active_branch_id().map(str::to_owned);
257
-
258
- self.schema
259
- .logically_equivalent_names_and_types(&input.schema())?;
260
-
261
- let sink = LixStateInsertSink::new(write_ctx.clone(), branch_binding);
262
- Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
263
- }
264
-
265
- async fn delete_from(
266
- &self,
267
- state: &dyn Session,
268
- filters: Vec<Expr>,
269
- ) -> Result<Arc<dyn ExecutionPlan>> {
270
- let write_ctx = self.write_access.require_write("DELETE FROM lix_state")?;
271
-
272
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
273
- let filters = canonicalize_json_identity_text_filters(self.schema.as_ref(), &filters)?;
274
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
275
- let physical_filters = filters
276
- .iter()
277
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
278
- .collect::<Result<Vec<_>>>()?;
279
-
280
- let route = LixStateByBranchRoute::from_filters(&filters);
281
- let branch_binding = self.branch_binding.active_branch_id().map(str::to_owned);
282
- let request =
283
- lix_state_scan_request(&self.schema, branch_binding.as_deref(), None, &route, None);
284
-
285
- Ok(Arc::new(LixStateDeleteExec::new(
286
- write_ctx.clone(),
287
- Arc::clone(&self.schema),
288
- branch_binding,
289
- request,
290
- physical_filters,
291
- )))
292
- }
293
-
294
- async fn update(
295
- &self,
296
- state: &dyn Session,
297
- assignments: Vec<(String, Expr)>,
298
- filters: Vec<Expr>,
299
- ) -> Result<Arc<dyn ExecutionPlan>> {
300
- let write_ctx = self.write_access.require_write("UPDATE lix_state")?;
301
- validate_lix_state_update_assignments(&self.schema, &assignments)?;
302
-
303
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
304
- let filters = canonicalize_json_identity_text_filters(self.schema.as_ref(), &filters)?;
305
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
306
- let physical_assignments = assignments
307
- .iter()
308
- .map(|(column_name, expr)| {
309
- Ok((
310
- column_name.clone(),
311
- create_physical_expr(expr, &df_schema, state.execution_props())?,
312
- ))
313
- })
314
- .collect::<Result<Vec<_>>>()?;
315
- let physical_filters = filters
316
- .iter()
317
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
318
- .collect::<Result<Vec<_>>>()?;
319
-
320
- let route = LixStateByBranchRoute::from_filters(&filters);
321
- let branch_binding = self.branch_binding.active_branch_id().map(str::to_owned);
322
- let request =
323
- lix_state_scan_request(&self.schema, branch_binding.as_deref(), None, &route, None);
324
-
325
- Ok(Arc::new(LixStateUpdateExec::new(
326
- write_ctx.clone(),
327
- Arc::clone(&self.schema),
328
- branch_binding,
329
- request,
330
- physical_assignments,
331
- physical_filters,
332
- )))
333
- }
334
- }
335
-
336
- struct LixStateInsertSink {
337
- write_ctx: SqlWriteContext,
338
- branch_binding: Option<String>,
339
- }
340
-
341
- impl std::fmt::Debug for LixStateInsertSink {
342
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
343
- f.debug_struct("LixStateInsertSink").finish()
344
- }
345
- }
346
-
347
- impl LixStateInsertSink {
348
- fn new(write_ctx: SqlWriteContext, branch_binding: Option<String>) -> Self {
349
- Self {
350
- write_ctx,
351
- branch_binding,
352
- }
353
- }
354
- }
355
-
356
- impl DisplayAs for LixStateInsertSink {
357
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
358
- match t {
359
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
360
- write!(f, "LixStateInsertSink")
361
- }
362
- DisplayFormatType::TreeRender => write!(f, "LixStateInsertSink"),
363
- }
364
- }
365
- }
366
-
367
- #[async_trait]
368
- impl InsertSink for LixStateInsertSink {
369
- async fn write_batches(
370
- &self,
371
- batches: Vec<RecordBatch>,
372
- _context: &Arc<TaskContext>,
373
- ) -> Result<u64> {
374
- let mut rows = Vec::new();
375
- for batch in batches {
376
- rows.extend(lix_state_write_rows_from_batch(
377
- &batch,
378
- self.branch_binding.as_deref(),
379
- "INSERT into lix_state",
380
- )?);
381
- }
382
- reject_read_only_stage_rows(&rows, "INSERT into lix_state")?;
383
- let count = u64::try_from(rows.len())
384
- .map_err(|_| DataFusionError::Execution("INSERT row count overflow".into()))?;
385
-
386
- self.write_ctx
387
- .stage_write(TransactionWrite::Rows {
388
- mode: TransactionWriteMode::Insert,
389
- rows,
390
- })
391
- .await
392
- .map_err(lix_error_to_datafusion_error)?;
393
-
394
- Ok(count)
395
- }
396
- }
397
-
398
- #[allow(dead_code)]
399
- struct LixStateDeleteExec {
400
- write_ctx: SqlWriteContext,
401
- table_schema: SchemaRef,
402
- branch_binding: Option<String>,
403
- request: LiveStateScanRequest,
404
- filters: Vec<Arc<dyn PhysicalExpr>>,
405
- result_schema: SchemaRef,
406
- properties: Arc<PlanProperties>,
407
- }
408
-
409
- impl std::fmt::Debug for LixStateDeleteExec {
410
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
411
- f.debug_struct("LixStateDeleteExec").finish()
412
- }
413
- }
414
-
415
- impl LixStateDeleteExec {
416
- fn new(
417
- write_ctx: SqlWriteContext,
418
- table_schema: SchemaRef,
419
- branch_binding: Option<String>,
420
- request: LiveStateScanRequest,
421
- filters: Vec<Arc<dyn PhysicalExpr>>,
422
- ) -> Self {
423
- let result_schema = dml_count_schema();
424
- let properties = PlanProperties::new(
425
- EquivalenceProperties::new(Arc::clone(&result_schema)),
426
- Partitioning::UnknownPartitioning(1),
427
- EmissionType::Final,
428
- Boundedness::Bounded,
429
- );
430
- Self {
431
- write_ctx,
432
- table_schema,
433
- branch_binding,
434
- request,
435
- filters,
436
- result_schema,
437
- properties: Arc::new(properties),
438
- }
439
- }
440
- }
441
-
442
- impl DisplayAs for LixStateDeleteExec {
443
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444
- match t {
445
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
446
- write!(f, "LixStateDeleteExec(filters={})", self.filters.len())
447
- }
448
- DisplayFormatType::TreeRender => write!(f, "LixStateDeleteExec"),
449
- }
450
- }
451
- }
452
-
453
- impl ExecutionPlan for LixStateDeleteExec {
454
- fn name(&self) -> &str {
455
- "LixStateDeleteExec"
456
- }
457
-
458
- fn as_any(&self) -> &dyn Any {
459
- self
460
- }
461
-
462
- fn properties(&self) -> &Arc<PlanProperties> {
463
- &self.properties
464
- }
465
-
466
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
467
- Vec::new()
468
- }
469
-
470
- fn with_new_children(
471
- self: Arc<Self>,
472
- children: Vec<Arc<dyn ExecutionPlan>>,
473
- ) -> Result<Arc<dyn ExecutionPlan>> {
474
- if !children.is_empty() {
475
- return Err(DataFusionError::Execution(
476
- "LixStateDeleteExec does not accept children".to_string(),
477
- ));
478
- }
479
- Ok(self)
480
- }
481
-
482
- fn execute(
483
- &self,
484
- partition: usize,
485
- _context: Arc<TaskContext>,
486
- ) -> Result<SendableRecordBatchStream> {
487
- if partition != 0 {
488
- return Err(DataFusionError::Execution(format!(
489
- "LixStateDeleteExec only exposes one partition, got {partition}"
490
- )));
491
- }
492
- let write_ctx = self.write_ctx.clone();
493
- let table_schema = Arc::clone(&self.table_schema);
494
- let branch_binding = self.branch_binding.clone();
495
- let request = self.request.clone();
496
- let filters = self.filters.clone();
497
- let result_schema = Arc::clone(&self.result_schema);
498
- let stream_schema = Arc::clone(&result_schema);
499
-
500
- let stream = stream::once(async move {
501
- let rows = write_ctx
502
- .scan_live_state(&request)
503
- .await
504
- .map_err(lix_error_to_datafusion_error)?;
505
- let source_batch = lix_state_record_batch(Arc::clone(&table_schema), &rows)
506
- .map_err(lix_error_to_datafusion_error)?;
507
- let matched_batch = filter_lix_state_batch(source_batch, &filters)?;
508
- let write_rows = lix_state_deletable_write_rows_from_batch(
509
- &matched_batch,
510
- branch_binding.as_deref(),
511
- )?;
512
- reject_read_only_stage_rows(&write_rows, "DELETE FROM lix_state")?;
513
- let count = u64::try_from(write_rows.len())
514
- .map_err(|_| DataFusionError::Execution("DELETE row count overflow".to_string()))?;
515
-
516
- if count > 0 {
517
- write_ctx
518
- .stage_write(TransactionWrite::Rows {
519
- mode: TransactionWriteMode::Replace,
520
- rows: write_rows,
521
- })
522
- .await
523
- .map_err(lix_error_to_datafusion_error)?;
524
- }
525
-
526
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
527
- dml_count_batch(Arc::clone(&stream_schema), count)?,
528
- )]))
529
- })
530
- .try_flatten();
531
- Ok(Box::pin(RecordBatchStreamAdapter::new(
532
- result_schema,
533
- stream,
534
- )))
535
- }
536
- }
537
-
538
- #[allow(dead_code)]
539
- struct LixStateUpdateExec {
540
- write_ctx: SqlWriteContext,
541
- table_schema: SchemaRef,
542
- branch_binding: Option<String>,
543
- request: LiveStateScanRequest,
544
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
545
- filters: Vec<Arc<dyn PhysicalExpr>>,
546
- result_schema: SchemaRef,
547
- properties: Arc<PlanProperties>,
548
- }
549
-
550
- impl std::fmt::Debug for LixStateUpdateExec {
551
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
552
- f.debug_struct("LixStateUpdateExec").finish()
553
- }
554
- }
555
-
556
- impl LixStateUpdateExec {
557
- fn new(
558
- write_ctx: SqlWriteContext,
559
- table_schema: SchemaRef,
560
- branch_binding: Option<String>,
561
- request: LiveStateScanRequest,
562
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
563
- filters: Vec<Arc<dyn PhysicalExpr>>,
564
- ) -> Self {
565
- let result_schema = dml_count_schema();
566
- let properties = PlanProperties::new(
567
- EquivalenceProperties::new(Arc::clone(&result_schema)),
568
- Partitioning::UnknownPartitioning(1),
569
- EmissionType::Final,
570
- Boundedness::Bounded,
571
- );
572
- Self {
573
- write_ctx,
574
- table_schema,
575
- branch_binding,
576
- request,
577
- assignments,
578
- filters,
579
- result_schema,
580
- properties: Arc::new(properties),
581
- }
582
- }
583
- }
584
-
585
- impl DisplayAs for LixStateUpdateExec {
586
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
587
- match t {
588
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
589
- write!(
590
- f,
591
- "LixStateUpdateExec(assignments={}, filters={})",
592
- self.assignments.len(),
593
- self.filters.len()
594
- )
595
- }
596
- DisplayFormatType::TreeRender => write!(f, "LixStateUpdateExec"),
597
- }
598
- }
599
- }
600
-
601
- impl ExecutionPlan for LixStateUpdateExec {
602
- fn name(&self) -> &str {
603
- "LixStateUpdateExec"
604
- }
605
-
606
- fn as_any(&self) -> &dyn Any {
607
- self
608
- }
609
-
610
- fn properties(&self) -> &Arc<PlanProperties> {
611
- &self.properties
612
- }
613
-
614
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
615
- Vec::new()
616
- }
617
-
618
- fn with_new_children(
619
- self: Arc<Self>,
620
- children: Vec<Arc<dyn ExecutionPlan>>,
621
- ) -> Result<Arc<dyn ExecutionPlan>> {
622
- if !children.is_empty() {
623
- return Err(DataFusionError::Execution(
624
- "LixStateUpdateExec does not accept children".to_string(),
625
- ));
626
- }
627
- Ok(self)
628
- }
629
-
630
- fn execute(
631
- &self,
632
- partition: usize,
633
- _context: Arc<TaskContext>,
634
- ) -> Result<SendableRecordBatchStream> {
635
- if partition != 0 {
636
- return Err(DataFusionError::Execution(format!(
637
- "LixStateUpdateExec only exposes one partition, got {partition}"
638
- )));
639
- }
640
- let write_ctx = self.write_ctx.clone();
641
- let table_schema = Arc::clone(&self.table_schema);
642
- let branch_binding = self.branch_binding.clone();
643
- let request = self.request.clone();
644
- let assignments = self.assignments.clone();
645
- let filters = self.filters.clone();
646
- let result_schema = Arc::clone(&self.result_schema);
647
- let stream_schema = Arc::clone(&result_schema);
648
-
649
- let stream = stream::once(async move {
650
- let rows = write_ctx
651
- .scan_live_state(&request)
652
- .await
653
- .map_err(lix_error_to_datafusion_error)?;
654
- let source_batch = lix_state_record_batch(Arc::clone(&table_schema), &rows)
655
- .map_err(lix_error_to_datafusion_error)?;
656
- let matched_batch = filter_lix_state_batch(source_batch, &filters)?;
657
- let write_rows = lix_state_update_write_rows_from_batch(
658
- &matched_batch,
659
- &assignments,
660
- branch_binding.as_deref(),
661
- )?;
662
- reject_read_only_stage_rows(&write_rows, "UPDATE lix_state")?;
663
- let count = u64::try_from(write_rows.len())
664
- .map_err(|_| DataFusionError::Execution("UPDATE row count overflow".to_string()))?;
665
-
666
- if count > 0 {
667
- write_ctx
668
- .stage_write(TransactionWrite::Rows {
669
- mode: TransactionWriteMode::Replace,
670
- rows: write_rows,
671
- })
672
- .await
673
- .map_err(lix_error_to_datafusion_error)?;
674
- }
675
-
676
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
677
- dml_count_batch(Arc::clone(&stream_schema), count)?,
678
- )]))
679
- })
680
- .try_flatten();
681
- Ok(Box::pin(RecordBatchStreamAdapter::new(
682
- result_schema,
683
- stream,
684
- )))
685
- }
686
- }
687
-
688
- fn filter_lix_state_batch(
689
- batch: RecordBatch,
690
- filters: &[Arc<dyn PhysicalExpr>],
691
- ) -> Result<RecordBatch> {
692
- let Some(mask) = evaluate_lix_state_filters(&batch, filters)? else {
693
- return Ok(batch);
694
- };
695
- Ok(filter_record_batch(&batch, &mask)?)
696
- }
697
-
698
- fn evaluate_lix_state_filters(
699
- batch: &RecordBatch,
700
- filters: &[Arc<dyn PhysicalExpr>],
701
- ) -> Result<Option<BooleanArray>> {
702
- if filters.is_empty() {
703
- return Ok(None);
704
- }
705
-
706
- let mut combined_mask: Option<BooleanArray> = None;
707
- for filter in filters {
708
- let result = filter.evaluate(batch)?;
709
- let array = result.into_array(batch.num_rows())?;
710
- let bool_array = array
711
- .as_any()
712
- .downcast_ref::<BooleanArray>()
713
- .ok_or_else(|| {
714
- DataFusionError::Execution("UPDATE lix_state filter was not boolean".to_string())
715
- })?;
716
- let normalized = bool_array
717
- .iter()
718
- .map(|value| Some(value == Some(true)))
719
- .collect::<BooleanArray>();
720
- combined_mask = Some(match combined_mask {
721
- Some(existing) => and(&existing, &normalized)?,
722
- None => normalized,
723
- });
724
- }
725
- Ok(combined_mask)
726
- }
727
-
728
- fn lix_state_stageable_write_rows_from_batch(
729
- batch: &RecordBatch,
730
- branch_binding: Option<&str>,
731
- action: &str,
732
- ) -> Result<Vec<TransactionWriteRow>> {
733
- let mut rows = lix_state_write_rows_from_batch(batch, branch_binding, action)?;
734
- for row in &mut rows {
735
- row.created_at = None;
736
- row.updated_at = None;
737
- row.change_id = None;
738
- row.commit_id = None;
739
- }
740
- Ok(rows)
741
- }
742
-
743
- fn lix_state_update_write_rows_from_batch(
744
- batch: &RecordBatch,
745
- assignments: &[(String, Arc<dyn PhysicalExpr>)],
746
- branch_binding: Option<&str>,
747
- ) -> Result<Vec<TransactionWriteRow>> {
748
- let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
749
- (0..batch.num_rows())
750
- .map(|row_index| {
751
- let global = optional_bool_value(batch, row_index, "global")?.unwrap_or(false);
752
- let branch_id =
753
- optional_string_value(batch, row_index, "branch_id")?.unwrap_or_else(|| {
754
- if global {
755
- GLOBAL_BRANCH_ID.to_string()
756
- } else {
757
- branch_binding.unwrap_or_default().to_string()
758
- }
759
- });
760
- if !global && branch_id.is_empty() {
761
- return Err(DataFusionError::Execution(
762
- "UPDATE lix_state_by_branch requires branch_id".to_string(),
763
- ));
764
- }
765
-
766
- Ok(TransactionWriteRow {
767
- entity_pk: Some(
768
- EntityPk::from_json_array_text(&required_string_value(
769
- batch,
770
- row_index,
771
- "entity_pk",
772
- )?)
773
- .map_err(|error| {
774
- DataFusionError::Execution(format!(
775
- "lix_state UPDATE has invalid entity_pk: {error}"
776
- ))
777
- })?,
778
- ),
779
- schema_key: required_string_value(batch, row_index, "schema_key")?,
780
- file_id: optional_string_value(batch, row_index, "file_id")?,
781
- snapshot: update_optional_json_value(
782
- batch,
783
- &assignment_values,
784
- row_index,
785
- "snapshot_content",
786
- )?,
787
- metadata: update_optional_metadata_value(
788
- batch,
789
- &assignment_values,
790
- row_index,
791
- "metadata",
792
- "lix_state",
793
- )?,
794
- origin: None,
795
- created_at: None,
796
- updated_at: None,
797
- global,
798
- change_id: None,
799
- commit_id: None,
800
- untracked: optional_bool_value(batch, row_index, "untracked")?.unwrap_or(false),
801
- branch_id,
802
- })
803
- })
804
- .collect()
805
- }
806
-
807
- fn lix_state_deletable_write_rows_from_batch(
808
- batch: &RecordBatch,
809
- branch_binding: Option<&str>,
810
- ) -> Result<Vec<TransactionWriteRow>> {
811
- let mut rows =
812
- lix_state_stageable_write_rows_from_batch(batch, branch_binding, "DELETE FROM lix_state")?;
813
- for row in &mut rows {
814
- row.snapshot = None;
815
- }
816
- Ok(rows)
817
- }
818
-
819
- fn update_optional_string_value(
820
- batch: &RecordBatch,
821
- assignment_values: &UpdateAssignmentValues,
822
- row_index: usize,
823
- column_name: &str,
824
- ) -> Result<Option<String>> {
825
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
826
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
827
- InsertCell::Provided(SqlCell::Value(
828
- ScalarValue::Utf8(Some(value))
829
- | ScalarValue::Utf8View(Some(value))
830
- | ScalarValue::LargeUtf8(Some(value)),
831
- )) => Ok(Some(value)),
832
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
833
- "UPDATE lix_state expected text-compatible column '{column_name}', got {other:?}"
834
- ))),
835
- }
836
- }
837
-
838
- fn update_optional_metadata_value(
839
- batch: &RecordBatch,
840
- assignment_values: &UpdateAssignmentValues,
841
- row_index: usize,
842
- column_name: &str,
843
- context: &str,
844
- ) -> Result<Option<TransactionJson>> {
845
- update_optional_string_value(batch, assignment_values, row_index, column_name)?
846
- .map(|value| {
847
- let metadata = parse_row_metadata_value(&value, context)
848
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)?;
849
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
850
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)
851
- })
852
- .transpose()
853
- }
854
-
855
- fn update_optional_json_value(
856
- batch: &RecordBatch,
857
- assignment_values: &UpdateAssignmentValues,
858
- row_index: usize,
859
- column_name: &str,
860
- ) -> Result<Option<TransactionJson>> {
861
- update_optional_string_value(batch, assignment_values, row_index, column_name)?
862
- .map(|value| parse_snapshot_json(&value, column_name))
863
- .transpose()
864
- }
865
-
866
- fn dml_count_schema() -> SchemaRef {
867
- Arc::new(Schema::new(vec![Field::new(
868
- "count",
869
- DataType::UInt64,
870
- false,
871
- )]))
872
- }
873
-
874
- fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
875
- RecordBatch::try_new(
876
- schema,
877
- vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
878
- )
879
- .map_err(DataFusionError::from)
880
- }
881
-
882
- fn lix_state_write_rows_from_batch(
883
- batch: &RecordBatch,
884
- branch_binding: Option<&str>,
885
- action: &str,
886
- ) -> Result<Vec<TransactionWriteRow>> {
887
- (0..batch.num_rows())
888
- .map(|row_index| {
889
- let global = optional_bool_value(batch, row_index, "global")?.unwrap_or(false);
890
- let branch_id =
891
- optional_string_value(batch, row_index, "branch_id")?.unwrap_or_else(|| {
892
- if global {
893
- GLOBAL_BRANCH_ID.to_string()
894
- } else {
895
- branch_binding.unwrap_or_default().to_string()
896
- }
897
- });
898
- if !global && branch_id.is_empty() {
899
- return Err(DataFusionError::Execution(format!(
900
- "{action} requires branch_id"
901
- )));
902
- }
903
-
904
- Ok(TransactionWriteRow {
905
- entity_pk: Some(
906
- EntityPk::from_json_array_text(&required_string_value(
907
- batch,
908
- row_index,
909
- "entity_pk",
910
- )?)
911
- .map_err(|error| {
912
- DataFusionError::Execution(format!(
913
- "lix_state INSERT has invalid entity_pk: {error}"
914
- ))
915
- })?,
916
- ),
917
- schema_key: required_string_value(batch, row_index, "schema_key")?,
918
- file_id: optional_string_value(batch, row_index, "file_id")?,
919
- snapshot: optional_json_value(batch, row_index, "snapshot_content")?,
920
- metadata: optional_metadata_value(batch, row_index, "metadata", "lix_state")?,
921
- origin: None,
922
- created_at: optional_string_value(batch, row_index, "created_at")?,
923
- updated_at: optional_string_value(batch, row_index, "updated_at")?,
924
- global,
925
- change_id: optional_string_value(batch, row_index, "change_id")?,
926
- commit_id: optional_string_value(batch, row_index, "commit_id")?,
927
- untracked: optional_bool_value(batch, row_index, "untracked")?.unwrap_or(false),
928
- branch_id,
929
- })
930
- })
931
- .collect()
932
- }
933
-
934
- fn validate_lix_state_update_assignments(
935
- schema: &SchemaRef,
936
- assignments: &[(String, Expr)],
937
- ) -> Result<()> {
938
- for (column_name, _) in assignments {
939
- schema.field_with_name(column_name).map_err(|_| {
940
- DataFusionError::Plan(format!(
941
- "UPDATE lix_state failed: column '{column_name}' does not exist"
942
- ))
943
- })?;
944
- if !matches!(
945
- column_name.as_str(),
946
- "snapshot_content" | "metadata" | "global" | "untracked"
947
- ) {
948
- return Err(DataFusionError::Execution(format!(
949
- "UPDATE lix_state cannot stage read-only column '{column_name}'"
950
- )));
951
- }
952
- }
953
- Ok(())
954
- }
955
-
956
- fn required_string_value(
957
- batch: &RecordBatch,
958
- row_index: usize,
959
- column_name: &str,
960
- ) -> Result<String> {
961
- optional_string_value(batch, row_index, column_name)?.ok_or_else(|| {
962
- DataFusionError::Execution(format!(
963
- "INSERT into lix_state requires non-null text column '{column_name}'"
964
- ))
965
- })
966
- }
967
-
968
- fn optional_string_value(
969
- batch: &RecordBatch,
970
- row_index: usize,
971
- column_name: &str,
972
- ) -> Result<Option<String>> {
973
- match optional_scalar_value(batch, row_index, column_name)? {
974
- None
975
- | Some(ScalarValue::Null)
976
- | Some(ScalarValue::Utf8(None))
977
- | Some(ScalarValue::Utf8View(None))
978
- | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
979
- Some(ScalarValue::Utf8(Some(value)))
980
- | Some(ScalarValue::Utf8View(Some(value)))
981
- | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
982
- Some(other) => Err(DataFusionError::Execution(format!(
983
- "INSERT into lix_state expected text-compatible column '{column_name}', got {other:?}"
984
- ))),
985
- }
986
- }
987
-
988
- fn optional_metadata_value(
989
- batch: &RecordBatch,
990
- row_index: usize,
991
- column_name: &str,
992
- context: &str,
993
- ) -> Result<Option<TransactionJson>> {
994
- optional_string_value(batch, row_index, column_name)?
995
- .map(|value| {
996
- let metadata = parse_row_metadata_value(&value, context)
997
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)?;
998
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
999
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)
1000
- })
1001
- .transpose()
1002
- }
1003
-
1004
- fn optional_json_value(
1005
- batch: &RecordBatch,
1006
- row_index: usize,
1007
- column_name: &str,
1008
- ) -> Result<Option<TransactionJson>> {
1009
- optional_string_value(batch, row_index, column_name)?
1010
- .map(|value| parse_snapshot_json(&value, column_name))
1011
- .transpose()
1012
- }
1013
-
1014
- fn parse_snapshot_json(value: &str, column_name: &str) -> Result<TransactionJson> {
1015
- let parsed = serde_json::from_str::<JsonValue>(value).map_err(|error| {
1016
- DataFusionError::Execution(format!(
1017
- "lix_state expected valid JSON in column '{column_name}': {error}"
1018
- ))
1019
- })?;
1020
- TransactionJson::from_value(parsed, &format!("lix_state {column_name}"))
1021
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)
1022
- }
1023
-
1024
- fn optional_bool_value(
1025
- batch: &RecordBatch,
1026
- row_index: usize,
1027
- column_name: &str,
1028
- ) -> Result<Option<bool>> {
1029
- match optional_scalar_value(batch, row_index, column_name)? {
1030
- Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
1031
- None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
1032
- Some(other) => Err(DataFusionError::Execution(format!(
1033
- "INSERT into lix_state expected boolean column '{column_name}', got {other:?}"
1034
- ))),
1035
- }
1036
- }
1037
-
1038
- fn optional_scalar_value(
1039
- batch: &RecordBatch,
1040
- row_index: usize,
1041
- column_name: &str,
1042
- ) -> Result<Option<ScalarValue>> {
1043
- let schema = batch.schema();
1044
- let column_index = match schema.index_of(column_name) {
1045
- Ok(column_index) => column_index,
1046
- Err(_) => return Ok(None),
1047
- };
1048
-
1049
- if row_index >= batch.num_rows() {
1050
- return Err(DataFusionError::Execution(format!(
1051
- "row index {row_index} out of bounds for lix_state batch with {} rows",
1052
- batch.num_rows()
1053
- )));
1054
- }
1055
-
1056
- ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
1057
- .map(Some)
1058
- .map_err(|error| {
1059
- DataFusionError::Execution(format!(
1060
- "failed to decode lix_state column '{column_name}' at row {row_index}: {error}"
1061
- ))
1062
- })
1063
- }
1064
-
1065
- struct LixStateScanExec {
1066
- live_state: Arc<dyn LiveStateReader>,
1067
- schema: SchemaRef,
1068
- request: LiveStateScanRequest,
1069
- properties: Arc<PlanProperties>,
1070
- }
1071
-
1072
- impl std::fmt::Debug for LixStateScanExec {
1073
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1074
- f.debug_struct("LixStateScanExec").finish()
1075
- }
1076
- }
1077
-
1078
- impl LixStateScanExec {
1079
- fn new(
1080
- live_state: Arc<dyn LiveStateReader>,
1081
- schema: SchemaRef,
1082
- request: LiveStateScanRequest,
1083
- ) -> Self {
1084
- let properties = PlanProperties::new(
1085
- EquivalenceProperties::new(schema.clone()),
1086
- Partitioning::UnknownPartitioning(1),
1087
- EmissionType::Incremental,
1088
- Boundedness::Bounded,
1089
- );
1090
- Self {
1091
- live_state,
1092
- schema,
1093
- request,
1094
- properties: Arc::new(properties),
1095
- }
1096
- }
1097
- }
1098
-
1099
- impl DisplayAs for LixStateScanExec {
1100
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1101
- match t {
1102
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
1103
- write!(f, "LixStateScanExec(limit={:?})", self.request.limit)
1104
- }
1105
- DisplayFormatType::TreeRender => write!(f, "LixStateScanExec"),
1106
- }
1107
- }
1108
- }
1109
-
1110
- impl ExecutionPlan for LixStateScanExec {
1111
- fn name(&self) -> &str {
1112
- "LixStateScanExec"
1113
- }
1114
-
1115
- fn as_any(&self) -> &dyn Any {
1116
- self
1117
- }
1118
-
1119
- fn properties(&self) -> &Arc<PlanProperties> {
1120
- &self.properties
1121
- }
1122
-
1123
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1124
- Vec::new()
1125
- }
1126
-
1127
- fn with_new_children(
1128
- self: Arc<Self>,
1129
- children: Vec<Arc<dyn ExecutionPlan>>,
1130
- ) -> Result<Arc<dyn ExecutionPlan>> {
1131
- if !children.is_empty() {
1132
- return Err(DataFusionError::Execution(
1133
- "LixStateScanExec does not accept children".to_string(),
1134
- ));
1135
- }
1136
- Ok(self)
1137
- }
1138
-
1139
- fn execute(
1140
- &self,
1141
- partition: usize,
1142
- _context: Arc<TaskContext>,
1143
- ) -> Result<SendableRecordBatchStream> {
1144
- if partition != 0 {
1145
- return Err(DataFusionError::Execution(format!(
1146
- "LixStateScanExec only exposes one partition, got {partition}"
1147
- )));
1148
- }
1149
-
1150
- let live_state = Arc::clone(&self.live_state);
1151
- let schema = Arc::clone(&self.schema);
1152
- let request = self.request.clone();
1153
- let stream_schema = Arc::clone(&schema);
1154
- let stream = stream::once(async move {
1155
- let rows = live_state
1156
- .scan_rows(&request)
1157
- .await
1158
- .map_err(lix_error_to_datafusion_error)?;
1159
- let batch = lix_state_record_batch(Arc::clone(&stream_schema), &rows)
1160
- .map_err(lix_error_to_datafusion_error)?;
1161
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
1162
- batch,
1163
- )]))
1164
- })
1165
- .try_flatten();
1166
- Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
1167
- }
1168
- }
1169
-
1170
- pub(super) fn lix_state_schema() -> SchemaRef {
1171
- Arc::new(Schema::new(vec![
1172
- json_field("entity_pk", false),
1173
- Field::new("schema_key", DataType::Utf8, false),
1174
- Field::new("file_id", DataType::Utf8, true),
1175
- json_field("snapshot_content", true),
1176
- json_field("metadata", true),
1177
- Field::new("created_at", DataType::Utf8, true),
1178
- Field::new("updated_at", DataType::Utf8, true),
1179
- Field::new("global", DataType::Boolean, true),
1180
- Field::new("change_id", DataType::Utf8, true),
1181
- Field::new("commit_id", DataType::Utf8, true),
1182
- Field::new("untracked", DataType::Boolean, true),
1183
- ]))
1184
- }
1185
-
1186
- pub(super) fn lix_state_by_branch_schema() -> SchemaRef {
1187
- Arc::new(Schema::new(vec![
1188
- json_field("entity_pk", false),
1189
- Field::new("schema_key", DataType::Utf8, false),
1190
- Field::new("file_id", DataType::Utf8, true),
1191
- json_field("snapshot_content", true),
1192
- json_field("metadata", true),
1193
- Field::new("created_at", DataType::Utf8, true),
1194
- Field::new("updated_at", DataType::Utf8, true),
1195
- Field::new("global", DataType::Boolean, true),
1196
- Field::new("change_id", DataType::Utf8, true),
1197
- Field::new("commit_id", DataType::Utf8, true),
1198
- Field::new("untracked", DataType::Boolean, true),
1199
- Field::new("branch_id", DataType::Utf8, false),
1200
- ]))
1201
- }
1202
-
1203
- #[derive(Debug, Clone, PartialEq, Eq, Default)]
1204
- struct LixStateByBranchRoute {
1205
- schema_keys: Option<BTreeSet<String>>,
1206
- branch_ids: Option<BTreeSet<String>>,
1207
- entity_pks: Option<BTreeSet<String>>,
1208
- file_id: Option<NullableKeyFilter<String>>,
1209
- contradictory: bool,
1210
- }
1211
-
1212
- impl LixStateByBranchRoute {
1213
- fn from_filters(filters: &[Expr]) -> Self {
1214
- let mut route = Self::default();
1215
- for filter in filters {
1216
- let Some(predicates) = parse_lix_state_filters(filter) else {
1217
- continue;
1218
- };
1219
- for predicate in predicates {
1220
- match predicate {
1221
- LixStateFilterPredicate::SchemaKeys(values) => {
1222
- merge_string_route_slot(
1223
- &mut route.schema_keys,
1224
- values,
1225
- &mut route.contradictory,
1226
- );
1227
- }
1228
- LixStateFilterPredicate::BranchIds(values) => {
1229
- merge_string_route_slot(
1230
- &mut route.branch_ids,
1231
- values,
1232
- &mut route.contradictory,
1233
- );
1234
- }
1235
- LixStateFilterPredicate::EntityPks(values) => {
1236
- merge_string_route_slot(
1237
- &mut route.entity_pks,
1238
- values,
1239
- &mut route.contradictory,
1240
- );
1241
- }
1242
- LixStateFilterPredicate::FileId(filter) => {
1243
- merge_nullable_key_route_slot(
1244
- &mut route.file_id,
1245
- filter,
1246
- &mut route.contradictory,
1247
- );
1248
- }
1249
- }
1250
- }
1251
- }
1252
- route
1253
- }
1254
- }
1255
-
1256
- #[derive(Debug, Clone, PartialEq, Eq)]
1257
- enum LixStateFilterPredicate {
1258
- SchemaKeys(BTreeSet<String>),
1259
- BranchIds(BTreeSet<String>),
1260
- EntityPks(BTreeSet<String>),
1261
- FileId(NullableKeyFilter<String>),
1262
- }
1263
-
1264
- fn lix_state_scan_request(
1265
- schema: &SchemaRef,
1266
- branch_binding: Option<&str>,
1267
- projection: Option<&Vec<usize>>,
1268
- route: &LixStateByBranchRoute,
1269
- limit: Option<usize>,
1270
- ) -> LiveStateScanRequest {
1271
- let projection = LiveStateProjection {
1272
- columns: projection_column_names(schema, projection),
1273
- };
1274
- let mut filter = LiveStateFilter {
1275
- schema_keys: route
1276
- .schema_keys
1277
- .as_ref()
1278
- .map(|values| values.iter().cloned().collect())
1279
- .unwrap_or_default(),
1280
- entity_pks: route
1281
- .entity_pks
1282
- .as_ref()
1283
- .map(|values| {
1284
- values
1285
- .iter()
1286
- .filter_map(|value| EntityPk::from_json_array_text(value).ok())
1287
- .collect()
1288
- })
1289
- .unwrap_or_default(),
1290
- branch_ids: branch_binding
1291
- .map(|value| vec![value.to_string()])
1292
- .or_else(|| {
1293
- route
1294
- .branch_ids
1295
- .as_ref()
1296
- .map(|values| values.iter().cloned().collect())
1297
- })
1298
- .unwrap_or_default(),
1299
- ..LiveStateFilter::default()
1300
- };
1301
- if let Some(file_id) = route.file_id.clone() {
1302
- filter.file_ids.push(file_id);
1303
- }
1304
-
1305
- if route.contradictory {
1306
- filter.rows = LiveStateRowFilter::None;
1307
- }
1308
-
1309
- LiveStateScanRequest {
1310
- filter,
1311
- projection,
1312
- limit,
1313
- }
1314
- }
1315
-
1316
- fn projection_column_names(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Vec<String> {
1317
- projection
1318
- .map(|indices| {
1319
- indices
1320
- .iter()
1321
- .filter_map(|index| schema.fields().get(*index))
1322
- .map(|field| field.name().to_string())
1323
- .collect::<Vec<_>>()
1324
- })
1325
- .unwrap_or_default()
1326
- }
1327
-
1328
/// Narrows `slot` by intersecting it with `values`.
///
/// An empty `values` set is ignored. An unset slot adopts `values` as is. If
/// the intersection with an existing slot is empty, `contradictory` is set
/// and the slot is left holding the empty set.
fn merge_string_route_slot(
    slot: &mut Option<BTreeSet<String>>,
    values: BTreeSet<String>,
    contradictory: &mut bool,
) {
    if values.is_empty() {
        return;
    }

    *slot = Some(match slot.take() {
        None => values,
        Some(existing) => {
            let kept: BTreeSet<String> = existing
                .into_iter()
                .filter(|value| values.contains(value))
                .collect();
            if kept.is_empty() {
                *contradictory = true;
            }
            kept
        }
    });
}
1347
-
1348
- fn merge_nullable_key_route_slot(
1349
- slot: &mut Option<NullableKeyFilter<String>>,
1350
- value: NullableKeyFilter<String>,
1351
- contradictory: &mut bool,
1352
- ) {
1353
- match slot {
1354
- Some(existing) if *existing != value => *contradictory = true,
1355
- Some(_) => {}
1356
- None => *slot = Some(value),
1357
- }
1358
- }
1359
-
1360
- fn parse_lix_state_filter(expr: &Expr) -> Option<LixStateFilterPredicate> {
1361
- parse_lix_state_filters(expr)?.into_iter().next()
1362
- }
1363
-
1364
- fn parse_lix_state_filters(expr: &Expr) -> Option<Vec<LixStateFilterPredicate>> {
1365
- match expr {
1366
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
1367
- let mut predicates = parse_lix_state_filters(&binary_expr.left)?;
1368
- predicates.extend(parse_lix_state_filters(&binary_expr.right)?);
1369
- Some(predicates)
1370
- }
1371
- Expr::BinaryExpr(binary_expr) => {
1372
- parse_lix_state_binary_filter(binary_expr).map(|predicate| vec![predicate])
1373
- }
1374
- Expr::InList(in_list) => {
1375
- parse_lix_state_in_list_filter(in_list).map(|predicate| vec![predicate])
1376
- }
1377
- Expr::IsNull(expr) => parse_lix_state_null_filter(expr).map(|predicate| vec![predicate]),
1378
- _ => None,
1379
- }
1380
- }
1381
-
1382
- fn parse_lix_state_binary_filter(binary_expr: &BinaryExpr) -> Option<LixStateFilterPredicate> {
1383
- if binary_expr.op != Operator::Eq {
1384
- return None;
1385
- }
1386
-
1387
- parse_lix_state_column_literal_filter(&binary_expr.left, &binary_expr.right)
1388
- .or_else(|| parse_lix_state_column_literal_filter(&binary_expr.right, &binary_expr.left))
1389
- }
1390
-
1391
- fn parse_lix_state_in_list_filter(in_list: &InList) -> Option<LixStateFilterPredicate> {
1392
- if in_list.negated {
1393
- return None;
1394
- }
1395
- let Expr::Column(column) = in_list.expr.as_ref() else {
1396
- return None;
1397
- };
1398
-
1399
- let values = in_list
1400
- .list
1401
- .iter()
1402
- .map(string_expr_literal)
1403
- .collect::<Option<Vec<_>>>()?;
1404
- if values.is_empty() {
1405
- return None;
1406
- }
1407
-
1408
- let values = values.into_iter().collect::<BTreeSet<_>>();
1409
- match column.name.as_str() {
1410
- "schema_key" => Some(LixStateFilterPredicate::SchemaKeys(values)),
1411
- "branch_id" => Some(LixStateFilterPredicate::BranchIds(values)),
1412
- "entity_pk" => canonical_entity_pk_values(values).map(LixStateFilterPredicate::EntityPks),
1413
- _ => None,
1414
- }
1415
- }
1416
-
1417
- fn parse_lix_state_null_filter(expr: &Expr) -> Option<LixStateFilterPredicate> {
1418
- let Expr::Column(column) = expr else {
1419
- return None;
1420
- };
1421
-
1422
- match column.name.as_str() {
1423
- "file_id" => Some(LixStateFilterPredicate::FileId(NullableKeyFilter::Null)),
1424
- _ => None,
1425
- }
1426
- }
1427
-
1428
- fn parse_lix_state_column_literal_filter(
1429
- column_expr: &Expr,
1430
- literal_expr: &Expr,
1431
- ) -> Option<LixStateFilterPredicate> {
1432
- let Expr::Column(column) = column_expr else {
1433
- return None;
1434
- };
1435
-
1436
- match column.name.as_str() {
1437
- "schema_key" => string_expr_literal(literal_expr)
1438
- .map(|value| LixStateFilterPredicate::SchemaKeys(BTreeSet::from([value]))),
1439
- "branch_id" => string_expr_literal(literal_expr)
1440
- .map(|value| LixStateFilterPredicate::BranchIds(BTreeSet::from([value]))),
1441
- "entity_pk" => string_expr_literal(literal_expr)
1442
- .and_then(|value| canonical_entity_pk_value(&value))
1443
- .map(|value| LixStateFilterPredicate::EntityPks(BTreeSet::from([value]))),
1444
- "file_id" => nullable_key_literal(literal_expr).map(LixStateFilterPredicate::FileId),
1445
- _ => None,
1446
- }
1447
- }
1448
-
1449
- fn canonical_entity_pk_values(values: BTreeSet<String>) -> Option<BTreeSet<String>> {
1450
- values
1451
- .into_iter()
1452
- .map(|value| canonical_entity_pk_value(&value))
1453
- .collect()
1454
- }
1455
-
1456
- fn canonical_entity_pk_value(value: &str) -> Option<String> {
1457
- EntityPk::from_json_array_text(value)
1458
- .ok()?
1459
- .as_json_array_text()
1460
- .ok()
1461
- }
1462
-
1463
- fn nullable_key_literal(expr: &Expr) -> Option<NullableKeyFilter<String>> {
1464
- if is_null_literal(expr) {
1465
- return Some(NullableKeyFilter::Null);
1466
- }
1467
- string_expr_literal(expr).map(NullableKeyFilter::Value)
1468
- }
1469
-
1470
- fn string_expr_literal(expr: &Expr) -> Option<String> {
1471
- let Expr::Literal(literal, _) = expr else {
1472
- return None;
1473
- };
1474
- match literal {
1475
- ScalarValue::Utf8(Some(value))
1476
- | ScalarValue::Utf8View(Some(value))
1477
- | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
1478
- _ => None,
1479
- }
1480
- }
1481
-
1482
- fn is_null_literal(expr: &Expr) -> bool {
1483
- matches!(expr, Expr::Literal(ScalarValue::Null, _))
1484
- }
1485
-
1486
- fn lix_state_record_batch(
1487
- schema: SchemaRef,
1488
- rows: &[MaterializedLiveStateRow],
1489
- ) -> Result<RecordBatch, LixError> {
1490
- if schema.fields().is_empty() {
1491
- let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
1492
- return RecordBatch::try_new_with_options(schema, vec![], &options).map_err(|error| {
1493
- LixError::new(
1494
- "LIX_ERROR_UNKNOWN",
1495
- format!("sql2 failed to build zero-column lix_state batch: {error}"),
1496
- )
1497
- });
1498
- }
1499
-
1500
- let columns = schema
1501
- .fields()
1502
- .iter()
1503
- .map(|field| {
1504
- Ok(match field.name().as_str() {
1505
- "entity_pk" => Arc::new(StringArray::from(
1506
- rows.iter()
1507
- .map(|row| row.entity_pk.as_json_array_text().map(Some))
1508
- .collect::<std::result::Result<Vec<_>, LixError>>()?,
1509
- )) as ArrayRef,
1510
- "schema_key" => string_array(rows.iter().map(|row| Some(row.schema_key.as_str()))),
1511
- "file_id" => string_array(rows.iter().map(|row| row.file_id.as_deref())),
1512
- "snapshot_content" => {
1513
- string_array(rows.iter().map(|row| row.snapshot_content.as_deref()))
1514
- }
1515
- "metadata" => Arc::new(StringArray::from(
1516
- rows.iter()
1517
- .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
1518
- .collect::<Vec<_>>(),
1519
- )),
1520
- "created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
1521
- "updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
1522
- "global" => Arc::new(BooleanArray::from(
1523
- rows.iter().map(|row| row.global).collect::<Vec<_>>(),
1524
- )) as ArrayRef,
1525
- "change_id" => string_array(rows.iter().map(|row| row.change_id.as_deref())),
1526
- "commit_id" => string_array(rows.iter().map(|row| row.commit_id.as_deref())),
1527
- "untracked" => Arc::new(BooleanArray::from(
1528
- rows.iter().map(|row| row.untracked).collect::<Vec<_>>(),
1529
- )) as ArrayRef,
1530
- "branch_id" => string_array(rows.iter().map(|row| Some(row.branch_id.as_str()))),
1531
- other => {
1532
- return Err(LixError::new(
1533
- "LIX_ERROR_UNKNOWN",
1534
- format!("sql2 does not support lix_state column '{other}'"),
1535
- ))
1536
- }
1537
- })
1538
- })
1539
- .collect::<Result<Vec<_>, _>>()?;
1540
-
1541
- RecordBatch::try_new(schema, columns).map_err(|error| {
1542
- LixError::new(
1543
- "LIX_ERROR_UNKNOWN",
1544
- format!("sql2 failed to build lix_state_by_branch batch: {error}"),
1545
- )
1546
- })
1547
- }
1548
-
1549
- fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
1550
- let values = values
1551
- .map(|value| value.map(ToOwned::to_owned))
1552
- .collect::<Vec<_>>();
1553
- Arc::new(StringArray::from(values)) as ArrayRef
1554
- }
1555
-
1556
- fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1557
- let Some(projection) = projection else {
1558
- return Ok(Arc::clone(schema));
1559
- };
1560
-
1561
- let projected = schema.project(projection).map_err(|error| {
1562
- DataFusionError::Execution(format!("sql2 failed to project lix_state schema: {error}"))
1563
- })?;
1564
- Ok(Arc::new(projected))
1565
- }
1566
-
1567
- fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
1568
- crate::sql2::error::datafusion_error_to_lix_error(error)
1569
- }
1570
-
1571
- fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
1572
- crate::sql2::error::lix_error_to_datafusion_error(error)
1573
- }
1574
-
1575
- #[cfg(test)]
1576
- mod tests {
1577
- use super::{
1578
- lix_state_scan_request, lix_state_schema, lix_state_write_rows_from_batch,
1579
- parse_lix_state_filter, register_lix_state_active_write_provider,
1580
- register_lix_state_by_branch_write_provider, LixStateByBranchRoute, LixStateDeleteExec,
1581
- LixStateFilterPredicate, LixStateInsertSink, LixStateProvider, LixStateUpdateExec,
1582
- };
1583
- use crate::binary_cas::BlobDataReader;
1584
- use crate::branch::{BranchHead, BranchRefReader};
1585
- use crate::functions::{
1586
- FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
1587
- };
1588
- use crate::sql2::dml::{InsertExec, InsertSink};
1589
- use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
1590
- use crate::transaction::types::{
1591
- TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
1592
- TransactionWriteRow,
1593
- };
1594
- use crate::{
1595
- entity_pk::EntityPk,
1596
- live_state::{
1597
- LiveStateReader, LiveStateRowRequest, LiveStateScanRequest, MaterializedLiveStateRow,
1598
- },
1599
- };
1600
- use crate::{LixError, NullableKeyFilter};
1601
- use async_trait::async_trait;
1602
- use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray, UInt64Array};
1603
- use datafusion::arrow::datatypes::DataType;
1604
- use datafusion::arrow::record_batch::RecordBatch;
1605
- use datafusion::catalog::TableProvider;
1606
- use datafusion::common::{Column, DataFusionError};
1607
- use datafusion::execution::TaskContext;
1608
- use datafusion::logical_expr::dml::InsertOp;
1609
- use datafusion::logical_expr::expr::InList;
1610
- use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
1611
- use datafusion::physical_expr::EquivalenceProperties;
1612
- use datafusion::physical_plan::empty::EmptyExec;
1613
- use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
1614
- use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
1615
- use datafusion::physical_plan::{
1616
- DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
1617
- };
1618
- use datafusion::prelude::SessionContext;
1619
- use datafusion::scalar::ScalarValue;
1620
- use futures_util::stream;
1621
- use serde_json::json;
1622
- use std::collections::BTreeSet;
1623
- use std::sync::Arc;
1624
-
1625
- struct EmptyLiveStateReader;
1626
- struct EmptyBranchRefReader;
1627
- struct DummyBlobReader;
1628
-
1629
- #[derive(Default)]
1630
- struct DummyWriteContext {
1631
- rows: Vec<MaterializedLiveStateRow>,
1632
- }
1633
-
1634
- #[derive(Default)]
1635
- struct CapturingWriteContext {
1636
- rows: Vec<MaterializedLiveStateRow>,
1637
- writes: Vec<TransactionWrite>,
1638
- }
1639
-
1640
- struct SingleBatchExec {
1641
- batch: RecordBatch,
1642
- properties: Arc<PlanProperties>,
1643
- }
1644
-
1645
- impl std::fmt::Debug for SingleBatchExec {
1646
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1647
- f.debug_struct("SingleBatchExec").finish()
1648
- }
1649
- }
1650
-
1651
- impl SingleBatchExec {
1652
- fn new(batch: RecordBatch) -> Self {
1653
- let properties = PlanProperties::new(
1654
- EquivalenceProperties::new(batch.schema()),
1655
- Partitioning::UnknownPartitioning(1),
1656
- EmissionType::Incremental,
1657
- Boundedness::Bounded,
1658
- );
1659
- Self {
1660
- batch,
1661
- properties: Arc::new(properties),
1662
- }
1663
- }
1664
- }
1665
-
1666
- impl DisplayAs for SingleBatchExec {
1667
- fn fmt_as(
1668
- &self,
1669
- _t: DisplayFormatType,
1670
- f: &mut std::fmt::Formatter<'_>,
1671
- ) -> std::fmt::Result {
1672
- write!(f, "SingleBatchExec")
1673
- }
1674
- }
1675
-
1676
- impl ExecutionPlan for SingleBatchExec {
1677
- fn name(&self) -> &str {
1678
- "SingleBatchExec"
1679
- }
1680
-
1681
- fn as_any(&self) -> &dyn std::any::Any {
1682
- self
1683
- }
1684
-
1685
- fn properties(&self) -> &Arc<PlanProperties> {
1686
- &self.properties
1687
- }
1688
-
1689
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
1690
- Vec::new()
1691
- }
1692
-
1693
- fn with_new_children(
1694
- self: Arc<Self>,
1695
- children: Vec<Arc<dyn ExecutionPlan>>,
1696
- ) -> datafusion::common::Result<Arc<dyn ExecutionPlan>> {
1697
- if !children.is_empty() {
1698
- return Err(DataFusionError::Execution(
1699
- "SingleBatchExec does not accept children".to_string(),
1700
- ));
1701
- }
1702
- Ok(self)
1703
- }
1704
-
1705
- fn execute(
1706
- &self,
1707
- partition: usize,
1708
- _context: Arc<TaskContext>,
1709
- ) -> datafusion::common::Result<SendableRecordBatchStream> {
1710
- if partition != 0 {
1711
- return Err(DataFusionError::Execution(format!(
1712
- "SingleBatchExec only exposes one partition, got {partition}"
1713
- )));
1714
- }
1715
-
1716
- let batch = self.batch.clone();
1717
- let schema = batch.schema();
1718
- let stream = stream::iter(vec![Ok(batch)]);
1719
- Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
1720
- }
1721
- }
1722
-
1723
- #[async_trait]
1724
- impl LiveStateReader for EmptyLiveStateReader {
1725
- async fn scan_rows(
1726
- &self,
1727
- _request: &LiveStateScanRequest,
1728
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1729
- Ok(vec![])
1730
- }
1731
-
1732
- async fn load_row(
1733
- &self,
1734
- _request: &LiveStateRowRequest,
1735
- ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
1736
- Ok(None)
1737
- }
1738
- }
1739
-
1740
- #[async_trait]
1741
- impl BranchRefReader for EmptyBranchRefReader {
1742
- async fn load_head(&self, _branch_id: &str) -> Result<Option<BranchHead>, LixError> {
1743
- Ok(None)
1744
- }
1745
-
1746
- async fn scan_heads(&self) -> Result<Vec<BranchHead>, LixError> {
1747
- Ok(Vec::new())
1748
- }
1749
- }
1750
-
1751
- fn empty_branch_ref() -> Arc<dyn BranchRefReader> {
1752
- Arc::new(EmptyBranchRefReader)
1753
- }
1754
-
1755
- fn test_functions() -> FunctionProviderHandle {
1756
- SharedFunctionProvider::new(
1757
- Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
1758
- )
1759
- }
1760
-
1761
- #[async_trait]
1762
- impl BlobDataReader for DummyBlobReader {
1763
- async fn load_bytes_many(
1764
- &self,
1765
- hashes: &[crate::binary_cas::BlobHash],
1766
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1767
- Ok(crate::binary_cas::BlobBytesBatch::new(vec![
1768
- None;
1769
- hashes.len()
1770
- ]))
1771
- }
1772
- }
1773
-
1774
- #[async_trait]
1775
- impl SqlWriteExecutionContext for DummyWriteContext {
1776
- fn active_branch_id(&self) -> &str {
1777
- "branch-a"
1778
- }
1779
-
1780
- fn functions(&self) -> FunctionProviderHandle {
1781
- test_functions()
1782
- }
1783
-
1784
- fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
1785
- Ok(Vec::new())
1786
- }
1787
-
1788
- async fn load_bytes_many(
1789
- &mut self,
1790
- hashes: &[crate::binary_cas::BlobHash],
1791
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1792
- DummyBlobReader.load_bytes_many(hashes).await
1793
- }
1794
-
1795
- async fn scan_live_state(
1796
- &mut self,
1797
- _request: &LiveStateScanRequest,
1798
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1799
- Ok(self.rows.clone())
1800
- }
1801
-
1802
- async fn load_branch_head(&mut self, branch_id: &str) -> Result<Option<String>, LixError> {
1803
- if branch_id == "ghost-branch" {
1804
- return Ok(None);
1805
- }
1806
- Ok(Some(format!("commit-{branch_id}")))
1807
- }
1808
-
1809
- async fn stage_write(
1810
- &mut self,
1811
- _write: TransactionWrite,
1812
- ) -> Result<TransactionWriteOutcome, LixError> {
1813
- Ok(TransactionWriteOutcome { count: 0 })
1814
- }
1815
- }
1816
-
1817
- #[async_trait]
1818
- impl SqlWriteExecutionContext for CapturingWriteContext {
1819
- fn active_branch_id(&self) -> &str {
1820
- "branch-a"
1821
- }
1822
-
1823
- fn functions(&self) -> FunctionProviderHandle {
1824
- test_functions()
1825
- }
1826
-
1827
- fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
1828
- Ok(Vec::new())
1829
- }
1830
-
1831
- async fn load_bytes_many(
1832
- &mut self,
1833
- hashes: &[crate::binary_cas::BlobHash],
1834
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1835
- DummyBlobReader.load_bytes_many(hashes).await
1836
- }
1837
-
1838
- async fn scan_live_state(
1839
- &mut self,
1840
- _request: &LiveStateScanRequest,
1841
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1842
- Ok(self.rows.clone())
1843
- }
1844
-
1845
- async fn load_branch_head(&mut self, branch_id: &str) -> Result<Option<String>, LixError> {
1846
- if branch_id == "ghost-branch" {
1847
- return Ok(None);
1848
- }
1849
- Ok(Some(format!("commit-{branch_id}")))
1850
- }
1851
-
1852
- async fn stage_write(
1853
- &mut self,
1854
- write: TransactionWrite,
1855
- ) -> Result<TransactionWriteOutcome, LixError> {
1856
- self.writes.push(write);
1857
- Ok(TransactionWriteOutcome { count: 0 })
1858
- }
1859
- }
1860
-
1861
- fn col(name: &str) -> Expr {
1862
- Expr::Column(Column::from_name(name))
1863
- }
1864
-
1865
- fn str_lit(value: &str) -> Expr {
1866
- Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
1867
- }
1868
-
1869
- fn json_lit(value: &str) -> Expr {
1870
- Expr::Literal(
1871
- ScalarValue::Utf8(Some(value.to_string())),
1872
- Some(datafusion::common::metadata::FieldMetadata::new(
1873
- std::collections::BTreeMap::from([(
1874
- crate::sql2::result_metadata::LIX_VALUE_TYPE_METADATA_KEY.to_string(),
1875
- crate::sql2::result_metadata::LIX_VALUE_TYPE_JSON.to_string(),
1876
- )]),
1877
- )),
1878
- )
1879
- }
1880
-
1881
- fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
1882
- Arc::new(StringArray::from(values)) as ArrayRef
1883
- }
1884
-
1885
- fn one_row_lix_state_batch(global: bool) -> RecordBatch {
1886
- RecordBatch::try_new(
1887
- lix_state_schema(),
1888
- vec![
1889
- string_column(vec![Some("[\"entity-1\"]")]),
1890
- string_column(vec![Some("lix_key_value")]),
1891
- string_column(vec![None]),
1892
- string_column(vec![Some("{\"key\":\"hello\",\"value\":\"world\"}")]),
1893
- string_column(vec![Some("{\"source\":\"test\"}")]),
1894
- string_column(vec![Some("2026-04-23T00:00:00Z")]),
1895
- string_column(vec![Some("2026-04-23T01:00:00Z")]),
1896
- Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
1897
- string_column(vec![Some("change-a")]),
1898
- string_column(vec![None]),
1899
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1900
- ],
1901
- )
1902
- .expect("valid lix_state batch")
1903
- }
1904
-
1905
- fn one_row_stageable_lix_state_batch() -> RecordBatch {
1906
- RecordBatch::try_new(
1907
- lix_state_schema(),
1908
- vec![
1909
- string_column(vec![Some("[\"entity-1\"]")]),
1910
- string_column(vec![Some("lix_key_value")]),
1911
- string_column(vec![None]),
1912
- string_column(vec![Some("{\"key\":\"hello\",\"value\":\"world\"}")]),
1913
- string_column(vec![None]),
1914
- string_column(vec![None]),
1915
- string_column(vec![None]),
1916
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1917
- string_column(vec![None]),
1918
- string_column(vec![None]),
1919
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
1920
- ],
1921
- )
1922
- .expect("valid stageable lix_state batch")
1923
- }
1924
-
1925
- fn live_row(entity_pk: &str, metadata: Option<&str>) -> MaterializedLiveStateRow {
1926
- MaterializedLiveStateRow {
1927
- entity_pk: EntityPk::single(entity_pk),
1928
- schema_key: "lix_key_value".to_string(),
1929
- file_id: None,
1930
- snapshot_content: Some("{\"key\":\"hello\",\"value\":\"world\"}".to_string()),
1931
- metadata: metadata.map(str::to_string),
1932
- deleted: false,
1933
- branch_id: "branch-a".to_string(),
1934
- change_id: Some(format!("change-{entity_pk}")),
1935
- commit_id: Some(format!("commit-{entity_pk}")),
1936
- global: false,
1937
- untracked: false,
1938
- created_at: "2026-04-23T00:00:00Z".to_string(),
1939
- updated_at: "2026-04-23T01:00:00Z".to_string(),
1940
- }
1941
- }
1942
-
1943
- #[test]
1944
- fn parses_eq_filter_for_schema_key() {
1945
- let expr = Expr::BinaryExpr(BinaryExpr::new(
1946
- Box::new(col("schema_key")),
1947
- Operator::Eq,
1948
- Box::new(str_lit("profile")),
1949
- ));
1950
-
1951
- assert_eq!(
1952
- parse_lix_state_filter(&expr),
1953
- Some(LixStateFilterPredicate::SchemaKeys(BTreeSet::from([
1954
- "profile".to_string(),
1955
- ])))
1956
- );
1957
- }
1958
-
1959
- #[test]
1960
- fn parses_in_list_filter_for_branch_id() {
1961
- let expr = Expr::InList(InList::new(
1962
- Box::new(col("branch_id")),
1963
- vec![str_lit("a"), str_lit("b")],
1964
- false,
1965
- ));
1966
-
1967
- assert_eq!(
1968
- parse_lix_state_filter(&expr),
1969
- Some(LixStateFilterPredicate::BranchIds(BTreeSet::from([
1970
- "a".to_string(),
1971
- "b".to_string(),
1972
- ])))
1973
- );
1974
- }
1975
-
1976
- #[test]
1977
- fn builds_scan_request_from_route_and_projection() {
1978
- let schema = super::lix_state_by_branch_schema();
1979
- let route = LixStateByBranchRoute::from_filters(&[
1980
- Expr::BinaryExpr(BinaryExpr::new(
1981
- Box::new(col("schema_key")),
1982
- Operator::Eq,
1983
- Box::new(str_lit("profile")),
1984
- )),
1985
- Expr::BinaryExpr(BinaryExpr::new(
1986
- Box::new(col("branch_id")),
1987
- Operator::Eq,
1988
- Box::new(str_lit("v1")),
1989
- )),
1990
- Expr::IsNull(Box::new(col("file_id"))),
1991
- ]);
1992
-
1993
- let request =
1994
- lix_state_scan_request(&schema, None, Some(&vec![0, 1, 11]), &route, Some(10));
1995
-
1996
- assert_eq!(request.filter.schema_keys, vec!["profile".to_string()]);
1997
- assert_eq!(request.filter.branch_ids, vec!["v1".to_string()]);
1998
- assert_eq!(request.filter.file_ids, vec![NullableKeyFilter::Null]);
1999
- assert_eq!(
2000
- request.projection.columns,
2001
- vec![
2002
- "entity_pk".to_string(),
2003
- "schema_key".to_string(),
2004
- "branch_id".to_string()
2005
- ]
2006
- );
2007
- assert_eq!(request.limit, Some(10));
2008
- }
2009
-
2010
- #[test]
2011
- fn builds_route_from_and_filter_tree() {
2012
- let route = LixStateByBranchRoute::from_filters(&[Expr::BinaryExpr(BinaryExpr::new(
2013
- Box::new(Expr::BinaryExpr(BinaryExpr::new(
2014
- Box::new(col("entity_pk")),
2015
- Operator::Eq,
2016
- Box::new(str_lit("[\"entity-a\"]")),
2017
- ))),
2018
- Operator::And,
2019
- Box::new(Expr::InList(InList::new(
2020
- Box::new(col("branch_id")),
2021
- vec![str_lit("branch-a"), str_lit("global")],
2022
- false,
2023
- ))),
2024
- ))]);
2025
-
2026
- assert_eq!(
2027
- route.entity_pks,
2028
- Some(BTreeSet::from(["[\"entity-a\"]".to_string()]))
2029
- );
2030
- assert_eq!(
2031
- route.branch_ids,
2032
- Some(BTreeSet::from([
2033
- "global".to_string(),
2034
- "branch-a".to_string()
2035
- ]))
2036
- );
2037
- }
2038
-
2039
- #[test]
2040
- fn contradictory_filters_turn_into_zero_limit_request() {
2041
- let schema = super::lix_state_by_branch_schema();
2042
- let route = LixStateByBranchRoute::from_filters(&[
2043
- Expr::BinaryExpr(BinaryExpr::new(
2044
- Box::new(col("schema_key")),
2045
- Operator::Eq,
2046
- Box::new(str_lit("a")),
2047
- )),
2048
- Expr::BinaryExpr(BinaryExpr::new(
2049
- Box::new(col("schema_key")),
2050
- Operator::Eq,
2051
- Box::new(str_lit("b")),
2052
- )),
2053
- ]);
2054
-
2055
- let request = lix_state_scan_request(&schema, None, None, &route, None);
2056
-
2057
- assert_eq!(
2058
- request.filter.rows,
2059
- crate::live_state::LiveStateRowFilter::None
2060
- );
2061
- assert_eq!(request.limit, None);
2062
- assert!(request.filter.schema_keys.is_empty());
2063
- }
2064
-
2065
- #[tokio::test]
2066
- async fn active_provider_contradictory_filters_still_validate_active_head() {
2067
- let provider = LixStateProvider::active_branch(
2068
- "missing-branch",
2069
- Arc::new(EmptyLiveStateReader),
2070
- empty_branch_ref(),
2071
- );
2072
- let session = SessionContext::new();
2073
- let filters = vec![
2074
- Expr::BinaryExpr(BinaryExpr::new(
2075
- Box::new(col("schema_key")),
2076
- Operator::Eq,
2077
- Box::new(str_lit("a")),
2078
- )),
2079
- Expr::BinaryExpr(BinaryExpr::new(
2080
- Box::new(col("schema_key")),
2081
- Operator::Eq,
2082
- Box::new(str_lit("b")),
2083
- )),
2084
- ];
2085
-
2086
- let error = provider
2087
- .scan(&session.state(), None, &filters, None)
2088
- .await
2089
- .expect_err("missing active branch should be checked before zero-row scan");
2090
- let error = super::datafusion_error_to_lix_error(error);
2091
-
2092
- assert_eq!(error.code, LixError::CODE_BRANCH_NOT_FOUND);
2093
- assert!(error
2094
- .message
2095
- .contains("branch 'missing-branch' was not found"));
2096
- }
2097
-
2098
- #[test]
2099
- fn active_branch_view_pins_branch_filter() {
2100
- let schema = super::lix_state_schema();
2101
- let route = LixStateByBranchRoute::from_filters(&[Expr::BinaryExpr(BinaryExpr::new(
2102
- Box::new(col("schema_key")),
2103
- Operator::Eq,
2104
- Box::new(str_lit("profile")),
2105
- ))]);
2106
-
2107
- let request = lix_state_scan_request(&schema, Some("branch-a"), None, &route, None);
2108
-
2109
- assert_eq!(request.filter.schema_keys, vec!["profile".to_string()]);
2110
- assert_eq!(request.filter.branch_ids, vec!["branch-a".to_string()]);
2111
- }
2112
-
2113
- #[tokio::test]
2114
- async fn registers_active_lix_state_with_write_context_only() {
2115
- let session = SessionContext::new();
2116
- let mut write_context = DummyWriteContext::default();
2117
- let write_ctx = SqlWriteContext::new(&mut write_context);
2118
-
2119
- register_lix_state_active_write_provider(&session, "lix_state", write_ctx.clone())
2120
- .await
2121
- .expect("lix_state provider should register");
2122
- register_lix_state_by_branch_write_provider(&session, "lix_state_by_branch", write_ctx)
2123
- .await
2124
- .expect("lix_state_by_branch provider should register");
2125
-
2126
- let lix_state = session
2127
- .table_provider("lix_state")
2128
- .await
2129
- .expect("lix_state provider should exist");
2130
- let lix_state = lix_state
2131
- .as_any()
2132
- .downcast_ref::<LixStateProvider>()
2133
- .expect("lix_state should be a LixStateProvider");
2134
- assert!(lix_state.write_access.is_write());
2135
-
2136
- let by_branch = session
2137
- .table_provider("lix_state_by_branch")
2138
- .await
2139
- .expect("lix_state_by_branch provider should exist");
2140
- let by_branch = by_branch
2141
- .as_any()
2142
- .downcast_ref::<LixStateProvider>()
2143
- .expect("lix_state_by_branch should be a LixStateProvider");
2144
- assert!(by_branch.write_access.is_write());
2145
- }
2146
-
2147
- #[tokio::test]
2148
- async fn insert_into_requires_write_transaction() {
2149
- let session = SessionContext::new();
2150
- let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2151
- let provider = LixStateProvider::active_branch("branch-a", live_state, empty_branch_ref());
2152
- let input = Arc::new(EmptyExec::new(provider.schema())) as Arc<dyn ExecutionPlan>;
2153
-
2154
- let error = provider
2155
- .insert_into(&session.state(), input, InsertOp::Append)
2156
- .await
2157
- .expect_err("insert without a write context should fail");
2158
-
2159
- assert!(
2160
- error.to_string().contains("requires a write transaction"),
2161
- "unexpected error: {error}"
2162
- );
2163
- }
2164
-
2165
- #[tokio::test]
2166
- async fn update_requires_write_transaction() {
2167
- let session = SessionContext::new();
2168
- let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2169
- let provider = LixStateProvider::active_branch("branch-a", live_state, empty_branch_ref());
2170
-
2171
- let error = provider
2172
- .update(
2173
- &session.state(),
2174
- vec![("metadata".to_string(), str_lit("{\"source\":\"update\"}"))],
2175
- vec![],
2176
- )
2177
- .await
2178
- .expect_err("update without a write context should fail");
2179
-
2180
- assert!(
2181
- error.to_string().contains("requires a write transaction"),
2182
- "unexpected error: {error}"
2183
- );
2184
- }
2185
-
2186
- #[tokio::test]
2187
- async fn delete_requires_write_transaction() {
2188
- let session = SessionContext::new();
2189
- let live_state = Arc::new(EmptyLiveStateReader) as Arc<dyn LiveStateReader>;
2190
- let provider = LixStateProvider::active_branch("branch-a", live_state, empty_branch_ref());
2191
-
2192
- let error = provider
2193
- .delete_from(&session.state(), vec![])
2194
- .await
2195
- .expect_err("delete without a write context should fail");
2196
-
2197
- assert!(
2198
- error.to_string().contains("requires a write transaction"),
2199
- "unexpected error: {error}"
2200
- );
2201
- }
2202
-
2203
- #[tokio::test]
2204
- async fn delete_returns_lix_state_delete_exec_with_write_ctx() {
2205
- let session = SessionContext::new();
2206
- let mut write_context = DummyWriteContext::default();
2207
- let write_ctx = SqlWriteContext::new(&mut write_context);
2208
- let provider = LixStateProvider::active_branch_with_write(write_ctx);
2209
-
2210
- let plan = provider
2211
- .delete_from(&session.state(), vec![])
2212
- .await
2213
- .expect("delete should produce a write plan");
2214
-
2215
- assert!(plan.as_any().is::<LixStateDeleteExec>());
2216
- }
2217
-
2218
- #[tokio::test]
2219
- async fn update_rejects_read_only_lix_state_columns() {
2220
- let session = SessionContext::new();
2221
- let mut write_context = DummyWriteContext::default();
2222
- let write_ctx = SqlWriteContext::new(&mut write_context);
2223
- let provider = LixStateProvider::active_branch_with_write(write_ctx);
2224
-
2225
- let error = provider
2226
- .update(
2227
- &session.state(),
2228
- vec![("entity_pk".to_string(), str_lit("entity-2"))],
2229
- vec![],
2230
- )
2231
- .await
2232
- .expect_err("updating a read-only field should fail");
2233
-
2234
- assert!(
2235
- error.to_string().contains("read-only column 'entity_pk'"),
2236
- "unexpected error: {error}"
2237
- );
2238
- }
2239
-
2240
- #[tokio::test]
2241
- async fn update_returns_lix_state_update_exec_with_write_ctx() {
2242
- let session = SessionContext::new();
2243
- let mut write_context = DummyWriteContext::default();
2244
- let write_ctx = SqlWriteContext::new(&mut write_context);
2245
- let provider = LixStateProvider::active_branch_with_write(write_ctx);
2246
-
2247
- let plan = provider
2248
- .update(
2249
- &session.state(),
2250
- vec![("metadata".to_string(), str_lit("{\"source\":\"update\"}"))],
2251
- vec![],
2252
- )
2253
- .await
2254
- .expect("update should produce a write plan");
2255
-
2256
- assert!(plan.as_any().is::<LixStateUpdateExec>());
2257
- }
2258
-
2259
- #[tokio::test]
2260
- async fn insert_into_returns_data_sink_exec_with_write_ctx() {
2261
- let session = SessionContext::new();
2262
- let mut write_context = DummyWriteContext::default();
2263
- let write_ctx = SqlWriteContext::new(&mut write_context);
2264
- let provider = LixStateProvider::active_branch_with_write(write_ctx);
2265
- let input = Arc::new(EmptyExec::new(provider.schema())) as Arc<dyn ExecutionPlan>;
2266
-
2267
- let plan = provider
2268
- .insert_into(&session.state(), input, InsertOp::Append)
2269
- .await
2270
- .expect("insert should produce a write plan");
2271
-
2272
- assert!(plan.as_any().is::<InsertExec>());
2273
- }
2274
-
2275
- #[test]
2276
- fn decodes_lix_state_batch_into_write_rows() {
2277
- let rows = lix_state_write_rows_from_batch(
2278
- &one_row_lix_state_batch(false),
2279
- Some("branch-a"),
2280
- "INSERT into lix_state",
2281
- )
2282
- .expect("batch should decode");
2283
-
2284
- assert_eq!(
2285
- rows,
2286
- vec![TransactionWriteRow {
2287
- entity_pk: Some(crate::entity_pk::EntityPk::single("entity-1")),
2288
- schema_key: "lix_key_value".to_string(),
2289
- file_id: None,
2290
- snapshot: Some(TransactionJson::from_value_for_test(
2291
- json!({"key":"hello","value":"world"})
2292
- )),
2293
- metadata: Some(TransactionJson::from_value_for_test(
2294
- json!({"source": "test"})
2295
- )),
2296
- origin: None,
2297
- created_at: Some("2026-04-23T00:00:00Z".to_string()),
2298
- updated_at: Some("2026-04-23T01:00:00Z".to_string()),
2299
- global: false,
2300
- change_id: Some("change-a".to_string()),
2301
- commit_id: None,
2302
- untracked: false,
2303
- branch_id: "branch-a".to_string(),
2304
- }]
2305
- );
2306
- }
2307
-
2308
- #[test]
2309
- fn decodes_global_lix_state_batch_into_global_branch() {
2310
- let rows = lix_state_write_rows_from_batch(
2311
- &one_row_lix_state_batch(true),
2312
- Some("branch-a"),
2313
- "INSERT into lix_state",
2314
- )
2315
- .expect("batch should decode");
2316
-
2317
- assert_eq!(rows[0].branch_id, "global");
2318
- assert!(rows[0].global);
2319
- }
2320
-
2321
- #[tokio::test]
2322
- async fn insert_sink_stages_decoded_lix_state_rows() {
2323
- let mut write_context = CapturingWriteContext::default();
2324
- let write_ctx = SqlWriteContext::new(&mut write_context);
2325
- let sink = LixStateInsertSink::new(write_ctx, Some("branch-a".to_string()));
2326
- let batch = one_row_lix_state_batch(false);
2327
- let count = sink
2328
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
2329
- .await
2330
- .expect("sink should stage write");
2331
-
2332
- assert_eq!(count, 1);
2333
- assert_eq!(
2334
- write_context.writes.as_slice(),
2335
- &[TransactionWrite::Rows {
2336
- mode: TransactionWriteMode::Insert,
2337
- rows: vec![TransactionWriteRow {
2338
- entity_pk: Some(crate::entity_pk::EntityPk::single("entity-1")),
2339
- schema_key: "lix_key_value".to_string(),
2340
- file_id: None,
2341
- snapshot: Some(TransactionJson::from_value_for_test(
2342
- json!({"key":"hello","value":"world"})
2343
- )),
2344
- metadata: Some(TransactionJson::from_value_for_test(
2345
- json!({"source": "test"})
2346
- )),
2347
- origin: None,
2348
- created_at: Some("2026-04-23T00:00:00Z".to_string()),
2349
- updated_at: Some("2026-04-23T01:00:00Z".to_string()),
2350
- global: false,
2351
- change_id: Some("change-a".to_string()),
2352
- commit_id: None,
2353
- untracked: false,
2354
- branch_id: "branch-a".to_string(),
2355
- }]
2356
- }]
2357
- );
2358
- }
2359
-
2360
- #[tokio::test]
2361
- async fn insert_plan_returns_datafusion_count_uint64() {
2362
- let session = SessionContext::new();
2363
- let mut write_context = CapturingWriteContext::default();
2364
- let write_ctx = SqlWriteContext::new(&mut write_context);
2365
- let provider = LixStateProvider::active_branch_with_write(write_ctx);
2366
- let input = Arc::new(SingleBatchExec::new(one_row_stageable_lix_state_batch()))
2367
- as Arc<dyn ExecutionPlan>;
2368
-
2369
- let plan = provider
2370
- .insert_into(&session.state(), input, InsertOp::Append)
2371
- .await
2372
- .expect("insert should produce a write plan");
2373
- let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2374
- .await
2375
- .expect("insert write plan should execute");
2376
-
2377
- assert_eq!(batches.len(), 1);
2378
- assert_eq!(batches[0].num_rows(), 1);
2379
- assert_eq!(batches[0].num_columns(), 1);
2380
- assert_eq!(batches[0].schema().field(0).name(), "count");
2381
- assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2382
- assert!(!batches[0].schema().field(0).is_nullable());
2383
-
2384
- let count = batches[0]
2385
- .column(0)
2386
- .as_any()
2387
- .downcast_ref::<UInt64Array>()
2388
- .expect("count should be UInt64");
2389
- assert_eq!(count.value(0), 1);
2390
- assert_eq!(write_context.writes.len(), 1);
2391
- }
2392
-
2393
- #[tokio::test]
2394
- async fn update_plan_evaluates_filters_assignments_and_stages_rows() {
2395
- let session = SessionContext::new();
2396
- let mut write_context = CapturingWriteContext {
2397
- rows: vec![
2398
- live_row("entity-1", Some("{\"source\":\"match\"}")),
2399
- live_row("entity-2", Some("{\"source\":\"skip\"}")),
2400
- ],
2401
- writes: Vec::new(),
2402
- };
2403
- let write_ctx = SqlWriteContext::new(&mut write_context);
2404
- let provider = LixStateProvider::active_branch_with_write(write_ctx);
2405
-
2406
- let plan = provider
2407
- .update(
2408
- &session.state(),
2409
- vec![
2410
- (
2411
- "snapshot_content".to_string(),
2412
- str_lit("{\"key\":\"hello\",\"value\":\"updated\"}"),
2413
- ),
2414
- (
2415
- "metadata".to_string(),
2416
- str_lit("{\"schema_key\":\"lix_key_value\"}"),
2417
- ),
2418
- ],
2419
- vec![Expr::BinaryExpr(BinaryExpr::new(
2420
- Box::new(col("metadata")),
2421
- Operator::Eq,
2422
- Box::new(json_lit("{\"source\":\"match\"}")),
2423
- ))],
2424
- )
2425
- .await
2426
- .expect("update should produce a write plan");
2427
- let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2428
- .await
2429
- .expect("update write plan should execute");
2430
-
2431
- assert_eq!(batches.len(), 1);
2432
- assert_eq!(batches[0].schema().field(0).name(), "count");
2433
- assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2434
- let count = batches[0]
2435
- .column(0)
2436
- .as_any()
2437
- .downcast_ref::<UInt64Array>()
2438
- .expect("count should be UInt64");
2439
- assert_eq!(count.value(0), 1);
2440
-
2441
- assert_eq!(
2442
- write_context.writes.as_slice(),
2443
- &[TransactionWrite::Rows {
2444
- mode: TransactionWriteMode::Replace,
2445
- rows: vec![TransactionWriteRow {
2446
- entity_pk: Some(crate::entity_pk::EntityPk::single("entity-1")),
2447
- schema_key: "lix_key_value".to_string(),
2448
- file_id: None,
2449
- snapshot: Some(TransactionJson::from_value_for_test(
2450
- json!({"key":"hello","value":"updated"})
2451
- )),
2452
- metadata: Some(TransactionJson::from_value_for_test(
2453
- json!({"schema_key": "lix_key_value"})
2454
- )),
2455
- origin: None,
2456
- created_at: None,
2457
- updated_at: None,
2458
- global: false,
2459
- change_id: None,
2460
- commit_id: None,
2461
- untracked: false,
2462
- branch_id: "branch-a".to_string(),
2463
- }]
2464
- }]
2465
- );
2466
- }
2467
-
2468
- #[tokio::test]
2469
- async fn delete_plan_with_empty_filters_stages_all_visible_rows() {
2470
- let session = SessionContext::new();
2471
- let mut write_context = CapturingWriteContext {
2472
- rows: vec![
2473
- live_row("entity-1", Some("{\"source\":\"one\"}")),
2474
- live_row("entity-2", Some("{\"source\":\"two\"}")),
2475
- ],
2476
- writes: Vec::new(),
2477
- };
2478
- let write_ctx = SqlWriteContext::new(&mut write_context);
2479
- let provider = LixStateProvider::active_branch_with_write(write_ctx);
2480
-
2481
- let plan = provider
2482
- .delete_from(&session.state(), vec![])
2483
- .await
2484
- .expect("delete should produce a write plan");
2485
- let batches = datafusion::physical_plan::collect(plan, Arc::new(TaskContext::default()))
2486
- .await
2487
- .expect("delete write plan should execute");
2488
-
2489
- assert_eq!(batches.len(), 1);
2490
- assert_eq!(batches[0].schema().field(0).name(), "count");
2491
- assert_eq!(batches[0].schema().field(0).data_type(), &DataType::UInt64);
2492
- let count = batches[0]
2493
- .column(0)
2494
- .as_any()
2495
- .downcast_ref::<UInt64Array>()
2496
- .expect("count should be UInt64");
2497
- assert_eq!(count.value(0), 2);
2498
-
2499
- assert_eq!(
2500
- write_context.writes.as_slice(),
2501
- &[TransactionWrite::Rows {
2502
- mode: TransactionWriteMode::Replace,
2503
- rows: vec![
2504
- TransactionWriteRow {
2505
- entity_pk: Some(crate::entity_pk::EntityPk::single("entity-1")),
2506
- schema_key: "lix_key_value".to_string(),
2507
- file_id: None,
2508
- snapshot: None,
2509
- metadata: Some(TransactionJson::from_value_for_test(
2510
- json!({"source": "one"})
2511
- )),
2512
- origin: None,
2513
- created_at: None,
2514
- updated_at: None,
2515
- global: false,
2516
- change_id: None,
2517
- commit_id: None,
2518
- untracked: false,
2519
- branch_id: "branch-a".to_string(),
2520
- },
2521
- TransactionWriteRow {
2522
- entity_pk: Some(crate::entity_pk::EntityPk::single("entity-2")),
2523
- schema_key: "lix_key_value".to_string(),
2524
- file_id: None,
2525
- snapshot: None,
2526
- metadata: Some(TransactionJson::from_value_for_test(
2527
- json!({"source": "two"})
2528
- )),
2529
- origin: None,
2530
- created_at: None,
2531
- updated_at: None,
2532
- global: false,
2533
- change_id: None,
2534
- commit_id: None,
2535
- untracked: false,
2536
- branch_id: "branch-a".to_string(),
2537
- },
2538
- ]
2539
- }]
2540
- );
2541
- }
2542
- }