@lix-js/sdk 0.6.0-preview.2 → 0.6.0-preview.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/SKILL.md +46 -8
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +25 -1
  3. package/dist/engine-wasm/wasm/lix_engine.js +60 -2
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +5 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +10 -3
  9. package/dist/open-lix.js +39 -0
  10. package/dist-engine-src/src/binary_cas/types.rs +0 -6
  11. package/dist-engine-src/src/catalog/context.rs +412 -0
  12. package/dist-engine-src/src/catalog/mod.rs +10 -0
  13. package/dist-engine-src/src/catalog/schema.rs +4 -0
  14. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  15. package/dist-engine-src/src/cel/mod.rs +1 -1
  16. package/dist-engine-src/src/cel/provider.rs +1 -1
  17. package/dist-engine-src/src/commit_graph/context.rs +328 -1015
  18. package/dist-engine-src/src/commit_graph/mod.rs +2 -3
  19. package/dist-engine-src/src/commit_graph/types.rs +7 -43
  20. package/dist-engine-src/src/commit_graph/walker.rs +57 -81
  21. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  22. package/dist-engine-src/src/commit_store/context.rs +944 -0
  23. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  24. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  25. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  26. package/dist-engine-src/src/commit_store/types.rs +215 -0
  27. package/dist-engine-src/src/common/identity.rs +15 -5
  28. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  29. package/dist-engine-src/src/common/metadata.rs +17 -12
  30. package/dist-engine-src/src/common/mod.rs +5 -5
  31. package/dist-engine-src/src/domain.rs +324 -0
  32. package/dist-engine-src/src/engine.rs +29 -43
  33. package/dist-engine-src/src/entity_identity.rs +238 -118
  34. package/dist-engine-src/src/functions/context.rs +17 -52
  35. package/dist-engine-src/src/functions/deterministic.rs +1 -1
  36. package/dist-engine-src/src/functions/mod.rs +1 -1
  37. package/dist-engine-src/src/functions/provider.rs +4 -4
  38. package/dist-engine-src/src/functions/state.rs +39 -66
  39. package/dist-engine-src/src/functions/types.rs +1 -1
  40. package/dist-engine-src/src/init.rs +204 -151
  41. package/dist-engine-src/src/json_store/context.rs +354 -60
  42. package/dist-engine-src/src/json_store/encoded.rs +6 -6
  43. package/dist-engine-src/src/json_store/mod.rs +4 -1
  44. package/dist-engine-src/src/json_store/store.rs +884 -11
  45. package/dist-engine-src/src/json_store/types.rs +166 -1
  46. package/dist-engine-src/src/lib.rs +11 -10
  47. package/dist-engine-src/src/live_state/context.rs +608 -830
  48. package/dist-engine-src/src/live_state/mod.rs +3 -3
  49. package/dist-engine-src/src/live_state/overlay.rs +7 -7
  50. package/dist-engine-src/src/live_state/reader.rs +5 -5
  51. package/dist-engine-src/src/live_state/types.rs +19 -36
  52. package/dist-engine-src/src/live_state/visibility.rs +19 -14
  53. package/dist-engine-src/src/plugin/archive.rs +3 -6
  54. package/dist-engine-src/src/plugin/install.rs +0 -18
  55. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -1
  56. package/dist-engine-src/src/schema/annotations/defaults.rs +2 -7
  57. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -1
  58. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -1
  59. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -1
  60. package/dist-engine-src/src/schema/builtin/lix_change.json +11 -10
  61. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -1
  62. package/dist-engine-src/src/schema/builtin/lix_commit.json +8 -46
  63. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +29 -22
  64. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -1
  65. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -1
  66. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -1
  67. package/dist-engine-src/src/schema/builtin/lix_label.json +10 -3
  68. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  69. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +2 -8
  70. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -1
  71. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -1
  72. package/dist-engine-src/src/schema/builtin/mod.rs +10 -59
  73. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  74. package/dist-engine-src/src/schema/definition.json +47 -17
  75. package/dist-engine-src/src/schema/definition.rs +202 -96
  76. package/dist-engine-src/src/schema/key.rs +9 -77
  77. package/dist-engine-src/src/schema/mod.rs +4 -4
  78. package/dist-engine-src/src/schema/tests.rs +133 -92
  79. package/dist-engine-src/src/session/context.rs +86 -48
  80. package/dist-engine-src/src/session/create_version.rs +22 -14
  81. package/dist-engine-src/src/session/execute.rs +117 -23
  82. package/dist-engine-src/src/session/merge/apply.rs +4 -4
  83. package/dist-engine-src/src/session/merge/conflicts.rs +3 -2
  84. package/dist-engine-src/src/session/merge/stats.rs +1 -1
  85. package/dist-engine-src/src/session/merge/version.rs +35 -45
  86. package/dist-engine-src/src/session/mod.rs +9 -7
  87. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  88. package/dist-engine-src/src/session/switch_version.rs +17 -28
  89. package/dist-engine-src/src/session/transaction.rs +76 -0
  90. package/dist-engine-src/src/sql2/change_provider.rs +14 -20
  91. package/dist-engine-src/src/sql2/classify.rs +75 -48
  92. package/dist-engine-src/src/sql2/context.rs +22 -18
  93. package/dist-engine-src/src/sql2/directory_history_provider.rs +28 -20
  94. package/dist-engine-src/src/sql2/directory_provider.rs +131 -83
  95. package/dist-engine-src/src/sql2/entity_history_provider.rs +10 -14
  96. package/dist-engine-src/src/sql2/entity_provider.rs +680 -169
  97. package/dist-engine-src/src/sql2/error.rs +24 -5
  98. package/dist-engine-src/src/sql2/execute.rs +426 -272
  99. package/dist-engine-src/src/sql2/file_history_provider.rs +29 -21
  100. package/dist-engine-src/src/sql2/file_provider.rs +533 -108
  101. package/dist-engine-src/src/sql2/filesystem_planner.rs +58 -94
  102. package/dist-engine-src/src/sql2/filesystem_visibility.rs +37 -23
  103. package/dist-engine-src/src/sql2/history_projection.rs +3 -27
  104. package/dist-engine-src/src/sql2/history_provider.rs +11 -17
  105. package/dist-engine-src/src/sql2/history_route.rs +22 -8
  106. package/dist-engine-src/src/sql2/lix_state_provider.rs +178 -96
  107. package/dist-engine-src/src/sql2/mod.rs +8 -4
  108. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  109. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  110. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  111. package/dist-engine-src/src/sql2/public_bind/dml.rs +172 -0
  112. package/dist-engine-src/src/sql2/public_bind/mod.rs +26 -0
  113. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  114. package/dist-engine-src/src/sql2/read_only.rs +10 -12
  115. package/dist-engine-src/src/sql2/session.rs +7 -10
  116. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  117. package/dist-engine-src/src/sql2/udfs/mod.rs +8 -1
  118. package/dist-engine-src/src/sql2/udfs/public_call.rs +238 -0
  119. package/dist-engine-src/src/sql2/version_provider.rs +46 -31
  120. package/dist-engine-src/src/sql2/version_scope.rs +4 -4
  121. package/dist-engine-src/src/storage_bench.rs +1782 -325
  122. package/dist-engine-src/src/test_support.rs +183 -36
  123. package/dist-engine-src/src/tracked_state/by_file_index.rs +20 -24
  124. package/dist-engine-src/src/tracked_state/codec.rs +1519 -181
  125. package/dist-engine-src/src/tracked_state/context.rs +1155 -271
  126. package/dist-engine-src/src/tracked_state/diff.rs +249 -57
  127. package/dist-engine-src/src/tracked_state/materialization.rs +365 -103
  128. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  129. package/dist-engine-src/src/tracked_state/merge.rs +37 -19
  130. package/dist-engine-src/src/tracked_state/mod.rs +8 -7
  131. package/dist-engine-src/src/tracked_state/storage.rs +138 -6
  132. package/dist-engine-src/src/tracked_state/tree.rs +695 -252
  133. package/dist-engine-src/src/tracked_state/types.rs +176 -6
  134. package/dist-engine-src/src/transaction/commit.rs +695 -435
  135. package/dist-engine-src/src/transaction/context.rs +551 -310
  136. package/dist-engine-src/src/transaction/live_state_overlay.rs +9 -8
  137. package/dist-engine-src/src/transaction/mod.rs +2 -0
  138. package/dist-engine-src/src/transaction/normalization.rs +311 -447
  139. package/dist-engine-src/src/transaction/prep.rs +37 -0
  140. package/dist-engine-src/src/transaction/schema_resolver.rs +93 -71
  141. package/dist-engine-src/src/transaction/staging.rs +701 -406
  142. package/dist-engine-src/src/transaction/types.rs +231 -122
  143. package/dist-engine-src/src/transaction/validation.rs +2717 -1698
  144. package/dist-engine-src/src/untracked_state/codec.rs +40 -96
  145. package/dist-engine-src/src/untracked_state/context.rs +21 -5
  146. package/dist-engine-src/src/untracked_state/materialization.rs +10 -104
  147. package/dist-engine-src/src/untracked_state/mod.rs +3 -5
  148. package/dist-engine-src/src/untracked_state/storage.rs +105 -57
  149. package/dist-engine-src/src/untracked_state/types.rs +63 -13
  150. package/dist-engine-src/src/version/context.rs +1 -13
  151. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  152. package/dist-engine-src/src/version/mod.rs +3 -2
  153. package/dist-engine-src/src/version/refs.rs +12 -103
  154. package/dist-engine-src/src/version/stage_rows.rs +15 -19
  155. package/package.json +1 -1
  156. package/dist-engine-src/src/changelog/codec.rs +0 -321
  157. package/dist-engine-src/src/changelog/context.rs +0 -92
  158. package/dist-engine-src/src/changelog/materialization.rs +0 -121
  159. package/dist-engine-src/src/changelog/mod.rs +0 -13
  160. package/dist-engine-src/src/changelog/reader.rs +0 -20
  161. package/dist-engine-src/src/changelog/storage.rs +0 -220
  162. package/dist-engine-src/src/changelog/types.rs +0 -38
  163. package/dist-engine-src/src/schema/builtin/lix_change_set.json +0 -18
  164. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +0 -75
  165. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +0 -63
  166. package/dist-engine-src/src/schema_registry.rs +0 -294
  167. package/dist-engine-src/src/sql2/commit_derived_provider.rs +0 -591
  168. package/dist-engine-src/src/tracked_state/rebuild.rs +0 -771
  169. package/dist-engine-src/src/tracked_state/tree_types.rs +0 -176
@@ -23,9 +23,10 @@ use datafusion::physical_plan::{
23
23
  use datafusion::prelude::SessionContext;
24
24
  use datafusion::scalar::ScalarValue;
25
25
  use futures_util::{stream, TryStreamExt};
26
+ use serde_json::Value as JsonValue;
26
27
 
27
28
  use crate::entity_identity::EntityIdentity;
28
- use crate::live_state::LiveStateRow;
29
+ use crate::live_state::MaterializedLiveStateRow;
29
30
  use crate::live_state::{
30
31
  LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
31
32
  };
@@ -33,16 +34,17 @@ use crate::sql2::dml::{InsertExec, InsertSink};
33
34
  use crate::sql2::read_only::reject_read_only_stage_rows;
34
35
  use crate::sql2::version_scope::{resolve_provider_version_ids, VersionBinding};
35
36
  use crate::sql2::write_normalization::{InsertCell, SqlCell, UpdateAssignmentValues};
36
- use crate::transaction::types::StageRow;
37
+ use crate::transaction::types::{TransactionJson, TransactionWriteRow};
37
38
  use crate::version::VersionRefReader;
38
39
  use crate::GLOBAL_VERSION_ID;
39
- use crate::{parse_row_metadata, serialize_row_metadata, LixError, NullableKeyFilter, RowMetadata};
40
+ use crate::{parse_row_metadata_value, serialize_row_metadata, LixError, NullableKeyFilter};
40
41
 
41
42
  use crate::sql2::{
42
43
  SqlWriteContext, WriteAccess, WriteContextLiveStateReader, WriteContextVersionRefReader,
43
44
  };
44
- use crate::transaction::types::{StageWrite, StageWriteMode};
45
+ use crate::transaction::types::{TransactionWrite, TransactionWriteMode};
45
46
 
47
+ use super::predicate_typecheck::validate_json_predicate_filters;
46
48
  use super::result_metadata::json_field;
47
49
 
48
50
  pub(crate) async fn register_lix_state_providers(
@@ -265,6 +267,7 @@ impl TableProvider for LixStateProvider {
265
267
  let write_ctx = self.write_access.require_write("DELETE FROM lix_state")?;
266
268
 
267
269
  let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
270
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
268
271
  let physical_filters = filters
269
272
  .iter()
270
273
  .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
@@ -299,6 +302,7 @@ impl TableProvider for LixStateProvider {
299
302
  validate_lix_state_update_assignments(&self.schema, &assignments)?;
300
303
 
301
304
  let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
305
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
302
306
  let physical_assignments = assignments
303
307
  .iter()
304
308
  .map(|(column_name, expr)| {
@@ -378,8 +382,8 @@ impl InsertSink for LixStateInsertSink {
378
382
  .map_err(|_| DataFusionError::Execution("INSERT row count overflow".into()))?;
379
383
 
380
384
  self.write_ctx
381
- .stage_write(StageWrite::Rows {
382
- mode: StageWriteMode::Insert,
385
+ .stage_write(TransactionWrite::Rows {
386
+ mode: TransactionWriteMode::Insert,
383
387
  rows,
384
388
  })
385
389
  .await
@@ -511,8 +515,8 @@ impl ExecutionPlan for LixStateDeleteExec {
511
515
 
512
516
  if count > 0 {
513
517
  write_ctx
514
- .stage_write(StageWrite::Rows {
515
- mode: StageWriteMode::Replace,
518
+ .stage_write(TransactionWrite::Rows {
519
+ mode: TransactionWriteMode::Replace,
516
520
  rows: write_rows,
517
521
  })
518
522
  .await
@@ -665,8 +669,8 @@ impl ExecutionPlan for LixStateUpdateExec {
665
669
 
666
670
  if count > 0 {
667
671
  write_ctx
668
- .stage_write(StageWrite::Rows {
669
- mode: StageWriteMode::Replace,
672
+ .stage_write(TransactionWrite::Rows {
673
+ mode: TransactionWriteMode::Replace,
670
674
  rows: write_rows,
671
675
  })
672
676
  .await
@@ -747,7 +751,7 @@ fn evaluate_lix_state_filters(
747
751
  fn lix_state_stageable_write_rows_from_batch(
748
752
  batch: &RecordBatch,
749
753
  version_binding: &str,
750
- ) -> Result<Vec<StageRow>> {
754
+ ) -> Result<Vec<TransactionWriteRow>> {
751
755
  let mut rows = lix_state_write_rows_from_batch(batch, version_binding)?;
752
756
  for row in &mut rows {
753
757
  row.created_at = None;
@@ -762,7 +766,7 @@ fn lix_state_update_write_rows_from_batch(
762
766
  batch: &RecordBatch,
763
767
  assignments: &[(String, Arc<dyn PhysicalExpr>)],
764
768
  version_binding: &str,
765
- ) -> Result<Vec<StageRow>> {
769
+ ) -> Result<Vec<TransactionWriteRow>> {
766
770
  let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
767
771
  (0..batch.num_rows())
768
772
  .map(|row_index| {
@@ -776,9 +780,9 @@ fn lix_state_update_write_rows_from_batch(
776
780
  }
777
781
  });
778
782
 
779
- Ok(StageRow {
783
+ Ok(TransactionWriteRow {
780
784
  entity_id: Some(
781
- EntityIdentity::from_string(&required_string_value(
785
+ EntityIdentity::from_json_array_text(&required_string_value(
782
786
  batch,
783
787
  row_index,
784
788
  "entity_id",
@@ -791,7 +795,7 @@ fn lix_state_update_write_rows_from_batch(
791
795
  ),
792
796
  schema_key: required_string_value(batch, row_index, "schema_key")?,
793
797
  file_id: optional_string_value(batch, row_index, "file_id")?,
794
- snapshot_content: update_optional_string_value(
798
+ snapshot: update_optional_json_value(
795
799
  batch,
796
800
  &assignment_values,
797
801
  row_index,
@@ -805,7 +809,6 @@ fn lix_state_update_write_rows_from_batch(
805
809
  "lix_state",
806
810
  )?,
807
811
  origin: None,
808
- schema_version: required_string_value(batch, row_index, "schema_version")?,
809
812
  created_at: None,
810
813
  updated_at: None,
811
814
  global,
@@ -821,10 +824,10 @@ fn lix_state_update_write_rows_from_batch(
821
824
  fn lix_state_deletable_write_rows_from_batch(
822
825
  batch: &RecordBatch,
823
826
  version_binding: &str,
824
- ) -> Result<Vec<StageRow>> {
827
+ ) -> Result<Vec<TransactionWriteRow>> {
825
828
  let mut rows = lix_state_stageable_write_rows_from_batch(batch, version_binding)?;
826
829
  for row in &mut rows {
827
- row.snapshot_content = None;
830
+ row.snapshot = None;
828
831
  }
829
832
  Ok(rows)
830
833
  }
@@ -854,14 +857,28 @@ fn update_optional_metadata_value(
854
857
  row_index: usize,
855
858
  column_name: &str,
856
859
  context: &str,
857
- ) -> Result<Option<RowMetadata>> {
860
+ ) -> Result<Option<TransactionJson>> {
858
861
  update_optional_string_value(batch, assignment_values, row_index, column_name)?
859
862
  .map(|value| {
860
- parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
863
+ let metadata = parse_row_metadata_value(&value, context)
864
+ .map_err(super::error::lix_error_to_datafusion_error)?;
865
+ TransactionJson::from_value(metadata, &format!("{context} metadata"))
866
+ .map_err(super::error::lix_error_to_datafusion_error)
861
867
  })
862
868
  .transpose()
863
869
  }
864
870
 
871
+ fn update_optional_json_value(
872
+ batch: &RecordBatch,
873
+ assignment_values: &UpdateAssignmentValues,
874
+ row_index: usize,
875
+ column_name: &str,
876
+ ) -> Result<Option<TransactionJson>> {
877
+ update_optional_string_value(batch, assignment_values, row_index, column_name)?
878
+ .map(|value| parse_snapshot_json(&value, column_name))
879
+ .transpose()
880
+ }
881
+
865
882
  fn dml_count_schema() -> SchemaRef {
866
883
  Arc::new(Schema::new(vec![Field::new(
867
884
  "count",
@@ -881,7 +898,7 @@ fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
881
898
  fn lix_state_write_rows_from_batch(
882
899
  batch: &RecordBatch,
883
900
  version_binding: &str,
884
- ) -> Result<Vec<StageRow>> {
901
+ ) -> Result<Vec<TransactionWriteRow>> {
885
902
  (0..batch.num_rows())
886
903
  .map(|row_index| {
887
904
  let global = optional_bool_value(batch, row_index, "global")?.unwrap_or(false);
@@ -894,9 +911,9 @@ fn lix_state_write_rows_from_batch(
894
911
  }
895
912
  });
896
913
 
897
- Ok(StageRow {
914
+ Ok(TransactionWriteRow {
898
915
  entity_id: Some(
899
- EntityIdentity::from_string(&required_string_value(
916
+ EntityIdentity::from_json_array_text(&required_string_value(
900
917
  batch,
901
918
  row_index,
902
919
  "entity_id",
@@ -909,10 +926,9 @@ fn lix_state_write_rows_from_batch(
909
926
  ),
910
927
  schema_key: required_string_value(batch, row_index, "schema_key")?,
911
928
  file_id: optional_string_value(batch, row_index, "file_id")?,
912
- snapshot_content: optional_string_value(batch, row_index, "snapshot_content")?,
929
+ snapshot: optional_json_value(batch, row_index, "snapshot_content")?,
913
930
  metadata: optional_metadata_value(batch, row_index, "metadata", "lix_state")?,
914
931
  origin: None,
915
- schema_version: required_string_value(batch, row_index, "schema_version")?,
916
932
  created_at: optional_string_value(batch, row_index, "created_at")?,
917
933
  updated_at: optional_string_value(batch, row_index, "updated_at")?,
918
934
  global,
@@ -962,14 +978,37 @@ fn optional_metadata_value(
962
978
  row_index: usize,
963
979
  column_name: &str,
964
980
  context: &str,
965
- ) -> Result<Option<RowMetadata>> {
981
+ ) -> Result<Option<TransactionJson>> {
966
982
  optional_string_value(batch, row_index, column_name)?
967
983
  .map(|value| {
968
- parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
984
+ let metadata = parse_row_metadata_value(&value, context)
985
+ .map_err(super::error::lix_error_to_datafusion_error)?;
986
+ TransactionJson::from_value(metadata, &format!("{context} metadata"))
987
+ .map_err(super::error::lix_error_to_datafusion_error)
969
988
  })
970
989
  .transpose()
971
990
  }
972
991
 
992
+ fn optional_json_value(
993
+ batch: &RecordBatch,
994
+ row_index: usize,
995
+ column_name: &str,
996
+ ) -> Result<Option<TransactionJson>> {
997
+ optional_string_value(batch, row_index, column_name)?
998
+ .map(|value| parse_snapshot_json(&value, column_name))
999
+ .transpose()
1000
+ }
1001
+
1002
+ fn parse_snapshot_json(value: &str, column_name: &str) -> Result<TransactionJson> {
1003
+ let parsed = serde_json::from_str::<JsonValue>(value).map_err(|error| {
1004
+ DataFusionError::Execution(format!(
1005
+ "lix_state expected valid JSON in column '{column_name}': {error}"
1006
+ ))
1007
+ })?;
1008
+ TransactionJson::from_value(parsed, &format!("lix_state {column_name}"))
1009
+ .map_err(super::error::lix_error_to_datafusion_error)
1010
+ }
1011
+
973
1012
  fn optional_bool_value(
974
1013
  batch: &RecordBatch,
975
1014
  row_index: usize,
@@ -1122,12 +1161,11 @@ impl ExecutionPlan for LixStateScanExec {
1122
1161
 
1123
1162
  fn lix_state_schema() -> SchemaRef {
1124
1163
  Arc::new(Schema::new(vec![
1125
- Field::new("entity_id", DataType::Utf8, false),
1164
+ json_field("entity_id", false),
1126
1165
  Field::new("schema_key", DataType::Utf8, false),
1127
1166
  Field::new("file_id", DataType::Utf8, true),
1128
1167
  json_field("snapshot_content", true),
1129
1168
  json_field("metadata", true),
1130
- Field::new("schema_version", DataType::Utf8, true),
1131
1169
  Field::new("created_at", DataType::Utf8, true),
1132
1170
  Field::new("updated_at", DataType::Utf8, true),
1133
1171
  Field::new("global", DataType::Boolean, true),
@@ -1139,12 +1177,11 @@ fn lix_state_schema() -> SchemaRef {
1139
1177
 
1140
1178
  fn lix_state_by_version_schema() -> SchemaRef {
1141
1179
  Arc::new(Schema::new(vec![
1142
- Field::new("entity_id", DataType::Utf8, false),
1180
+ json_field("entity_id", false),
1143
1181
  Field::new("schema_key", DataType::Utf8, false),
1144
1182
  Field::new("file_id", DataType::Utf8, true),
1145
1183
  json_field("snapshot_content", true),
1146
1184
  json_field("metadata", true),
1147
- Field::new("schema_version", DataType::Utf8, true),
1148
1185
  Field::new("created_at", DataType::Utf8, true),
1149
1186
  Field::new("updated_at", DataType::Utf8, true),
1150
1187
  Field::new("global", DataType::Boolean, true),
@@ -1238,7 +1275,7 @@ fn lix_state_scan_request(
1238
1275
  .map(|values| {
1239
1276
  values
1240
1277
  .iter()
1241
- .map(|value| EntityIdentity::single(value))
1278
+ .filter_map(|value| EntityIdentity::from_json_array_text(value).ok())
1242
1279
  .collect()
1243
1280
  })
1244
1281
  .unwrap_or_default(),
@@ -1360,7 +1397,7 @@ fn parse_lix_state_in_list_filter(in_list: &InList) -> Option<LixStateFilterPred
1360
1397
  match column.name.as_str() {
1361
1398
  "schema_key" => Some(LixStateFilterPredicate::SchemaKeys(values)),
1362
1399
  "version_id" => Some(LixStateFilterPredicate::VersionIds(values)),
1363
- "entity_id" => Some(LixStateFilterPredicate::EntityIds(values)),
1400
+ "entity_id" => canonical_entity_id_values(values).map(LixStateFilterPredicate::EntityIds),
1364
1401
  _ => None,
1365
1402
  }
1366
1403
  }
@@ -1390,12 +1427,27 @@ fn parse_lix_state_column_literal_filter(
1390
1427
  "version_id" => string_expr_literal(literal_expr)
1391
1428
  .map(|value| LixStateFilterPredicate::VersionIds(BTreeSet::from([value]))),
1392
1429
  "entity_id" => string_expr_literal(literal_expr)
1430
+ .and_then(|value| canonical_entity_id_value(&value))
1393
1431
  .map(|value| LixStateFilterPredicate::EntityIds(BTreeSet::from([value]))),
1394
1432
  "file_id" => nullable_key_literal(literal_expr).map(LixStateFilterPredicate::FileId),
1395
1433
  _ => None,
1396
1434
  }
1397
1435
  }
1398
1436
 
1437
+ fn canonical_entity_id_values(values: BTreeSet<String>) -> Option<BTreeSet<String>> {
1438
+ values
1439
+ .into_iter()
1440
+ .map(|value| canonical_entity_id_value(&value))
1441
+ .collect()
1442
+ }
1443
+
1444
+ fn canonical_entity_id_value(value: &str) -> Option<String> {
1445
+ EntityIdentity::from_json_array_text(value)
1446
+ .ok()?
1447
+ .as_json_array_text()
1448
+ .ok()
1449
+ }
1450
+
1399
1451
  fn nullable_key_literal(expr: &Expr) -> Option<NullableKeyFilter<String>> {
1400
1452
  if is_null_literal(expr) {
1401
1453
  return Some(NullableKeyFilter::Null);
@@ -1421,7 +1473,7 @@ fn is_null_literal(expr: &Expr) -> bool {
1421
1473
 
1422
1474
  fn lix_state_record_batch(
1423
1475
  schema: SchemaRef,
1424
- rows: &[LiveStateRow],
1476
+ rows: &[MaterializedLiveStateRow],
1425
1477
  ) -> Result<RecordBatch, LixError> {
1426
1478
  if schema.fields().is_empty() {
1427
1479
  let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
@@ -1440,7 +1492,7 @@ fn lix_state_record_batch(
1440
1492
  Ok(match field.name().as_str() {
1441
1493
  "entity_id" => Arc::new(StringArray::from(
1442
1494
  rows.iter()
1443
- .map(|row| row.entity_id.as_string().map(Some))
1495
+ .map(|row| row.entity_id.as_json_array_text().map(Some))
1444
1496
  .collect::<std::result::Result<Vec<_>, LixError>>()?,
1445
1497
  )) as ArrayRef,
1446
1498
  "schema_key" => string_array(rows.iter().map(|row| Some(row.schema_key.as_str()))),
@@ -1453,9 +1505,6 @@ fn lix_state_record_batch(
1453
1505
  .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
1454
1506
  .collect::<Vec<_>>(),
1455
1507
  )),
1456
- "schema_version" => {
1457
- string_array(rows.iter().map(|row| Some(row.schema_version.as_str())))
1458
- }
1459
1508
  "created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
1460
1509
  "updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
1461
1510
  "global" => Arc::new(BooleanArray::from(
@@ -1525,11 +1574,16 @@ mod tests {
1525
1574
  };
1526
1575
  use crate::sql2::dml::{InsertExec, InsertSink};
1527
1576
  use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
1528
- use crate::transaction::types::{StageRow, StageWrite, StageWriteMode, StageWriteOutcome};
1577
+ use crate::transaction::types::{
1578
+ TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
1579
+ TransactionWriteRow,
1580
+ };
1529
1581
  use crate::version::{VersionHead, VersionRefReader};
1530
1582
  use crate::{
1531
1583
  entity_identity::EntityIdentity,
1532
- live_state::{LiveStateReader, LiveStateRow, LiveStateRowRequest, LiveStateScanRequest},
1584
+ live_state::{
1585
+ LiveStateReader, LiveStateRowRequest, LiveStateScanRequest, MaterializedLiveStateRow,
1586
+ },
1533
1587
  };
1534
1588
  use crate::{LixError, NullableKeyFilter};
1535
1589
  use async_trait::async_trait;
@@ -1560,19 +1614,19 @@ mod tests {
1560
1614
  struct EmptyVersionRefReader;
1561
1615
  #[allow(dead_code)]
1562
1616
  struct RowsLiveStateReader {
1563
- rows: Vec<LiveStateRow>,
1617
+ rows: Vec<MaterializedLiveStateRow>,
1564
1618
  }
1565
1619
  struct DummyBlobReader;
1566
1620
 
1567
1621
  #[derive(Default)]
1568
1622
  struct DummyWriteContext {
1569
- rows: Vec<LiveStateRow>,
1623
+ rows: Vec<MaterializedLiveStateRow>,
1570
1624
  }
1571
1625
 
1572
1626
  #[derive(Default)]
1573
1627
  struct CapturingWriteContext {
1574
- rows: Vec<LiveStateRow>,
1575
- writes: Vec<StageWrite>,
1628
+ rows: Vec<MaterializedLiveStateRow>,
1629
+ writes: Vec<TransactionWrite>,
1576
1630
  }
1577
1631
 
1578
1632
  struct SingleBatchExec {
@@ -1663,14 +1717,14 @@ mod tests {
1663
1717
  async fn scan_rows(
1664
1718
  &self,
1665
1719
  _request: &LiveStateScanRequest,
1666
- ) -> Result<Vec<LiveStateRow>, LixError> {
1720
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1667
1721
  Ok(vec![])
1668
1722
  }
1669
1723
 
1670
1724
  async fn load_row(
1671
1725
  &self,
1672
1726
  _request: &LiveStateRowRequest,
1673
- ) -> Result<Option<LiveStateRow>, LixError> {
1727
+ ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
1674
1728
  Ok(None)
1675
1729
  }
1676
1730
  }
@@ -1695,14 +1749,14 @@ mod tests {
1695
1749
  async fn scan_rows(
1696
1750
  &self,
1697
1751
  _request: &LiveStateScanRequest,
1698
- ) -> Result<Vec<LiveStateRow>, LixError> {
1752
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1699
1753
  Ok(self.rows.clone())
1700
1754
  }
1701
1755
 
1702
1756
  async fn load_row(
1703
1757
  &self,
1704
1758
  _request: &LiveStateRowRequest,
1705
- ) -> Result<Option<LiveStateRow>, LixError> {
1759
+ ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
1706
1760
  Ok(None)
1707
1761
  }
1708
1762
  }
@@ -1719,7 +1773,10 @@ mod tests {
1719
1773
  &self,
1720
1774
  hashes: &[crate::binary_cas::BlobHash],
1721
1775
  ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1722
- Ok(crate::binary_cas::BlobBytesBatch::missing(hashes.len()))
1776
+ Ok(crate::binary_cas::BlobBytesBatch::new(vec![
1777
+ None;
1778
+ hashes.len()
1779
+ ]))
1723
1780
  }
1724
1781
  }
1725
1782
 
@@ -1747,7 +1804,7 @@ mod tests {
1747
1804
  async fn scan_live_state(
1748
1805
  &mut self,
1749
1806
  _request: &LiveStateScanRequest,
1750
- ) -> Result<Vec<LiveStateRow>, LixError> {
1807
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1751
1808
  Ok(self.rows.clone())
1752
1809
  }
1753
1810
 
@@ -1761,8 +1818,11 @@ mod tests {
1761
1818
  Ok(Some(format!("commit-{version_id}")))
1762
1819
  }
1763
1820
 
1764
- async fn stage_write(&mut self, _write: StageWrite) -> Result<StageWriteOutcome, LixError> {
1765
- Ok(StageWriteOutcome { count: 0 })
1821
+ async fn stage_write(
1822
+ &mut self,
1823
+ _write: TransactionWrite,
1824
+ ) -> Result<TransactionWriteOutcome, LixError> {
1825
+ Ok(TransactionWriteOutcome { count: 0 })
1766
1826
  }
1767
1827
  }
1768
1828
 
@@ -1790,7 +1850,7 @@ mod tests {
1790
1850
  async fn scan_live_state(
1791
1851
  &mut self,
1792
1852
  _request: &LiveStateScanRequest,
1793
- ) -> Result<Vec<LiveStateRow>, LixError> {
1853
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1794
1854
  Ok(self.rows.clone())
1795
1855
  }
1796
1856
 
@@ -1804,9 +1864,12 @@ mod tests {
1804
1864
  Ok(Some(format!("commit-{version_id}")))
1805
1865
  }
1806
1866
 
1807
- async fn stage_write(&mut self, write: StageWrite) -> Result<StageWriteOutcome, LixError> {
1867
+ async fn stage_write(
1868
+ &mut self,
1869
+ write: TransactionWrite,
1870
+ ) -> Result<TransactionWriteOutcome, LixError> {
1808
1871
  self.writes.push(write);
1809
- Ok(StageWriteOutcome { count: 0 })
1872
+ Ok(TransactionWriteOutcome { count: 0 })
1810
1873
  }
1811
1874
  }
1812
1875
 
@@ -1818,6 +1881,18 @@ mod tests {
1818
1881
  Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
1819
1882
  }
1820
1883
 
1884
+ fn json_lit(value: &str) -> Expr {
1885
+ Expr::Literal(
1886
+ ScalarValue::Utf8(Some(value.to_string())),
1887
+ Some(datafusion::common::metadata::FieldMetadata::new(
1888
+ std::collections::BTreeMap::from([(
1889
+ crate::sql2::result_metadata::LIX_VALUE_TYPE_METADATA_KEY.to_string(),
1890
+ crate::sql2::result_metadata::LIX_VALUE_TYPE_JSON.to_string(),
1891
+ )]),
1892
+ )),
1893
+ )
1894
+ }
1895
+
1821
1896
  fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
1822
1897
  Arc::new(StringArray::from(values)) as ArrayRef
1823
1898
  }
@@ -1826,12 +1901,11 @@ mod tests {
1826
1901
  RecordBatch::try_new(
1827
1902
  lix_state_schema(),
1828
1903
  vec![
1829
- string_column(vec![Some("entity-1")]),
1904
+ string_column(vec![Some("[\"entity-1\"]")]),
1830
1905
  string_column(vec![Some("lix_key_value")]),
1831
1906
  string_column(vec![None]),
1832
1907
  string_column(vec![Some("{\"key\":\"hello\",\"value\":\"world\"}")]),
1833
1908
  string_column(vec![Some("{\"source\":\"test\"}")]),
1834
- string_column(vec![Some("1")]),
1835
1909
  string_column(vec![Some("2026-04-23T00:00:00Z")]),
1836
1910
  string_column(vec![Some("2026-04-23T01:00:00Z")]),
1837
1911
  Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
@@ -1847,12 +1921,11 @@ mod tests {
1847
1921
  RecordBatch::try_new(
1848
1922
  lix_state_schema(),
1849
1923
  vec![
1850
- string_column(vec![Some("entity-1")]),
1924
+ string_column(vec![Some("[\"entity-1\"]")]),
1851
1925
  string_column(vec![Some("lix_key_value")]),
1852
1926
  string_column(vec![None]),
1853
1927
  string_column(vec![Some("{\"key\":\"hello\",\"value\":\"world\"}")]),
1854
1928
  string_column(vec![None]),
1855
- string_column(vec![Some("1")]),
1856
1929
  string_column(vec![None]),
1857
1930
  string_column(vec![None]),
1858
1931
  Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
@@ -1864,16 +1937,14 @@ mod tests {
1864
1937
  .expect("valid stageable lix_state batch")
1865
1938
  }
1866
1939
 
1867
- fn live_row(entity_id: &str, metadata: Option<&str>) -> LiveStateRow {
1868
- LiveStateRow {
1869
- entity_id: EntityIdentity::from_string(entity_id).expect("entity id should decode"),
1940
+ fn live_row(entity_id: &str, metadata: Option<&str>) -> MaterializedLiveStateRow {
1941
+ MaterializedLiveStateRow {
1942
+ entity_id: EntityIdentity::single(entity_id),
1870
1943
  schema_key: "lix_key_value".to_string(),
1871
1944
  file_id: None,
1872
1945
  snapshot_content: Some("{\"key\":\"hello\",\"value\":\"world\"}".to_string()),
1873
- metadata: metadata.map(|value| {
1874
- serde_json::from_str(value).expect("test metadata should be valid JSON")
1875
- }),
1876
- schema_version: "1".to_string(),
1946
+ metadata: metadata.map(str::to_string),
1947
+ deleted: false,
1877
1948
  version_id: "version-a".to_string(),
1878
1949
  change_id: Some(format!("change-{entity_id}")),
1879
1950
  commit_id: Some(format!("commit-{entity_id}")),
@@ -1935,7 +2006,7 @@ mod tests {
1935
2006
  ]);
1936
2007
 
1937
2008
  let request =
1938
- lix_state_scan_request(&schema, None, Some(&vec![0, 1, 12]), &route, Some(10));
2009
+ lix_state_scan_request(&schema, None, Some(&vec![0, 1, 11]), &route, Some(10));
1939
2010
 
1940
2011
  assert_eq!(request.filter.schema_keys, vec!["profile".to_string()]);
1941
2012
  assert_eq!(request.filter.version_ids, vec!["v1".to_string()]);
@@ -1957,7 +2028,7 @@ mod tests {
1957
2028
  Box::new(Expr::BinaryExpr(BinaryExpr::new(
1958
2029
  Box::new(col("entity_id")),
1959
2030
  Operator::Eq,
1960
- Box::new(str_lit("entity-a")),
2031
+ Box::new(str_lit("[\"entity-a\"]")),
1961
2032
  ))),
1962
2033
  Operator::And,
1963
2034
  Box::new(Expr::InList(InList::new(
@@ -1969,7 +2040,7 @@ mod tests {
1969
2040
 
1970
2041
  assert_eq!(
1971
2042
  route.entity_ids,
1972
- Some(BTreeSet::from(["entity-a".to_string()]))
2043
+ Some(BTreeSet::from(["[\"entity-a\"]".to_string()]))
1973
2044
  );
1974
2045
  assert_eq!(
1975
2046
  route.version_ids,
@@ -2186,14 +2257,17 @@ mod tests {
2186
2257
 
2187
2258
  assert_eq!(
2188
2259
  rows,
2189
- vec![StageRow {
2260
+ vec![TransactionWriteRow {
2190
2261
  entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2191
2262
  schema_key: "lix_key_value".to_string(),
2192
2263
  file_id: None,
2193
- snapshot_content: Some("{\"key\":\"hello\",\"value\":\"world\"}".to_string()),
2194
- metadata: Some(json!({"source": "test"})),
2264
+ snapshot: Some(TransactionJson::from_value_for_test(
2265
+ json!({"key":"hello","value":"world"})
2266
+ )),
2267
+ metadata: Some(TransactionJson::from_value_for_test(
2268
+ json!({"source": "test"})
2269
+ )),
2195
2270
  origin: None,
2196
- schema_version: "1".to_string(),
2197
2271
  created_at: Some("2026-04-23T00:00:00Z".to_string()),
2198
2272
  updated_at: Some("2026-04-23T01:00:00Z".to_string()),
2199
2273
  global: false,
@@ -2228,16 +2302,19 @@ mod tests {
2228
2302
  assert_eq!(count, 1);
2229
2303
  assert_eq!(
2230
2304
  write_context.writes.as_slice(),
2231
- &[StageWrite::Rows {
2232
- mode: StageWriteMode::Insert,
2233
- rows: vec![StageRow {
2305
+ &[TransactionWrite::Rows {
2306
+ mode: TransactionWriteMode::Insert,
2307
+ rows: vec![TransactionWriteRow {
2234
2308
  entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2235
2309
  schema_key: "lix_key_value".to_string(),
2236
2310
  file_id: None,
2237
- snapshot_content: Some("{\"key\":\"hello\",\"value\":\"world\"}".to_string()),
2238
- metadata: Some(json!({"source": "test"})),
2311
+ snapshot: Some(TransactionJson::from_value_for_test(
2312
+ json!({"key":"hello","value":"world"})
2313
+ )),
2314
+ metadata: Some(TransactionJson::from_value_for_test(
2315
+ json!({"source": "test"})
2316
+ )),
2239
2317
  origin: None,
2240
- schema_version: "1".to_string(),
2241
2318
  created_at: Some("2026-04-23T00:00:00Z".to_string()),
2242
2319
  updated_at: Some("2026-04-23T01:00:00Z".to_string()),
2243
2320
  global: false,
@@ -2312,7 +2389,7 @@ mod tests {
2312
2389
  vec![Expr::BinaryExpr(BinaryExpr::new(
2313
2390
  Box::new(col("metadata")),
2314
2391
  Operator::Eq,
2315
- Box::new(str_lit("{\"source\":\"match\"}")),
2392
+ Box::new(json_lit("{\"source\":\"match\"}")),
2316
2393
  ))],
2317
2394
  )
2318
2395
  .await
@@ -2333,16 +2410,19 @@ mod tests {
2333
2410
 
2334
2411
  assert_eq!(
2335
2412
  write_context.writes.as_slice(),
2336
- &[StageWrite::Rows {
2337
- mode: StageWriteMode::Replace,
2338
- rows: vec![StageRow {
2413
+ &[TransactionWrite::Rows {
2414
+ mode: TransactionWriteMode::Replace,
2415
+ rows: vec![TransactionWriteRow {
2339
2416
  entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2340
2417
  schema_key: "lix_key_value".to_string(),
2341
2418
  file_id: None,
2342
- snapshot_content: Some("{\"key\":\"hello\",\"value\":\"updated\"}".to_string()),
2343
- metadata: Some(json!({"schema_key": "lix_key_value"})),
2419
+ snapshot: Some(TransactionJson::from_value_for_test(
2420
+ json!({"key":"hello","value":"updated"})
2421
+ )),
2422
+ metadata: Some(TransactionJson::from_value_for_test(
2423
+ json!({"schema_key": "lix_key_value"})
2424
+ )),
2344
2425
  origin: None,
2345
- schema_version: "1".to_string(),
2346
2426
  created_at: None,
2347
2427
  updated_at: None,
2348
2428
  global: false,
@@ -2388,17 +2468,18 @@ mod tests {
2388
2468
 
2389
2469
  assert_eq!(
2390
2470
  write_context.writes.as_slice(),
2391
- &[StageWrite::Rows {
2392
- mode: StageWriteMode::Replace,
2471
+ &[TransactionWrite::Rows {
2472
+ mode: TransactionWriteMode::Replace,
2393
2473
  rows: vec![
2394
- StageRow {
2474
+ TransactionWriteRow {
2395
2475
  entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2396
2476
  schema_key: "lix_key_value".to_string(),
2397
2477
  file_id: None,
2398
- snapshot_content: None,
2399
- metadata: Some(json!({"source": "one"})),
2478
+ snapshot: None,
2479
+ metadata: Some(TransactionJson::from_value_for_test(
2480
+ json!({"source": "one"})
2481
+ )),
2400
2482
  origin: None,
2401
- schema_version: "1".to_string(),
2402
2483
  created_at: None,
2403
2484
  updated_at: None,
2404
2485
  global: false,
@@ -2407,14 +2488,15 @@ mod tests {
2407
2488
  untracked: false,
2408
2489
  version_id: "version-a".to_string(),
2409
2490
  },
2410
- StageRow {
2491
+ TransactionWriteRow {
2411
2492
  entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-2")),
2412
2493
  schema_key: "lix_key_value".to_string(),
2413
2494
  file_id: None,
2414
- snapshot_content: None,
2415
- metadata: Some(json!({"source": "two"})),
2495
+ snapshot: None,
2496
+ metadata: Some(TransactionJson::from_value_for_test(
2497
+ json!({"source": "two"})
2498
+ )),
2416
2499
  origin: None,
2417
- schema_version: "1".to_string(),
2418
2500
  created_at: None,
2419
2501
  updated_at: None,
2420
2502
  global: false,