@lix-js/sdk 0.6.0-preview.2 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. package/SKILL.md +4 -5
  2. package/dist/engine-wasm/wasm/lix_engine.js +1 -1
  3. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  4. package/dist/generated/builtin-schemas.d.ts +87 -162
  5. package/dist/generated/builtin-schemas.js +139 -236
  6. package/dist/open-lix.d.ts +1 -1
  7. package/dist-engine-src/src/binary_cas/types.rs +0 -6
  8. package/dist-engine-src/src/catalog/context.rs +412 -0
  9. package/dist-engine-src/src/catalog/mod.rs +10 -0
  10. package/dist-engine-src/src/catalog/schema.rs +4 -0
  11. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  12. package/dist-engine-src/src/cel/mod.rs +1 -1
  13. package/dist-engine-src/src/cel/provider.rs +1 -1
  14. package/dist-engine-src/src/commit_graph/context.rs +328 -1015
  15. package/dist-engine-src/src/commit_graph/mod.rs +2 -3
  16. package/dist-engine-src/src/commit_graph/types.rs +7 -43
  17. package/dist-engine-src/src/commit_graph/walker.rs +57 -81
  18. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  19. package/dist-engine-src/src/commit_store/context.rs +944 -0
  20. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  21. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  22. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  23. package/dist-engine-src/src/commit_store/types.rs +215 -0
  24. package/dist-engine-src/src/common/identity.rs +15 -5
  25. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  26. package/dist-engine-src/src/common/metadata.rs +17 -12
  27. package/dist-engine-src/src/common/mod.rs +5 -5
  28. package/dist-engine-src/src/domain.rs +324 -0
  29. package/dist-engine-src/src/engine.rs +29 -43
  30. package/dist-engine-src/src/entity_identity.rs +238 -118
  31. package/dist-engine-src/src/functions/context.rs +17 -52
  32. package/dist-engine-src/src/functions/deterministic.rs +1 -1
  33. package/dist-engine-src/src/functions/mod.rs +1 -1
  34. package/dist-engine-src/src/functions/provider.rs +4 -4
  35. package/dist-engine-src/src/functions/state.rs +39 -66
  36. package/dist-engine-src/src/functions/types.rs +1 -1
  37. package/dist-engine-src/src/init.rs +204 -151
  38. package/dist-engine-src/src/json_store/context.rs +354 -60
  39. package/dist-engine-src/src/json_store/encoded.rs +6 -6
  40. package/dist-engine-src/src/json_store/mod.rs +4 -1
  41. package/dist-engine-src/src/json_store/store.rs +884 -11
  42. package/dist-engine-src/src/json_store/types.rs +166 -1
  43. package/dist-engine-src/src/lib.rs +10 -9
  44. package/dist-engine-src/src/live_state/context.rs +608 -830
  45. package/dist-engine-src/src/live_state/mod.rs +3 -3
  46. package/dist-engine-src/src/live_state/overlay.rs +7 -7
  47. package/dist-engine-src/src/live_state/reader.rs +5 -5
  48. package/dist-engine-src/src/live_state/types.rs +19 -36
  49. package/dist-engine-src/src/live_state/visibility.rs +19 -14
  50. package/dist-engine-src/src/plugin/archive.rs +3 -6
  51. package/dist-engine-src/src/plugin/install.rs +0 -18
  52. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -1
  53. package/dist-engine-src/src/schema/annotations/defaults.rs +2 -7
  54. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -1
  55. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -1
  56. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -1
  57. package/dist-engine-src/src/schema/builtin/lix_change.json +11 -10
  58. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -1
  59. package/dist-engine-src/src/schema/builtin/lix_commit.json +8 -46
  60. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +29 -22
  61. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -1
  62. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -1
  63. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -1
  64. package/dist-engine-src/src/schema/builtin/lix_label.json +10 -3
  65. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  66. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +2 -8
  67. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -1
  68. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -1
  69. package/dist-engine-src/src/schema/builtin/mod.rs +10 -59
  70. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  71. package/dist-engine-src/src/schema/definition.json +47 -17
  72. package/dist-engine-src/src/schema/definition.rs +202 -96
  73. package/dist-engine-src/src/schema/key.rs +9 -77
  74. package/dist-engine-src/src/schema/mod.rs +4 -4
  75. package/dist-engine-src/src/schema/tests.rs +133 -92
  76. package/dist-engine-src/src/session/context.rs +40 -42
  77. package/dist-engine-src/src/session/create_version.rs +22 -14
  78. package/dist-engine-src/src/session/execute.rs +45 -14
  79. package/dist-engine-src/src/session/merge/apply.rs +4 -4
  80. package/dist-engine-src/src/session/merge/conflicts.rs +3 -2
  81. package/dist-engine-src/src/session/merge/stats.rs +1 -1
  82. package/dist-engine-src/src/session/merge/version.rs +35 -45
  83. package/dist-engine-src/src/session/mod.rs +4 -2
  84. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  85. package/dist-engine-src/src/session/switch_version.rs +16 -28
  86. package/dist-engine-src/src/sql2/change_provider.rs +14 -20
  87. package/dist-engine-src/src/sql2/classify.rs +61 -26
  88. package/dist-engine-src/src/sql2/context.rs +22 -18
  89. package/dist-engine-src/src/sql2/directory_history_provider.rs +28 -20
  90. package/dist-engine-src/src/sql2/directory_provider.rs +131 -83
  91. package/dist-engine-src/src/sql2/entity_history_provider.rs +10 -14
  92. package/dist-engine-src/src/sql2/entity_provider.rs +680 -169
  93. package/dist-engine-src/src/sql2/error.rs +21 -1
  94. package/dist-engine-src/src/sql2/execute.rs +325 -264
  95. package/dist-engine-src/src/sql2/file_history_provider.rs +29 -21
  96. package/dist-engine-src/src/sql2/file_provider.rs +533 -108
  97. package/dist-engine-src/src/sql2/filesystem_planner.rs +58 -94
  98. package/dist-engine-src/src/sql2/filesystem_visibility.rs +37 -23
  99. package/dist-engine-src/src/sql2/history_projection.rs +3 -27
  100. package/dist-engine-src/src/sql2/history_provider.rs +11 -17
  101. package/dist-engine-src/src/sql2/history_route.rs +22 -8
  102. package/dist-engine-src/src/sql2/lix_state_provider.rs +178 -96
  103. package/dist-engine-src/src/sql2/mod.rs +6 -3
  104. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  105. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  106. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  107. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  108. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  109. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  110. package/dist-engine-src/src/sql2/read_only.rs +10 -12
  111. package/dist-engine-src/src/sql2/session.rs +7 -10
  112. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  113. package/dist-engine-src/src/sql2/udfs/mod.rs +8 -1
  114. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  115. package/dist-engine-src/src/sql2/version_provider.rs +46 -31
  116. package/dist-engine-src/src/sql2/version_scope.rs +4 -4
  117. package/dist-engine-src/src/storage_bench.rs +1782 -325
  118. package/dist-engine-src/src/test_support.rs +183 -36
  119. package/dist-engine-src/src/tracked_state/by_file_index.rs +20 -24
  120. package/dist-engine-src/src/tracked_state/codec.rs +1519 -181
  121. package/dist-engine-src/src/tracked_state/context.rs +1155 -271
  122. package/dist-engine-src/src/tracked_state/diff.rs +249 -57
  123. package/dist-engine-src/src/tracked_state/materialization.rs +365 -103
  124. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  125. package/dist-engine-src/src/tracked_state/merge.rs +37 -19
  126. package/dist-engine-src/src/tracked_state/mod.rs +8 -7
  127. package/dist-engine-src/src/tracked_state/storage.rs +138 -6
  128. package/dist-engine-src/src/tracked_state/tree.rs +695 -252
  129. package/dist-engine-src/src/tracked_state/types.rs +176 -6
  130. package/dist-engine-src/src/transaction/commit.rs +695 -435
  131. package/dist-engine-src/src/transaction/context.rs +551 -310
  132. package/dist-engine-src/src/transaction/live_state_overlay.rs +9 -8
  133. package/dist-engine-src/src/transaction/mod.rs +2 -0
  134. package/dist-engine-src/src/transaction/normalization.rs +311 -447
  135. package/dist-engine-src/src/transaction/prep.rs +37 -0
  136. package/dist-engine-src/src/transaction/schema_resolver.rs +93 -71
  137. package/dist-engine-src/src/transaction/staging.rs +701 -406
  138. package/dist-engine-src/src/transaction/types.rs +231 -122
  139. package/dist-engine-src/src/transaction/validation.rs +2717 -1698
  140. package/dist-engine-src/src/untracked_state/codec.rs +40 -96
  141. package/dist-engine-src/src/untracked_state/context.rs +21 -5
  142. package/dist-engine-src/src/untracked_state/materialization.rs +10 -104
  143. package/dist-engine-src/src/untracked_state/mod.rs +3 -5
  144. package/dist-engine-src/src/untracked_state/storage.rs +105 -57
  145. package/dist-engine-src/src/untracked_state/types.rs +63 -13
  146. package/dist-engine-src/src/version/context.rs +1 -13
  147. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  148. package/dist-engine-src/src/version/mod.rs +3 -2
  149. package/dist-engine-src/src/version/refs.rs +12 -103
  150. package/dist-engine-src/src/version/stage_rows.rs +15 -19
  151. package/package.json +1 -1
  152. package/dist-engine-src/src/changelog/codec.rs +0 -321
  153. package/dist-engine-src/src/changelog/context.rs +0 -92
  154. package/dist-engine-src/src/changelog/materialization.rs +0 -121
  155. package/dist-engine-src/src/changelog/mod.rs +0 -13
  156. package/dist-engine-src/src/changelog/reader.rs +0 -20
  157. package/dist-engine-src/src/changelog/storage.rs +0 -220
  158. package/dist-engine-src/src/changelog/types.rs +0 -38
  159. package/dist-engine-src/src/schema/builtin/lix_change_set.json +0 -18
  160. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +0 -75
  161. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +0 -63
  162. package/dist-engine-src/src/schema_registry.rs +0 -294
  163. package/dist-engine-src/src/sql2/commit_derived_provider.rs +0 -591
  164. package/dist-engine-src/src/tracked_state/rebuild.rs +0 -771
  165. package/dist-engine-src/src/tracked_state/tree_types.rs +0 -176
@@ -1,5 +1,5 @@
1
1
  use std::any::Any;
2
- use std::collections::BTreeSet;
2
+ use std::collections::{BTreeMap, BTreeSet};
3
3
  use std::sync::Arc;
4
4
 
5
5
  use async_trait::async_trait;
@@ -14,7 +14,8 @@ use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, Scalar
14
14
  use datafusion::datasource::TableType;
15
15
  use datafusion::execution::TaskContext;
16
16
  use datafusion::logical_expr::dml::InsertOp;
17
- use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
17
+ use datafusion::logical_expr::expr::InList;
18
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
18
19
  use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
19
20
  use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
20
21
  use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
@@ -27,11 +28,12 @@ use serde_json::Value as JsonValue;
27
28
 
28
29
  use crate::commit_graph::CommitGraphReader;
29
30
  use crate::entity_identity::EntityIdentity;
30
- use crate::live_state::LiveStateRow;
31
+ use crate::live_state::MaterializedLiveStateRow;
31
32
  use crate::live_state::{
32
33
  LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
33
34
  };
34
35
  use crate::sql2::dml::{InsertExec, InsertSink};
36
+ use crate::sql2::predicate_typecheck::validate_json_predicate_filters;
35
37
  use crate::sql2::read_only::reject_read_only_entity_surface;
36
38
  use crate::sql2::version_scope::{
37
39
  explicit_version_ids_from_dml_filters, resolve_provider_version_ids,
@@ -40,23 +42,22 @@ use crate::sql2::version_scope::{
40
42
  use crate::sql2::write_normalization::{
41
43
  InsertCell, InsertColumnIntents, SqlCell, UpdateAssignmentValues, UpdateCell,
42
44
  };
43
- use crate::transaction::types::StageRow;
45
+ use crate::transaction::types::{TransactionJson, TransactionWriteRow};
44
46
  use crate::version::VersionRefReader;
45
- use crate::{parse_row_metadata, serialize_row_metadata, LixError, RowMetadata};
47
+ use crate::{parse_row_metadata_value, serialize_row_metadata, LixError};
46
48
 
47
49
  use super::entity_history_provider::EntityHistoryProvider;
48
50
  use super::history_route::{
49
51
  HISTORY_COL_CHANGE_ID, HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID,
50
52
  HISTORY_COL_FILE_ID, HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID,
51
- HISTORY_COL_SCHEMA_KEY, HISTORY_COL_SCHEMA_VERSION, HISTORY_COL_SNAPSHOT_CONTENT,
52
- HISTORY_COL_START_COMMIT_ID,
53
+ HISTORY_COL_SCHEMA_KEY, HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
53
54
  };
54
55
  use super::result_metadata::{json_field, mark_json_field};
55
56
  use crate::sql2::{
56
- SqlChangelogQuerySource, SqlWriteContext, WriteAccess, WriteContextLiveStateReader,
57
+ SqlCommitStoreQuerySource, SqlWriteContext, WriteAccess, WriteContextLiveStateReader,
57
58
  WriteContextVersionRefReader,
58
59
  };
59
- use crate::transaction::types::{StageWrite, StageWriteMode};
60
+ use crate::transaction::types::{TransactionWrite, TransactionWriteMode};
60
61
 
61
62
  pub(crate) async fn register_entity_providers(
62
63
  ctx: &SessionContext,
@@ -64,7 +65,7 @@ pub(crate) async fn register_entity_providers(
64
65
  live_state: Arc<dyn LiveStateReader>,
65
66
  version_ref: Arc<dyn VersionRefReader>,
66
67
  commit_graph: Arc<tokio::sync::Mutex<Box<dyn CommitGraphReader>>>,
67
- query_source: SqlChangelogQuerySource,
68
+ query_source: SqlCommitStoreQuerySource,
68
69
  schema_definitions: &[JsonValue],
69
70
  ) -> Result<(), LixError> {
70
71
  for schema in schema_definitions {
@@ -99,16 +100,18 @@ pub(crate) async fn register_entity_providers(
99
100
  )
100
101
  .map_err(datafusion_error_to_lix_error)?;
101
102
 
102
- let history_name = format!("{}_history", spec.schema_key);
103
- ctx.register_table(
104
- &history_name,
105
- Arc::new(EntityHistoryProvider::new(
106
- Arc::clone(&spec),
107
- Arc::clone(&commit_graph),
108
- query_source.clone(),
109
- )),
110
- )
111
- .map_err(datafusion_error_to_lix_error)?;
103
+ if schema_exposed_as_entity_history_surface(&spec.schema_key) {
104
+ let history_name = format!("{}_history", spec.schema_key);
105
+ ctx.register_table(
106
+ &history_name,
107
+ Arc::new(EntityHistoryProvider::new(
108
+ Arc::clone(&spec),
109
+ Arc::clone(&commit_graph),
110
+ query_source.clone(),
111
+ )),
112
+ )
113
+ .map_err(datafusion_error_to_lix_error)?;
114
+ }
112
115
  }
113
116
 
114
117
  Ok(())
@@ -177,7 +180,6 @@ pub(super) struct EntitySurfaceColumn {
177
180
  #[derive(Debug, Clone, PartialEq, Eq)]
178
181
  pub(super) struct EntitySurfaceSpec {
179
182
  pub(super) schema_key: String,
180
- schema_version: Option<String>,
181
183
  pub(super) primary_key_paths: Vec<Vec<String>>,
182
184
  pub(super) columns: Vec<EntitySurfaceColumn>,
183
185
  }
@@ -300,13 +302,14 @@ impl TableProvider for EntityProvider {
300
302
  &self,
301
303
  filters: &[&Expr],
302
304
  ) -> Result<Vec<TableProviderFilterPushDown>> {
305
+ let analyzer = EntityPrimaryKeyFilterAnalyzer::new(&self.spec);
303
306
  Ok(filters
304
307
  .iter()
305
308
  .map(|filter| {
306
- if explicit_version_ids_from_dml_filters(&[(*filter).clone()]).is_empty() {
307
- TableProviderFilterPushDown::Unsupported
309
+ if ExactVersionIdFilterAnalyzer.supports(filter) || analyzer.supports(filter) {
310
+ TableProviderFilterPushDown::Exact
308
311
  } else {
309
- TableProviderFilterPushDown::Inexact
312
+ TableProviderFilterPushDown::Unsupported
310
313
  }
311
314
  })
312
315
  .collect())
@@ -323,6 +326,7 @@ impl TableProvider for EntityProvider {
323
326
  let mut request = entity_live_state_scan_request(
324
327
  &self.spec.schema_key,
325
328
  self.version_binding.active_version_id(),
329
+ Some(projected_schema.as_ref()),
326
330
  limit,
327
331
  );
328
332
  if self.write_access.is_write() && matches!(self.version_binding, VersionBinding::Explicit)
@@ -342,6 +346,8 @@ impl TableProvider for EntityProvider {
342
346
  )
343
347
  .await
344
348
  .map_err(lix_error_to_datafusion_error)?;
349
+ apply_exact_version_id_filter(&mut request, exact_version_ids_from_filters(filters)?);
350
+ apply_exact_entity_id_filters(&mut request, &self.spec, filters)?;
345
351
 
346
352
  Ok(Arc::new(EntityScanExec::new(
347
353
  Arc::clone(&self.spec),
@@ -406,6 +412,7 @@ impl TableProvider for EntityProvider {
406
412
  };
407
413
 
408
414
  let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
415
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
409
416
  let physical_filters = filters
410
417
  .iter()
411
418
  .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
@@ -414,16 +421,19 @@ impl TableProvider for EntityProvider {
414
421
  &self.spec.schema_key,
415
422
  version_binding.active_version_id(),
416
423
  None,
424
+ None,
417
425
  );
418
426
  if matches!(version_binding, VersionBinding::Explicit) {
419
- request.filter.version_ids = explicit_version_ids_from_dml_filters(&filters);
420
- if request.filter.version_ids.is_empty() {
427
+ let exact_version_ids = exact_version_ids_from_filters(&filters)?;
428
+ if exact_version_ids.is_none() {
421
429
  return Err(DataFusionError::Plan(format!(
422
430
  "DELETE FROM {}_by_version requires an explicit lixcol_version_id predicate",
423
431
  self.spec.schema_key
424
432
  )));
425
433
  }
434
+ apply_exact_version_id_filter(&mut request, exact_version_ids);
426
435
  }
436
+ apply_exact_entity_id_filters(&mut request, &self.spec, &filters)?;
427
437
 
428
438
  Ok(Arc::new(EntityDeleteExec::new(
429
439
  Arc::clone(&self.spec),
@@ -458,6 +468,7 @@ impl TableProvider for EntityProvider {
458
468
  };
459
469
 
460
470
  let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
471
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
461
472
  let physical_assignments = assignments
462
473
  .iter()
463
474
  .map(|(column_name, expr)| {
@@ -471,11 +482,13 @@ impl TableProvider for EntityProvider {
471
482
  .iter()
472
483
  .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
473
484
  .collect::<Result<Vec<_>>>()?;
474
- let request = entity_live_state_scan_request(
485
+ let mut request = entity_live_state_scan_request(
475
486
  &self.spec.schema_key,
476
487
  version_binding.active_version_id(),
477
488
  None,
489
+ None,
478
490
  );
491
+ apply_exact_entity_id_filters(&mut request, &self.spec, &filters)?;
479
492
 
480
493
  Ok(Arc::new(EntityUpdateExec::new(
481
494
  Arc::clone(&self.spec),
@@ -489,6 +502,413 @@ impl TableProvider for EntityProvider {
489
502
  }
490
503
  }
491
504
 
505
+ fn entity_ids_from_primary_key_filters(
506
+ spec: &EntitySurfaceSpec,
507
+ filters: &[Expr],
508
+ ) -> Result<Option<Vec<EntityIdentity>>> {
509
+ let analyzer = EntityPrimaryKeyFilterAnalyzer::new(spec);
510
+ let mut entity_ids: Option<BTreeSet<EntityIdentity>> = None;
511
+ for filter in filters {
512
+ let Some(filter_ids) = analyzer.analyze(filter)? else {
513
+ continue;
514
+ };
515
+ entity_ids = Some(match entity_ids {
516
+ Some(existing_ids) => existing_ids.intersection(&filter_ids).cloned().collect(),
517
+ None => filter_ids,
518
+ });
519
+ }
520
+
521
+ Ok(entity_ids.map(|ids| ids.into_iter().collect()))
522
+ }
523
+
524
+ fn apply_exact_entity_id_filters(
525
+ request: &mut LiveStateScanRequest,
526
+ spec: &EntitySurfaceSpec,
527
+ filters: &[Expr],
528
+ ) -> Result<()> {
529
+ if let Some(entity_ids) = entity_ids_from_primary_key_filters(spec, filters)? {
530
+ if entity_ids.is_empty() {
531
+ request.limit = Some(0);
532
+ }
533
+ request.filter.entity_ids = entity_ids;
534
+ }
535
+ Ok(())
536
+ }
537
+
538
+ fn exact_version_ids_from_filters(filters: &[Expr]) -> Result<Option<Vec<String>>> {
539
+ let analyzer = ExactVersionIdFilterAnalyzer;
540
+ let mut version_ids: Option<BTreeSet<String>> = None;
541
+ for filter in filters {
542
+ let Some(filter_ids) = analyzer.analyze(filter)? else {
543
+ continue;
544
+ };
545
+ version_ids = Some(match version_ids {
546
+ Some(existing_ids) => existing_ids.intersection(&filter_ids).cloned().collect(),
547
+ None => filter_ids,
548
+ });
549
+ }
550
+ Ok(version_ids.map(|ids| ids.into_iter().collect()))
551
+ }
552
+
553
+ fn apply_exact_version_id_filter(
554
+ request: &mut LiveStateScanRequest,
555
+ version_ids: Option<Vec<String>>,
556
+ ) {
557
+ if let Some(version_ids) = version_ids {
558
+ if version_ids.is_empty() {
559
+ request.limit = Some(0);
560
+ }
561
+ request.filter.version_ids = version_ids;
562
+ }
563
+ }
564
+
565
+ struct EntityPrimaryKeyFilterAnalyzer<'a> {
566
+ primary_key_columns: Vec<&'a str>,
567
+ }
568
+
569
+ struct ExactVersionIdFilterAnalyzer;
570
+
571
+ impl ExactVersionIdFilterAnalyzer {
572
+ fn supports(&self, expr: &Expr) -> bool {
573
+ self.analyze(expr)
574
+ .is_ok_and(|constraint| constraint.is_some())
575
+ }
576
+
577
+ fn analyze(&self, expr: &Expr) -> Result<Option<BTreeSet<String>>> {
578
+ match expr {
579
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
580
+ let Some(left) = self.analyze(&binary_expr.left)? else {
581
+ return Ok(None);
582
+ };
583
+ let Some(right) = self.analyze(&binary_expr.right)? else {
584
+ return Ok(None);
585
+ };
586
+ Ok(Some(left.intersection(&right).cloned().collect()))
587
+ }
588
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
589
+ let Some(mut left) = self.analyze(&binary_expr.left)? else {
590
+ return Ok(None);
591
+ };
592
+ let Some(right) = self.analyze(&binary_expr.right)? else {
593
+ return Ok(None);
594
+ };
595
+ left.extend(right);
596
+ Ok(Some(left))
597
+ }
598
+ Expr::BinaryExpr(binary_expr) => {
599
+ Ok(version_id_from_binary_filter(binary_expr).map(|value| BTreeSet::from([value])))
600
+ }
601
+ Expr::InList(in_list) => {
602
+ Ok(version_ids_from_in_list_filter(in_list)
603
+ .map(|values| values.into_iter().collect()))
604
+ }
605
+ _ => Ok(None),
606
+ }
607
+ }
608
+ }
609
+
610
+ fn version_id_from_binary_filter(binary_expr: &BinaryExpr) -> Option<String> {
611
+ if binary_expr.op != Operator::Eq {
612
+ return None;
613
+ }
614
+
615
+ version_id_from_column_literal_filter(&binary_expr.left, &binary_expr.right)
616
+ .or_else(|| version_id_from_column_literal_filter(&binary_expr.right, &binary_expr.left))
617
+ }
618
+
619
+ fn version_ids_from_in_list_filter(in_list: &InList) -> Option<Vec<String>> {
620
+ if in_list.negated {
621
+ return None;
622
+ }
623
+ let Expr::Column(column) = in_list.expr.as_ref() else {
624
+ return None;
625
+ };
626
+ if column.name != "lixcol_version_id" {
627
+ return None;
628
+ }
629
+
630
+ let values = in_list
631
+ .list
632
+ .iter()
633
+ .map(string_expr_literal)
634
+ .collect::<Option<Vec<_>>>()?;
635
+ if values.is_empty() {
636
+ return None;
637
+ }
638
+ Some(values)
639
+ }
640
+
641
+ fn version_id_from_column_literal_filter(
642
+ column_expr: &Expr,
643
+ literal_expr: &Expr,
644
+ ) -> Option<String> {
645
+ let Expr::Column(column) = column_expr else {
646
+ return None;
647
+ };
648
+ if column.name != "lixcol_version_id" {
649
+ return None;
650
+ }
651
+ string_expr_literal(literal_expr)
652
+ }
653
+
654
+ impl<'a> EntityPrimaryKeyFilterAnalyzer<'a> {
655
+ fn new(spec: &'a EntitySurfaceSpec) -> Self {
656
+ Self {
657
+ primary_key_columns: string_primary_key_columns(spec),
658
+ }
659
+ }
660
+
661
+ fn supports(&self, expr: &Expr) -> bool {
662
+ self.analyze(expr)
663
+ .is_ok_and(|constraint| constraint.is_some())
664
+ }
665
+
666
+ fn analyze(&self, expr: &Expr) -> Result<Option<BTreeSet<EntityIdentity>>> {
667
+ if self.primary_key_columns.is_empty() {
668
+ return Ok(None);
669
+ };
670
+ let Some(constraint) = self.analyze_constraint(expr)? else {
671
+ return Ok(None);
672
+ };
673
+ Ok(constraint.into_entity_ids(&self.primary_key_columns))
674
+ }
675
+
676
+ fn analyze_constraint(&self, expr: &Expr) -> Result<Option<EntityIdentityConstraint>> {
677
+ match expr {
678
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
679
+ let Some(left) = self.analyze_constraint(&binary_expr.left)? else {
680
+ return Ok(None);
681
+ };
682
+ let Some(right) = self.analyze_constraint(&binary_expr.right)? else {
683
+ return Ok(None);
684
+ };
685
+ Ok(Some(left.intersect(right, &self.primary_key_columns)))
686
+ }
687
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
688
+ let Some(left) = self.analyze_constraint(&binary_expr.left)? else {
689
+ return Ok(None);
690
+ };
691
+ let Some(right) = self.analyze_constraint(&binary_expr.right)? else {
692
+ return Ok(None);
693
+ };
694
+ let Some(left_ids) = left.into_entity_ids(&self.primary_key_columns) else {
695
+ return Ok(None);
696
+ };
697
+ let Some(mut right_ids) = right.into_entity_ids(&self.primary_key_columns) else {
698
+ return Ok(None);
699
+ };
700
+ right_ids.extend(left_ids);
701
+ Ok(Some(EntityIdentityConstraint::Full(right_ids)))
702
+ }
703
+ Expr::BinaryExpr(binary_expr) => Ok(entity_identity_constraint_from_binary_filter(
704
+ binary_expr,
705
+ &self.primary_key_columns,
706
+ )),
707
+ Expr::InList(in_list) => Ok(entity_identity_constraint_from_in_list_filter(
708
+ in_list,
709
+ &self.primary_key_columns,
710
+ )),
711
+ _ => Ok(None),
712
+ }
713
+ }
714
+ }
715
+
716
+ #[derive(Debug, Clone, PartialEq, Eq)]
717
+ enum EntityIdentityConstraint {
718
+ Full(BTreeSet<EntityIdentity>),
719
+ Parts(BTreeMap<String, BTreeSet<String>>),
720
+ }
721
+
722
+ impl EntityIdentityConstraint {
723
+ fn intersect(self, other: Self, primary_key_columns: &[&str]) -> Self {
724
+ match (self, other) {
725
+ (Self::Full(left), Self::Full(right)) => {
726
+ Self::Full(left.intersection(&right).cloned().collect())
727
+ }
728
+ (Self::Full(ids), Self::Parts(parts)) | (Self::Parts(parts), Self::Full(ids)) => {
729
+ Self::Full(
730
+ ids.into_iter()
731
+ .filter(|identity| {
732
+ identity_matches_parts(identity, primary_key_columns, &parts)
733
+ })
734
+ .collect(),
735
+ )
736
+ }
737
+ (Self::Parts(mut left), Self::Parts(right)) => {
738
+ for (column, right_values) in right {
739
+ left.entry(column)
740
+ .and_modify(|left_values| {
741
+ *left_values =
742
+ left_values.intersection(&right_values).cloned().collect();
743
+ })
744
+ .or_insert(right_values);
745
+ }
746
+ Self::Parts(left)
747
+ }
748
+ }
749
+ }
750
+
751
+ fn into_entity_ids(self, primary_key_columns: &[&str]) -> Option<BTreeSet<EntityIdentity>> {
752
+ match self {
753
+ Self::Full(ids) => Some(ids),
754
+ Self::Parts(parts) => entity_ids_from_primary_key_parts(primary_key_columns, parts),
755
+ }
756
+ }
757
+ }
758
+
759
+ fn string_primary_key_columns(spec: &EntitySurfaceSpec) -> Vec<&str> {
760
+ spec.primary_key_paths
761
+ .iter()
762
+ .map(|path| {
763
+ let [column_name] = path.as_slice() else {
764
+ return None;
765
+ };
766
+ let column = spec.visible_column(column_name)?;
767
+ (column.column_type == EntityColumnType::String).then_some(column.name.as_str())
768
+ })
769
+ .collect::<Option<Vec<_>>>()
770
+ .unwrap_or_default()
771
+ }
772
+
773
+ fn entity_identity_constraint_from_binary_filter(
774
+ binary_expr: &BinaryExpr,
775
+ primary_key_columns: &[&str],
776
+ ) -> Option<EntityIdentityConstraint> {
777
+ if binary_expr.op != Operator::Eq {
778
+ return None;
779
+ }
780
+ entity_identity_constraint_from_column_literal_filter(
781
+ &binary_expr.left,
782
+ &binary_expr.right,
783
+ primary_key_columns,
784
+ )
785
+ .or_else(|| {
786
+ entity_identity_constraint_from_column_literal_filter(
787
+ &binary_expr.right,
788
+ &binary_expr.left,
789
+ primary_key_columns,
790
+ )
791
+ })
792
+ }
793
+
794
+ fn entity_identity_constraint_from_in_list_filter(
795
+ in_list: &InList,
796
+ primary_key_columns: &[&str],
797
+ ) -> Option<EntityIdentityConstraint> {
798
+ if in_list.negated {
799
+ return None;
800
+ }
801
+ let Expr::Column(column) = in_list.expr.as_ref() else {
802
+ return None;
803
+ };
804
+ let values = in_list
805
+ .list
806
+ .iter()
807
+ .map(string_expr_literal)
808
+ .collect::<Option<Vec<_>>>()?;
809
+ if values.is_empty() {
810
+ return None;
811
+ }
812
+ match column.name.as_str() {
813
+ "lixcol_entity_id" => values
814
+ .into_iter()
815
+ .map(|value| EntityIdentity::from_json_array_text(&value).ok())
816
+ .collect::<Option<BTreeSet<_>>>()
817
+ .map(EntityIdentityConstraint::Full),
818
+ column_name if primary_key_columns.contains(&column_name) => {
819
+ Some(EntityIdentityConstraint::Parts(BTreeMap::from([(
820
+ column_name.to_string(),
821
+ values.into_iter().collect(),
822
+ )])))
823
+ }
824
+ _ => None,
825
+ }
826
+ }
827
+
828
+ fn entity_identity_constraint_from_column_literal_filter(
829
+ column_expr: &Expr,
830
+ literal_expr: &Expr,
831
+ primary_key_columns: &[&str],
832
+ ) -> Option<EntityIdentityConstraint> {
833
+ let Expr::Column(column) = column_expr else {
834
+ return None;
835
+ };
836
+ let value = string_expr_literal(literal_expr)?;
837
+ match column.name.as_str() {
838
+ "lixcol_entity_id" => EntityIdentity::from_json_array_text(&value)
839
+ .ok()
840
+ .map(|identity| EntityIdentityConstraint::Full(BTreeSet::from([identity]))),
841
+ column_name if primary_key_columns.contains(&column_name) => {
842
+ Some(EntityIdentityConstraint::Parts(BTreeMap::from([(
843
+ column_name.to_string(),
844
+ BTreeSet::from([value]),
845
+ )])))
846
+ }
847
+ _ => None,
848
+ }
849
+ }
850
+
851
+ fn entity_ids_from_primary_key_parts(
852
+ primary_key_columns: &[&str],
853
+ parts: BTreeMap<String, BTreeSet<String>>,
854
+ ) -> Option<BTreeSet<EntityIdentity>> {
855
+ if primary_key_columns
856
+ .iter()
857
+ .any(|column| !parts.contains_key(*column))
858
+ {
859
+ return None;
860
+ }
861
+
862
+ let mut identities = BTreeSet::from([Vec::<String>::new()]);
863
+ for column in primary_key_columns {
864
+ let values = parts.get(*column)?;
865
+ identities = identities
866
+ .into_iter()
867
+ .flat_map(|prefix| {
868
+ values.iter().map(move |value| {
869
+ let mut parts = prefix.clone();
870
+ parts.push(value.clone());
871
+ parts
872
+ })
873
+ })
874
+ .collect();
875
+ }
876
+ Some(
877
+ identities
878
+ .into_iter()
879
+ .map(|parts| EntityIdentity { parts })
880
+ .collect(),
881
+ )
882
+ }
883
+
884
+ fn identity_matches_parts(
885
+ identity: &EntityIdentity,
886
+ primary_key_columns: &[&str],
887
+ parts: &BTreeMap<String, BTreeSet<String>>,
888
+ ) -> bool {
889
+ let identity_parts = identity.parts.as_slice();
890
+ primary_key_columns
891
+ .iter()
892
+ .zip(identity_parts.iter())
893
+ .all(|(column, value)| {
894
+ parts
895
+ .get(*column)
896
+ .is_none_or(|values| values.contains(value))
897
+ })
898
+ }
899
+
900
+ fn string_expr_literal(expr: &Expr) -> Option<String> {
901
+ let Expr::Literal(literal, _) = expr else {
902
+ return None;
903
+ };
904
+ match literal {
905
+ ScalarValue::Utf8(Some(value))
906
+ | ScalarValue::Utf8View(Some(value))
907
+ | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
908
+ _ => None,
909
+ }
910
+ }
911
+
492
912
  struct EntityInsertSink {
493
913
  spec: Arc<EntitySurfaceSpec>,
494
914
  insert_column_intents: InsertColumnIntents,
@@ -552,8 +972,8 @@ impl InsertSink for EntityInsertSink {
552
972
  .map_err(|_| DataFusionError::Execution("entity INSERT row count overflow".into()))?;
553
973
 
554
974
  self.write_ctx
555
- .stage_write(StageWrite::Rows {
556
- mode: StageWriteMode::Insert,
975
+ .stage_write(TransactionWrite::Rows {
976
+ mode: TransactionWriteMode::Insert,
557
977
  rows,
558
978
  })
559
979
  .await
@@ -616,12 +1036,7 @@ impl DisplayAs for EntityDeleteExec {
616
1036
  fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
617
1037
  match t {
618
1038
  DisplayFormatType::Default | DisplayFormatType::Verbose => {
619
- write!(
620
- f,
621
- "EntityDeleteExec(schema_key={}, filters={})",
622
- self.spec.schema_key,
623
- self.filters.len()
624
- )
1039
+ write!(f, "EntityDeleteExec(schema_key={})", self.spec.schema_key)
625
1040
  }
626
1041
  DisplayFormatType::TreeRender => write!(f, "EntityDeleteExec"),
627
1042
  }
@@ -694,7 +1109,7 @@ impl ExecutionPlan for EntityDeleteExec {
694
1109
  version_binding.active_version_id(),
695
1110
  )?;
696
1111
  for row in &mut write_rows {
697
- row.snapshot_content = None;
1112
+ row.snapshot = None;
698
1113
  }
699
1114
  let count = u64::try_from(write_rows.len()).map_err(|_| {
700
1115
  DataFusionError::Execution("entity DELETE row count overflow".to_string())
@@ -702,8 +1117,8 @@ impl ExecutionPlan for EntityDeleteExec {
702
1117
 
703
1118
  if count > 0 {
704
1119
  write_ctx
705
- .stage_write(StageWrite::Rows {
706
- mode: StageWriteMode::Replace,
1120
+ .stage_write(TransactionWrite::Rows {
1121
+ mode: TransactionWriteMode::Replace,
707
1122
  rows: write_rows,
708
1123
  })
709
1124
  .await
@@ -781,10 +1196,9 @@ impl DisplayAs for EntityUpdateExec {
781
1196
  DisplayFormatType::Default | DisplayFormatType::Verbose => {
782
1197
  write!(
783
1198
  f,
784
- "EntityUpdateExec(schema_key={}, assignments={}, filters={})",
1199
+ "EntityUpdateExec(schema_key={}, assignments={})",
785
1200
  self.spec.schema_key,
786
- self.assignments.len(),
787
- self.filters.len()
1201
+ self.assignments.len()
788
1202
  )
789
1203
  }
790
1204
  DisplayFormatType::TreeRender => write!(f, "EntityUpdateExec"),
@@ -865,8 +1279,8 @@ impl ExecutionPlan for EntityUpdateExec {
865
1279
 
866
1280
  if count > 0 {
867
1281
  write_ctx
868
- .stage_write(StageWrite::Rows {
869
- mode: StageWriteMode::Replace,
1282
+ .stage_write(TransactionWrite::Rows {
1283
+ mode: TransactionWriteMode::Replace,
870
1284
  rows: write_rows,
871
1285
  })
872
1286
  .await
@@ -953,7 +1367,7 @@ fn entity_update_write_rows_from_batch(
953
1367
  batch: &RecordBatch,
954
1368
  assignments: &[(String, Arc<dyn PhysicalExpr>)],
955
1369
  version_binding: Option<&str>,
956
- ) -> Result<Vec<StageRow>> {
1370
+ ) -> Result<Vec<TransactionWriteRow>> {
957
1371
  let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
958
1372
  (0..batch.num_rows())
959
1373
  .map(|row_index| {
@@ -965,19 +1379,10 @@ fn entity_update_write_rows_from_batch(
965
1379
  &spec.schema_key,
966
1380
  )?;
967
1381
 
968
- let schema_version = optional_string_value(batch, row_index, "lixcol_schema_version")?
969
- .or_else(|| spec.schema_version.clone())
970
- .ok_or_else(|| {
971
- DataFusionError::Execution(format!(
972
- "UPDATE entity surface '{}' requires lixcol_schema_version",
973
- spec.schema_key
974
- ))
975
- })?;
976
-
977
- Ok(StageRow {
1382
+ Ok(TransactionWriteRow {
978
1383
  entity_id: optional_string_value(batch, row_index, "lixcol_entity_id")?
979
1384
  .map(|entity_id| {
980
- EntityIdentity::from_string(&entity_id).map_err(|error| {
1385
+ EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
981
1386
  DataFusionError::Execution(format!(
982
1387
  "UPDATE entity surface '{}' has invalid lixcol_entity_id: {error}",
983
1388
  spec.schema_key
@@ -987,12 +1392,18 @@ fn entity_update_write_rows_from_batch(
987
1392
  .transpose()?,
988
1393
  schema_key: spec.schema_key.clone(),
989
1394
  file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
990
- snapshot_content: Some(entity_update_snapshot_content_from_batch(
991
- spec,
992
- batch,
993
- &assignment_values,
994
- row_index,
995
- )?),
1395
+ snapshot: Some(
1396
+ TransactionJson::from_value(
1397
+ entity_update_snapshot_content_from_batch(
1398
+ spec,
1399
+ batch,
1400
+ &assignment_values,
1401
+ row_index,
1402
+ )?,
1403
+ &format!("{} update snapshot_content", spec.schema_key),
1404
+ )
1405
+ .map_err(super::error::lix_error_to_datafusion_error)?,
1406
+ ),
996
1407
  metadata: entity_update_optional_metadata_value(
997
1408
  batch,
998
1409
  &assignment_values,
@@ -1001,7 +1412,6 @@ fn entity_update_write_rows_from_batch(
1001
1412
  &spec.schema_key,
1002
1413
  )?,
1003
1414
  origin: None,
1004
- schema_version,
1005
1415
  created_at: None,
1006
1416
  updated_at: None,
1007
1417
  global: scope.global,
@@ -1020,7 +1430,7 @@ fn entity_update_snapshot_content_from_batch(
1020
1430
  batch: &RecordBatch,
1021
1431
  assignment_values: &UpdateAssignmentValues,
1022
1432
  row_index: usize,
1023
- ) -> Result<String> {
1433
+ ) -> Result<JsonValue> {
1024
1434
  let snapshot_content = optional_string_value(batch, row_index, "lixcol_snapshot_content")?
1025
1435
  .ok_or_else(|| {
1026
1436
  DataFusionError::Execution(format!(
@@ -1055,12 +1465,7 @@ fn entity_update_snapshot_content_from_batch(
1055
1465
  };
1056
1466
  object.insert(column.name.clone(), value);
1057
1467
  }
1058
- serde_json::to_string(&JsonValue::Object(object)).map_err(|error| {
1059
- DataFusionError::Execution(format!(
1060
- "failed to serialize entity surface '{}' snapshot_content: {error}",
1061
- spec.schema_key
1062
- ))
1063
- })
1468
+ Ok(JsonValue::Object(object))
1064
1469
  }
1065
1470
 
1066
1471
  fn entity_update_optional_string_value(
@@ -1088,10 +1493,13 @@ fn entity_update_optional_metadata_value(
1088
1493
  row_index: usize,
1089
1494
  column_name: &str,
1090
1495
  context: &str,
1091
- ) -> Result<Option<RowMetadata>> {
1496
+ ) -> Result<Option<TransactionJson>> {
1092
1497
  entity_update_optional_string_value(batch, assignment_values, row_index, column_name)?
1093
1498
  .map(|value| {
1094
- parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
1499
+ let metadata = parse_row_metadata_value(&value, context)
1500
+ .map_err(super::error::lix_error_to_datafusion_error)?;
1501
+ TransactionJson::from_value(metadata, &format!("{context} metadata"))
1502
+ .map_err(super::error::lix_error_to_datafusion_error)
1095
1503
  })
1096
1504
  .transpose()
1097
1505
  }
@@ -1132,7 +1540,7 @@ fn entity_lix_state_write_rows_from_batch(
1132
1540
  batch: &RecordBatch,
1133
1541
  insert_column_intents: &InsertColumnIntents,
1134
1542
  version_binding: Option<&str>,
1135
- ) -> Result<Vec<StageRow>> {
1543
+ ) -> Result<Vec<TransactionWriteRow>> {
1136
1544
  entity_lix_state_write_rows_from_batch_with_options(
1137
1545
  spec,
1138
1546
  batch,
@@ -1146,7 +1554,7 @@ fn entity_existing_lix_state_write_rows_from_batch(
1146
1554
  spec: &EntitySurfaceSpec,
1147
1555
  batch: &RecordBatch,
1148
1556
  version_binding: Option<&str>,
1149
- ) -> Result<Vec<StageRow>> {
1557
+ ) -> Result<Vec<TransactionWriteRow>> {
1150
1558
  entity_lix_state_write_rows_from_batch_with_options(
1151
1559
  spec,
1152
1560
  batch,
@@ -1162,7 +1570,7 @@ fn entity_lix_state_write_rows_from_batch_with_options(
1162
1570
  insert_column_intents: &InsertColumnIntents,
1163
1571
  version_binding: Option<&str>,
1164
1572
  reject_read_only_fields: bool,
1165
- ) -> Result<Vec<StageRow>> {
1573
+ ) -> Result<Vec<TransactionWriteRow>> {
1166
1574
  (0..batch.num_rows())
1167
1575
  .map(|row_index| {
1168
1576
  let scope = resolve_write_version_scope(
@@ -1194,14 +1602,6 @@ fn entity_lix_state_write_rows_from_batch_with_options(
1194
1602
  reject_present_entity_insert_field(batch, row_index, "lixcol_commit_id")?;
1195
1603
  }
1196
1604
 
1197
- let schema_version = optional_string_value(batch, row_index, "lixcol_schema_version")?
1198
- .or_else(|| spec.schema_version.clone())
1199
- .ok_or_else(|| {
1200
- DataFusionError::Execution(format!(
1201
- "INSERT into entity surface '{}' requires lixcol_schema_version",
1202
- spec.schema_key
1203
- ))
1204
- })?;
1205
1605
  let snapshot_content =
1206
1606
  entity_snapshot_content_from_batch(spec, batch, insert_column_intents, row_index)?;
1207
1607
  let explicit_entity_id = optional_string_value(batch, row_index, "lixcol_entity_id")?;
@@ -1212,7 +1612,7 @@ fn entity_lix_state_write_rows_from_batch_with_options(
1212
1612
  spec.schema_key
1213
1613
  ))
1214
1614
  })?;
1215
- Some(EntityIdentity::from_string(&entity_id).map_err(|error| {
1615
+ Some(EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
1216
1616
  DataFusionError::Execution(format!(
1217
1617
  "INSERT into entity surface '{}' has invalid lixcol_entity_id: {error}",
1218
1618
  spec.schema_key
@@ -1221,7 +1621,7 @@ fn entity_lix_state_write_rows_from_batch_with_options(
1221
1621
  } else {
1222
1622
  explicit_entity_id
1223
1623
  .map(|entity_id| {
1224
- EntityIdentity::from_string(&entity_id).map_err(|error| {
1624
+ EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
1225
1625
  DataFusionError::Execution(format!(
1226
1626
  "INSERT into entity surface '{}' has invalid lixcol_entity_id: {error}",
1227
1627
  spec.schema_key
@@ -1231,14 +1631,22 @@ fn entity_lix_state_write_rows_from_batch_with_options(
1231
1631
  .transpose()?
1232
1632
  };
1233
1633
 
1234
- Ok(StageRow {
1634
+ Ok(TransactionWriteRow {
1235
1635
  entity_id,
1236
1636
  schema_key: spec.schema_key.clone(),
1237
1637
  file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1238
- snapshot_content: Some(snapshot_content),
1239
- metadata: optional_metadata_value(batch, row_index, "lixcol_metadata", &spec.schema_key)?,
1638
+ snapshot: Some(TransactionJson::from_value(
1639
+ snapshot_content,
1640
+ &format!("{} insert snapshot_content", spec.schema_key),
1641
+ )
1642
+ .map_err(super::error::lix_error_to_datafusion_error)?),
1643
+ metadata: optional_metadata_value(
1644
+ batch,
1645
+ row_index,
1646
+ "lixcol_metadata",
1647
+ &spec.schema_key,
1648
+ )?,
1240
1649
  origin: None,
1241
- schema_version: schema_version,
1242
1650
  created_at: None,
1243
1651
  updated_at: None,
1244
1652
  global: scope.global,
@@ -1257,7 +1665,7 @@ fn entity_snapshot_content_from_batch(
1257
1665
  batch: &RecordBatch,
1258
1666
  insert_column_intents: &InsertColumnIntents,
1259
1667
  row_index: usize,
1260
- ) -> Result<String> {
1668
+ ) -> Result<JsonValue> {
1261
1669
  let mut object = serde_json::Map::new();
1262
1670
  for column in &spec.columns {
1263
1671
  let value = match insert_column_intents.cell(batch, row_index, &column.name)? {
@@ -1271,12 +1679,7 @@ fn entity_snapshot_content_from_batch(
1271
1679
  };
1272
1680
  object.insert(column.name.clone(), value);
1273
1681
  }
1274
- serde_json::to_string(&JsonValue::Object(object)).map_err(|error| {
1275
- DataFusionError::Execution(format!(
1276
- "failed to serialize entity surface '{}' snapshot_content: {error}",
1277
- spec.schema_key
1278
- ))
1279
- })
1682
+ Ok(JsonValue::Object(object))
1280
1683
  }
1281
1684
 
1282
1685
  fn entity_json_value_from_scalar(
@@ -1404,10 +1807,13 @@ fn optional_metadata_value(
1404
1807
  row_index: usize,
1405
1808
  column_name: &str,
1406
1809
  context: &str,
1407
- ) -> Result<Option<RowMetadata>> {
1810
+ ) -> Result<Option<TransactionJson>> {
1408
1811
  optional_string_value(batch, row_index, column_name)?
1409
1812
  .map(|value| {
1410
- parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
1813
+ let metadata = parse_row_metadata_value(&value, context)
1814
+ .map_err(super::error::lix_error_to_datafusion_error)?;
1815
+ TransactionJson::from_value(metadata, &format!("{context} metadata"))
1816
+ .map_err(super::error::lix_error_to_datafusion_error)
1411
1817
  })
1412
1818
  .transpose()
1413
1819
  }
@@ -1573,6 +1979,7 @@ impl ExecutionPlan for EntityScanExec {
1573
1979
  fn entity_live_state_scan_request(
1574
1980
  schema_key: &str,
1575
1981
  active_version_id: Option<&str>,
1982
+ projected_schema: Option<&Schema>,
1576
1983
  limit: Option<usize>,
1577
1984
  ) -> LiveStateScanRequest {
1578
1985
  LiveStateScanRequest {
@@ -1583,15 +1990,40 @@ fn entity_live_state_scan_request(
1583
1990
  .unwrap_or_default(),
1584
1991
  ..LiveStateFilter::default()
1585
1992
  },
1586
- projection: LiveStateProjection::default(),
1993
+ projection: entity_live_state_projection(projected_schema),
1587
1994
  limit,
1588
1995
  }
1589
1996
  }
1590
1997
 
1998
/// Builds the live-state projection for an optional projected schema.
///
/// Without a schema the default projection is returned. With a schema, the
/// projection lists every `lixcol_`-prefixed field (prefix stripped) and, when
/// at least one field lacks that prefix, also `snapshot_content` unless it is
/// already listed.
+ fn entity_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
1999
+ let Some(schema) = projected_schema else {
2000
+ return LiveStateProjection::default();
2001
+ };
2002
+ let mut columns = projection_column_names(schema);
2003
// NOTE(review): presumably non-`lixcol_` fields are decoded from the snapshot
// JSON, which is why `snapshot_content` is forced in here; confirm against
// `entity_column_array`.
+ if schema
2004
+ .fields()
2005
+ .iter()
2006
+ .any(|field| !field.name().starts_with("lixcol_"))
2007
+ && !columns.iter().any(|column| column == "snapshot_content")
2008
+ {
2009
+ columns.push("snapshot_content".to_string());
2010
+ }
2011
+ LiveStateProjection { columns }
2012
+ }
2013
+
2014
/// Collects the names of all `lixcol_`-prefixed fields in `schema`, with the
/// prefix removed and in schema field order. Fields without the prefix are
/// skipped.
+ fn projection_column_names(schema: &Schema) -> Vec<String> {
2015
+ schema
2016
+ .fields()
2017
+ .iter()
2018
+ .filter_map(|field| field.name().strip_prefix("lixcol_"))
2019
+ .map(str::to_string)
2020
+ .collect()
2021
+ }
2022
+
1591
2023
  fn entity_record_batch(
1592
2024
  spec: &EntitySurfaceSpec,
1593
2025
  schema: SchemaRef,
1594
- rows: &[LiveStateRow],
2026
+ rows: &[MaterializedLiveStateRow],
1595
2027
  ) -> Result<RecordBatch> {
1596
2028
  if schema.fields().is_empty() {
1597
2029
  let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
@@ -1616,7 +2048,7 @@ fn entity_record_batch(
1616
2048
  fn entity_column_array(
1617
2049
  spec: &EntitySurfaceSpec,
1618
2050
  column_name: &str,
1619
- rows: &[LiveStateRow],
2051
+ rows: &[MaterializedLiveStateRow],
1620
2052
  snapshots: &[Option<JsonValue>],
1621
2053
  ) -> Result<ArrayRef> {
1622
2054
  if let Some(property_name) = column_name.strip_prefix("lixcol_") {
@@ -1665,13 +2097,16 @@ fn entity_column_array(
1665
2097
  })
1666
2098
  }
1667
2099
 
1668
- fn entity_system_column_array(column_name: &str, rows: &[LiveStateRow]) -> Result<ArrayRef> {
2100
+ fn entity_system_column_array(
2101
+ column_name: &str,
2102
+ rows: &[MaterializedLiveStateRow],
2103
+ ) -> Result<ArrayRef> {
1669
2104
  Ok(match column_name {
1670
2105
  "entity_id" => Arc::new(StringArray::from(
1671
2106
  rows.iter()
1672
2107
  .map(|row| {
1673
2108
  row.entity_id
1674
- .as_string()
2109
+ .as_json_array_text()
1675
2110
  .map(Some)
1676
2111
  .map_err(lix_error_to_datafusion_error)
1677
2112
  })
@@ -1685,7 +2120,6 @@ fn entity_system_column_array(column_name: &str, rows: &[LiveStateRow]) -> Resul
1685
2120
  .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
1686
2121
  .collect::<Vec<_>>(),
1687
2122
  )) as ArrayRef,
1688
- "schema_version" => string_array(rows.iter().map(|row| Some(row.schema_version.as_str()))),
1689
2123
  "created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
1690
2124
  "updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
1691
2125
  "global" => Arc::new(BooleanArray::from(
@@ -1801,12 +2235,11 @@ fn arrow_data_type_for_entity_column_type(column_type: EntityColumnType) -> Data
1801
2235
  pub(super) fn entity_system_fields(variant: EntityProviderVariant) -> Vec<Field> {
1802
2236
  if variant == EntityProviderVariant::History {
1803
2237
  return vec![
1804
- Field::new(HISTORY_COL_ENTITY_ID, DataType::Utf8, false),
2238
+ json_field(HISTORY_COL_ENTITY_ID, false),
1805
2239
  Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
1806
2240
  Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
1807
2241
  json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
1808
2242
  json_field(HISTORY_COL_METADATA, true),
1809
- Field::new(HISTORY_COL_SCHEMA_VERSION, DataType::Utf8, false),
1810
2243
  Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
1811
2244
  Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
1812
2245
  Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
@@ -1816,12 +2249,11 @@ pub(super) fn entity_system_fields(variant: EntityProviderVariant) -> Vec<Field>
1816
2249
  }
1817
2250
 
1818
2251
  let mut fields = vec![
1819
- Field::new("lixcol_entity_id", DataType::Utf8, true),
2252
+ json_field("lixcol_entity_id", true),
1820
2253
  Field::new("lixcol_schema_key", DataType::Utf8, false),
1821
2254
  Field::new("lixcol_file_id", DataType::Utf8, true),
1822
2255
  json_field("lixcol_snapshot_content", true),
1823
2256
  json_field("lixcol_metadata", true),
1824
- Field::new("lixcol_schema_version", DataType::Utf8, true),
1825
2257
  Field::new("lixcol_created_at", DataType::Utf8, true),
1826
2258
  Field::new("lixcol_updated_at", DataType::Utf8, true),
1827
2259
  Field::new("lixcol_global", DataType::Boolean, true),
@@ -1855,11 +2287,6 @@ fn derive_entity_surface_spec_from_schema(
1855
2287
  )
1856
2288
  })?;
1857
2289
 
1858
- let schema_version = schema
1859
- .get("x-lix-version")
1860
- .and_then(JsonValue::as_str)
1861
- .map(ToOwned::to_owned);
1862
-
1863
2290
  let properties = schema
1864
2291
  .get("properties")
1865
2292
  .and_then(JsonValue::as_object)
@@ -1895,7 +2322,6 @@ fn derive_entity_surface_spec_from_schema(
1895
2322
 
1896
2323
  Ok(EntitySurfaceSpec {
1897
2324
  schema_key: schema_key.to_string(),
1898
- schema_version,
1899
2325
  primary_key_paths,
1900
2326
  columns,
1901
2327
  })
@@ -1927,7 +2353,7 @@ fn parse_primary_key_paths(schema: &JsonValue) -> std::result::Result<Vec<Vec<St
1927
2353
  .collect()
1928
2354
  }
1929
2355
 
1930
- // TODO(engine2): share JSON Pointer parsing with schema/canonical validation once
2356
+ // TODO(engine): share JSON Pointer parsing with schema/canonical validation once
1931
2357
  // those helpers have a clean module boundary for SQL providers.
1932
2358
  fn parse_json_pointer(pointer: &str) -> std::result::Result<Vec<String>, LixError> {
1933
2359
  if pointer.is_empty() {
@@ -1968,14 +2394,11 @@ fn decode_json_pointer_segment(segment: &str) -> std::result::Result<String, Lix
1968
2394
  }
1969
2395
 
1970
2396
  fn schema_exposed_as_entity_surface(schema_key: &str) -> bool {
1971
- !matches!(
1972
- schema_key,
1973
- "lix_active_account"
1974
- | "lix_change"
1975
- | "lix_commit_edge"
1976
- | "lix_change_set"
1977
- | "lix_change_set_element"
1978
- )
2397
+ !matches!(schema_key, "lix_active_account" | "lix_change")
2398
+ }
2399
+
2400
+ fn schema_exposed_as_entity_history_surface(schema_key: &str) -> bool {
2401
+ !matches!(schema_key, "lix_commit" | "lix_commit_edge")
1979
2402
  }
1980
2403
 
1981
2404
  fn entity_column_type_from_schema(schema: &JsonValue) -> Option<EntityColumnType> {
@@ -2039,7 +2462,10 @@ mod tests {
2039
2462
  use datafusion::arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray};
2040
2463
  use datafusion::arrow::datatypes::{DataType, Field, Schema};
2041
2464
  use datafusion::arrow::record_batch::RecordBatch;
2465
+ use datafusion::common::{Column, ScalarValue};
2042
2466
  use datafusion::execution::TaskContext;
2467
+ use datafusion::logical_expr::expr::InList;
2468
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
2043
2469
  use serde_json::json;
2044
2470
 
2045
2471
  use super::{
@@ -2052,12 +2478,15 @@ mod tests {
2052
2478
  FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
2053
2479
  };
2054
2480
  use crate::live_state::{
2055
- LiveStateReader, LiveStateRow, LiveStateRowRequest, LiveStateScanRequest,
2481
+ LiveStateReader, LiveStateRowRequest, LiveStateScanRequest, MaterializedLiveStateRow,
2056
2482
  };
2057
2483
  use crate::sql2::dml::InsertSink;
2058
2484
  use crate::sql2::write_normalization::InsertColumnIntents;
2059
2485
  use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
2060
- use crate::transaction::types::{StageRow, StageWrite, StageWriteMode, StageWriteOutcome};
2486
+ use crate::transaction::types::{
2487
+ TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
2488
+ TransactionWriteRow,
2489
+ };
2061
2490
  use crate::version::{VersionHead, VersionRefReader};
2062
2491
  use crate::LixError;
2063
2492
 
@@ -2065,8 +2494,8 @@ mod tests {
2065
2494
  struct EmptyVersionRefReader;
2066
2495
  #[derive(Default)]
2067
2496
  struct CapturingWriteContext {
2068
- rows: Vec<LiveStateRow>,
2069
- writes: Vec<StageWrite>,
2497
+ rows: Vec<MaterializedLiveStateRow>,
2498
+ writes: Vec<TransactionWrite>,
2070
2499
  }
2071
2500
 
2072
2501
  #[async_trait]
@@ -2074,14 +2503,14 @@ mod tests {
2074
2503
  async fn scan_rows(
2075
2504
  &self,
2076
2505
  _request: &LiveStateScanRequest,
2077
- ) -> Result<Vec<LiveStateRow>, LixError> {
2506
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2078
2507
  Ok(vec![])
2079
2508
  }
2080
2509
 
2081
2510
  async fn load_row(
2082
2511
  &self,
2083
2512
  _request: &LiveStateRowRequest,
2084
- ) -> Result<Option<LiveStateRow>, LixError> {
2513
+ ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
2085
2514
  Ok(None)
2086
2515
  }
2087
2516
  }
@@ -2113,7 +2542,10 @@ mod tests {
2113
2542
  &self,
2114
2543
  hashes: &[crate::binary_cas::BlobHash],
2115
2544
  ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2116
- Ok(crate::binary_cas::BlobBytesBatch::missing(hashes.len()))
2545
+ Ok(crate::binary_cas::BlobBytesBatch::new(vec![
2546
+ None;
2547
+ hashes.len()
2548
+ ]))
2117
2549
  }
2118
2550
  }
2119
2551
 
@@ -2141,7 +2573,7 @@ mod tests {
2141
2573
  async fn scan_live_state(
2142
2574
  &mut self,
2143
2575
  _request: &LiveStateScanRequest,
2144
- ) -> Result<Vec<LiveStateRow>, LixError> {
2576
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2145
2577
  Ok(self.rows.clone())
2146
2578
  }
2147
2579
 
@@ -2155,14 +2587,17 @@ mod tests {
2155
2587
  Ok(Some(format!("commit-{version_id}")))
2156
2588
  }
2157
2589
 
2158
- async fn stage_write(&mut self, write: StageWrite) -> Result<StageWriteOutcome, LixError> {
2590
+ async fn stage_write(
2591
+ &mut self,
2592
+ write: TransactionWrite,
2593
+ ) -> Result<TransactionWriteOutcome, LixError> {
2159
2594
  self.writes.push(write);
2160
- Ok(StageWriteOutcome { count: 0 })
2595
+ Ok(TransactionWriteOutcome { count: 0 })
2161
2596
  }
2162
2597
  }
2163
2598
 
2164
- fn live_row() -> LiveStateRow {
2165
- LiveStateRow {
2599
+ fn live_row() -> MaterializedLiveStateRow {
2600
+ MaterializedLiveStateRow {
2166
2601
  entity_id: crate::entity_identity::EntityIdentity::single("entity-1"),
2167
2602
  schema_key: "project_message".to_string(),
2168
2603
  file_id: None,
@@ -2170,8 +2605,8 @@ mod tests {
2170
2605
  "{\"body\":\"hello\",\"rating\":4.5,\"count\":7,\"enabled\":true,\"meta\":{\"x\":1}}"
2171
2606
  .to_string(),
2172
2607
  ),
2173
- metadata: Some(json!({"source": "test"})),
2174
- schema_version: "1".to_string(),
2608
+ metadata: Some(json!({"source": "test"}).to_string()),
2609
+ deleted: false,
2175
2610
  version_id: "version-a".to_string(),
2176
2611
  change_id: Some("change-a".to_string()),
2177
2612
  commit_id: Some("commit-a".to_string()),
@@ -2186,7 +2621,6 @@ mod tests {
2186
2621
  Arc::new(
2187
2622
  derive_entity_surface_spec_from_schema(&json!({
2188
2623
  "x-lix-key": "project_message",
2189
- "x-lix-version": "1",
2190
2624
  "type": "object",
2191
2625
  "properties": {
2192
2626
  "body": { "type": "string" },
@@ -2204,7 +2638,6 @@ mod tests {
2204
2638
  Arc::new(
2205
2639
  derive_entity_surface_spec_from_schema(&json!({
2206
2640
  "x-lix-key": "project_message",
2207
- "x-lix-version": "1",
2208
2641
  "x-lix-primary-key": ["/id"],
2209
2642
  "type": "object",
2210
2643
  "properties": {
@@ -2221,6 +2654,22 @@ mod tests {
2221
2654
  Arc::new(StringArray::from(values)) as ArrayRef
2222
2655
  }
2223
2656
 
2657
/// Test helper: wraps `value` in a non-null `Utf8` literal expression.
+ fn string_literal(value: &str) -> Expr {
2658
+ Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
2659
+ }
2660
+
2661
/// Test helper: builds a column-reference expression from a bare column name.
+ fn column(name: &str) -> Expr {
2662
+ Expr::Column(Column::from_name(name))
2663
+ }
2664
+
2665
/// Test helper: builds the binary expression `column_name = 'value'`, with a
/// column reference on the left and a string literal on the right.
+ fn eq_filter(column_name: &str, value: &str) -> Expr {
2666
+ Expr::BinaryExpr(BinaryExpr::new(
2667
+ Box::new(column(column_name)),
2668
+ Operator::Eq,
2669
+ Box::new(string_literal(value)),
2670
+ ))
2671
+ }
2672
+
2224
2673
  fn entity_insert_batch(include_version: bool, global: bool) -> RecordBatch {
2225
2674
  let mut fields = vec![
2226
2675
  Field::new("body", DataType::Utf8, true),
@@ -2239,7 +2688,7 @@ mod tests {
2239
2688
  Arc::new(BooleanArray::from(vec![true])) as ArrayRef,
2240
2689
  string_column(vec![Some("{\"x\":1}")]),
2241
2690
  Arc::new(Float64Array::from(vec![4.5])) as ArrayRef,
2242
- string_column(vec![Some("entity-1")]),
2691
+ string_column(vec![Some("[\"entity-1\"]")]),
2243
2692
  string_column(vec![Some("{\"source\":\"entity\"}")]),
2244
2693
  Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
2245
2694
  Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
@@ -2266,7 +2715,7 @@ mod tests {
2266
2715
  ];
2267
2716
  if include_entity_id {
2268
2717
  fields.push(Field::new("lixcol_entity_id", DataType::Utf8, false));
2269
- columns.push(string_column(vec![Some("message-1")]));
2718
+ columns.push(string_column(vec![Some("[\"message-1\"]")]));
2270
2719
  }
2271
2720
 
2272
2721
  RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
@@ -2283,7 +2732,6 @@ mod tests {
2283
2732
  fn derives_entity_surface_spec_from_schema_definition() {
2284
2733
  let spec = derive_entity_surface_spec_from_schema(&json!({
2285
2734
  "x-lix-key": "project_message",
2286
- "x-lix-version": "1",
2287
2735
  "type": "object",
2288
2736
  "properties": {
2289
2737
  "body": { "type": "string" },
@@ -2295,7 +2743,6 @@ mod tests {
2295
2743
  .expect("schema should derive entity surface spec");
2296
2744
 
2297
2745
  assert_eq!(spec.schema_key, "project_message");
2298
- assert_eq!(spec.schema_version.as_deref(), Some("1"));
2299
2746
  assert_eq!(
2300
2747
  spec.visible_column_names().collect::<Vec<_>>(),
2301
2748
  vec!["body", "meta", "rating"]
@@ -2320,7 +2767,6 @@ mod tests {
2320
2767
  fn entity_surface_spec_rejects_properties_without_projection_type() {
2321
2768
  let error = derive_entity_surface_spec_from_schema(&json!({
2322
2769
  "x-lix-key": "project_message",
2323
- "x-lix-version": "1",
2324
2770
  "x-lix-primary-key": ["/id"],
2325
2771
  "type": "object",
2326
2772
  "properties": {
@@ -2463,7 +2909,7 @@ mod tests {
2463
2909
  .downcast_ref::<datafusion::arrow::array::StringArray>()
2464
2910
  .expect("entity id is string")
2465
2911
  .value(0),
2466
- "entity-1"
2912
+ "[\"entity-1\"]"
2467
2913
  );
2468
2914
  assert_eq!(
2469
2915
  batch
@@ -2498,6 +2944,80 @@ mod tests {
2498
2944
  assert!(provider.schema.field_with_name("lixcol_version_id").is_ok());
2499
2945
  }
2500
2946
 
2947
// Two primary-key filters on `id` (an equality and a non-negated IN list) are
// expected to narrow to the single identity they have in common, `entity-a`.
+ #[test]
2948
+ fn primary_key_filters_route_entity_ids_for_string_primary_key() {
2949
+ let spec = entity_insert_spec_with_primary_key();
2950
+ let filters = vec![
2951
+ eq_filter("id", "entity-a"),
2952
+ Expr::InList(InList::new(
2953
+ Box::new(column("id")),
2954
+ vec![string_literal("entity-b"), string_literal("entity-a")],
2955
// `false`: a plain (non-negated) IN list.
+ false,
2956
+ )),
2957
+ ];
2958
+
2959
+ let entity_ids = super::entity_ids_from_primary_key_filters(&spec, &filters)
2960
+ .expect("primary-key filters should analyze")
2961
+ .expect("primary-key filters should produce a constraint");
2962
+
2963
+ assert_eq!(
2964
+ entity_ids,
2965
+ vec![crate::entity_identity::EntityIdentity::single("entity-a")]
2966
+ );
2967
+ }
2968
+
2969
// Exercises the analyzer on boolean combinations of primary-key equalities:
// OR is expected to yield both identities, while AND of two different values
// for the same column is expected to yield an empty (but present) id set.
+ #[test]
2970
+ fn primary_key_filter_analyzer_models_boolean_predicates() {
2971
+ let spec = entity_insert_spec_with_primary_key();
2972
+ let analyzer = super::EntityPrimaryKeyFilterAnalyzer::new(&spec);
2973
+ let disjunction = Expr::BinaryExpr(BinaryExpr::new(
2974
+ Box::new(eq_filter("id", "entity-a")),
2975
+ Operator::Or,
2976
+ Box::new(eq_filter("id", "entity-b")),
2977
+ ));
2978
+ let contradiction = Expr::BinaryExpr(BinaryExpr::new(
2979
+ Box::new(eq_filter("id", "entity-a")),
2980
+ Operator::And,
2981
+ Box::new(eq_filter("id", "entity-b")),
2982
+ ));
2983
+
2984
+ let disjunction_ids = analyzer
2985
+ .analyze(&disjunction)
2986
+ .expect("OR should analyze")
2987
+ .expect("OR should produce an entity-id set");
2988
+ let contradiction_ids = analyzer
2989
+ .analyze(&contradiction)
2990
+ .expect("AND should analyze")
2991
+ .expect("AND should produce an entity-id set");
2992
+
2993
// The expected order (`entity-a` before `entity-b`) relies on the iteration
// order of the returned collection.
+ assert_eq!(
2994
+ disjunction_ids.into_iter().collect::<Vec<_>>(),
2995
+ vec![
2996
+ crate::entity_identity::EntityIdentity::single("entity-a"),
2997
+ crate::entity_identity::EntityIdentity::single("entity-b"),
2998
+ ]
2999
+ );
3000
+ assert!(contradiction_ids.is_empty());
3001
+ }
3002
+
3003
// A filter on a non-key column (`body`) and a negated IN list on the key
// column are expected to contribute no entity ids.
//
// NOTE(review): `unwrap_or_default().is_empty()` passes both when the analysis
// returns `None` and when it returns an empty collection, so this test cannot
// tell "no constraint" apart from "constraint that matches nothing". Consider
// asserting the `None` case explicitly if that is the intended contract.
+ #[test]
3004
+ fn primary_key_filters_ignore_non_key_and_negated_predicates() {
3005
+ let spec = entity_insert_spec_with_primary_key();
3006
+ let filters = vec![
3007
+ eq_filter("body", "hello"),
3008
+ Expr::InList(InList::new(
3009
+ Box::new(column("id")),
3010
+ vec![string_literal("entity-a")],
3011
// `true`: the IN list is negated (`NOT IN`).
+ true,
3012
+ )),
3013
+ ];
3014
+
3015
+ assert!(super::entity_ids_from_primary_key_filters(&spec, &filters)
3016
+ .expect("ignored filters should analyze")
3017
+ .unwrap_or_default()
3018
+ .is_empty());
3019
+ }
3020
+
2501
3021
  #[test]
2502
3022
  fn decodes_by_version_entity_insert_into_lix_state_write_row() {
2503
3023
  let spec = entity_insert_spec();
@@ -2515,22 +3035,17 @@ mod tests {
2515
3035
  Some(&crate::entity_identity::EntityIdentity::single("entity-1"))
2516
3036
  );
2517
3037
  assert_eq!(rows[0].schema_key, "project_message");
2518
- assert_eq!(rows[0].schema_version.as_str(), "1");
2519
3038
  assert_eq!(rows[0].version_id, "version-a");
2520
3039
  assert_eq!(
2521
3040
  rows[0].metadata.as_ref(),
2522
- Some(&json!({"source": "entity"}))
3041
+ Some(&TransactionJson::from_value_for_test(
3042
+ json!({"source": "entity"})
3043
+ ))
2523
3044
  );
2524
3045
  assert!(!rows[0].global);
2525
3046
  assert_eq!(
2526
- serde_json::from_str::<serde_json::Value>(
2527
- rows[0]
2528
- .snapshot_content
2529
- .as_deref()
2530
- .expect("snapshot_content")
2531
- )
2532
- .expect("snapshot_content JSON"),
2533
- json!({
3047
+ rows[0].snapshot.as_ref().expect("snapshot_content"),
3048
+ &json!({
2534
3049
  "body": "hello",
2535
3050
  "count": 7,
2536
3051
  "enabled": true,
@@ -2554,14 +3069,8 @@ mod tests {
2554
3069
  assert_eq!(rows.len(), 1);
2555
3070
  assert_eq!(rows[0].entity_id, None);
2556
3071
  assert_eq!(
2557
- serde_json::from_str::<serde_json::Value>(
2558
- rows[0]
2559
- .snapshot_content
2560
- .as_deref()
2561
- .expect("snapshot_content")
2562
- )
2563
- .expect("snapshot_content JSON"),
2564
- json!({
3072
+ rows[0].snapshot.as_ref().expect("snapshot_content"),
3073
+ &json!({
2565
3074
  "body": "hello",
2566
3075
  "id": "message-1"
2567
3076
  })
@@ -2675,17 +3184,19 @@ mod tests {
2675
3184
  assert_eq!(count, 1);
2676
3185
  assert_eq!(
2677
3186
  write_context.writes.as_slice(),
2678
- &[StageWrite::Rows { mode: StageWriteMode::Insert, rows: vec![StageRow {
3187
+ &[TransactionWrite::Rows {
3188
+ mode: TransactionWriteMode::Insert,
3189
+ rows: vec![TransactionWriteRow {
2679
3190
  entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
2680
3191
  schema_key: "project_message".to_string(),
2681
3192
  file_id: None,
2682
- snapshot_content: Some(
2683
- "{\"body\":\"hello\",\"count\":7,\"enabled\":true,\"meta\":{\"x\":1},\"rating\":4.5}"
2684
- .to_string()
2685
- ),
2686
- metadata: Some(json!({"source": "entity"})),
3193
+ snapshot: Some(TransactionJson::from_value_for_test(
3194
+ json!({"body":"hello","count":7,"enabled":true,"meta":{"x":1},"rating":4.5})
3195
+ )),
3196
+ metadata: Some(TransactionJson::from_value_for_test(
3197
+ json!({"source": "entity"})
3198
+ )),
2687
3199
  origin: None,
2688
- schema_version: "1".to_string(),
2689
3200
  created_at: None,
2690
3201
  updated_at: None,
2691
3202
  global: false,