@lix-js/sdk 0.6.0-preview.2 → 0.6.0-preview.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/SKILL.md +46 -8
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +25 -1
  3. package/dist/engine-wasm/wasm/lix_engine.js +60 -2
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +5 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +10 -3
  9. package/dist/open-lix.js +39 -0
  10. package/dist-engine-src/src/binary_cas/types.rs +0 -6
  11. package/dist-engine-src/src/catalog/context.rs +412 -0
  12. package/dist-engine-src/src/catalog/mod.rs +10 -0
  13. package/dist-engine-src/src/catalog/schema.rs +4 -0
  14. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  15. package/dist-engine-src/src/cel/mod.rs +1 -1
  16. package/dist-engine-src/src/cel/provider.rs +1 -1
  17. package/dist-engine-src/src/commit_graph/context.rs +328 -1015
  18. package/dist-engine-src/src/commit_graph/mod.rs +2 -3
  19. package/dist-engine-src/src/commit_graph/types.rs +7 -43
  20. package/dist-engine-src/src/commit_graph/walker.rs +57 -81
  21. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  22. package/dist-engine-src/src/commit_store/context.rs +944 -0
  23. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  24. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  25. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  26. package/dist-engine-src/src/commit_store/types.rs +215 -0
  27. package/dist-engine-src/src/common/identity.rs +15 -5
  28. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  29. package/dist-engine-src/src/common/metadata.rs +17 -12
  30. package/dist-engine-src/src/common/mod.rs +5 -5
  31. package/dist-engine-src/src/domain.rs +324 -0
  32. package/dist-engine-src/src/engine.rs +29 -43
  33. package/dist-engine-src/src/entity_identity.rs +238 -118
  34. package/dist-engine-src/src/functions/context.rs +17 -52
  35. package/dist-engine-src/src/functions/deterministic.rs +1 -1
  36. package/dist-engine-src/src/functions/mod.rs +1 -1
  37. package/dist-engine-src/src/functions/provider.rs +4 -4
  38. package/dist-engine-src/src/functions/state.rs +39 -66
  39. package/dist-engine-src/src/functions/types.rs +1 -1
  40. package/dist-engine-src/src/init.rs +204 -151
  41. package/dist-engine-src/src/json_store/context.rs +354 -60
  42. package/dist-engine-src/src/json_store/encoded.rs +6 -6
  43. package/dist-engine-src/src/json_store/mod.rs +4 -1
  44. package/dist-engine-src/src/json_store/store.rs +884 -11
  45. package/dist-engine-src/src/json_store/types.rs +166 -1
  46. package/dist-engine-src/src/lib.rs +11 -10
  47. package/dist-engine-src/src/live_state/context.rs +608 -830
  48. package/dist-engine-src/src/live_state/mod.rs +3 -3
  49. package/dist-engine-src/src/live_state/overlay.rs +7 -7
  50. package/dist-engine-src/src/live_state/reader.rs +5 -5
  51. package/dist-engine-src/src/live_state/types.rs +19 -36
  52. package/dist-engine-src/src/live_state/visibility.rs +19 -14
  53. package/dist-engine-src/src/plugin/archive.rs +3 -6
  54. package/dist-engine-src/src/plugin/install.rs +0 -18
  55. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -1
  56. package/dist-engine-src/src/schema/annotations/defaults.rs +2 -7
  57. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -1
  58. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -1
  59. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -1
  60. package/dist-engine-src/src/schema/builtin/lix_change.json +11 -10
  61. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -1
  62. package/dist-engine-src/src/schema/builtin/lix_commit.json +8 -46
  63. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +29 -22
  64. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -1
  65. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -1
  66. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -1
  67. package/dist-engine-src/src/schema/builtin/lix_label.json +10 -3
  68. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  69. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +2 -8
  70. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -1
  71. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -1
  72. package/dist-engine-src/src/schema/builtin/mod.rs +10 -59
  73. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  74. package/dist-engine-src/src/schema/definition.json +47 -17
  75. package/dist-engine-src/src/schema/definition.rs +202 -96
  76. package/dist-engine-src/src/schema/key.rs +9 -77
  77. package/dist-engine-src/src/schema/mod.rs +4 -4
  78. package/dist-engine-src/src/schema/tests.rs +133 -92
  79. package/dist-engine-src/src/session/context.rs +86 -48
  80. package/dist-engine-src/src/session/create_version.rs +22 -14
  81. package/dist-engine-src/src/session/execute.rs +117 -23
  82. package/dist-engine-src/src/session/merge/apply.rs +4 -4
  83. package/dist-engine-src/src/session/merge/conflicts.rs +3 -2
  84. package/dist-engine-src/src/session/merge/stats.rs +1 -1
  85. package/dist-engine-src/src/session/merge/version.rs +35 -45
  86. package/dist-engine-src/src/session/mod.rs +9 -7
  87. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  88. package/dist-engine-src/src/session/switch_version.rs +17 -28
  89. package/dist-engine-src/src/session/transaction.rs +76 -0
  90. package/dist-engine-src/src/sql2/change_provider.rs +14 -20
  91. package/dist-engine-src/src/sql2/classify.rs +75 -48
  92. package/dist-engine-src/src/sql2/context.rs +22 -18
  93. package/dist-engine-src/src/sql2/directory_history_provider.rs +28 -20
  94. package/dist-engine-src/src/sql2/directory_provider.rs +131 -83
  95. package/dist-engine-src/src/sql2/entity_history_provider.rs +10 -14
  96. package/dist-engine-src/src/sql2/entity_provider.rs +680 -169
  97. package/dist-engine-src/src/sql2/error.rs +24 -5
  98. package/dist-engine-src/src/sql2/execute.rs +426 -272
  99. package/dist-engine-src/src/sql2/file_history_provider.rs +29 -21
  100. package/dist-engine-src/src/sql2/file_provider.rs +533 -108
  101. package/dist-engine-src/src/sql2/filesystem_planner.rs +58 -94
  102. package/dist-engine-src/src/sql2/filesystem_visibility.rs +37 -23
  103. package/dist-engine-src/src/sql2/history_projection.rs +3 -27
  104. package/dist-engine-src/src/sql2/history_provider.rs +11 -17
  105. package/dist-engine-src/src/sql2/history_route.rs +22 -8
  106. package/dist-engine-src/src/sql2/lix_state_provider.rs +178 -96
  107. package/dist-engine-src/src/sql2/mod.rs +8 -4
  108. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  109. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  110. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  111. package/dist-engine-src/src/sql2/public_bind/dml.rs +172 -0
  112. package/dist-engine-src/src/sql2/public_bind/mod.rs +26 -0
  113. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  114. package/dist-engine-src/src/sql2/read_only.rs +10 -12
  115. package/dist-engine-src/src/sql2/session.rs +7 -10
  116. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  117. package/dist-engine-src/src/sql2/udfs/mod.rs +8 -1
  118. package/dist-engine-src/src/sql2/udfs/public_call.rs +238 -0
  119. package/dist-engine-src/src/sql2/version_provider.rs +46 -31
  120. package/dist-engine-src/src/sql2/version_scope.rs +4 -4
  121. package/dist-engine-src/src/storage_bench.rs +1782 -325
  122. package/dist-engine-src/src/test_support.rs +183 -36
  123. package/dist-engine-src/src/tracked_state/by_file_index.rs +20 -24
  124. package/dist-engine-src/src/tracked_state/codec.rs +1519 -181
  125. package/dist-engine-src/src/tracked_state/context.rs +1155 -271
  126. package/dist-engine-src/src/tracked_state/diff.rs +249 -57
  127. package/dist-engine-src/src/tracked_state/materialization.rs +365 -103
  128. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  129. package/dist-engine-src/src/tracked_state/merge.rs +37 -19
  130. package/dist-engine-src/src/tracked_state/mod.rs +8 -7
  131. package/dist-engine-src/src/tracked_state/storage.rs +138 -6
  132. package/dist-engine-src/src/tracked_state/tree.rs +695 -252
  133. package/dist-engine-src/src/tracked_state/types.rs +176 -6
  134. package/dist-engine-src/src/transaction/commit.rs +695 -435
  135. package/dist-engine-src/src/transaction/context.rs +551 -310
  136. package/dist-engine-src/src/transaction/live_state_overlay.rs +9 -8
  137. package/dist-engine-src/src/transaction/mod.rs +2 -0
  138. package/dist-engine-src/src/transaction/normalization.rs +311 -447
  139. package/dist-engine-src/src/transaction/prep.rs +37 -0
  140. package/dist-engine-src/src/transaction/schema_resolver.rs +93 -71
  141. package/dist-engine-src/src/transaction/staging.rs +701 -406
  142. package/dist-engine-src/src/transaction/types.rs +231 -122
  143. package/dist-engine-src/src/transaction/validation.rs +2717 -1698
  144. package/dist-engine-src/src/untracked_state/codec.rs +40 -96
  145. package/dist-engine-src/src/untracked_state/context.rs +21 -5
  146. package/dist-engine-src/src/untracked_state/materialization.rs +10 -104
  147. package/dist-engine-src/src/untracked_state/mod.rs +3 -5
  148. package/dist-engine-src/src/untracked_state/storage.rs +105 -57
  149. package/dist-engine-src/src/untracked_state/types.rs +63 -13
  150. package/dist-engine-src/src/version/context.rs +1 -13
  151. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  152. package/dist-engine-src/src/version/mod.rs +3 -2
  153. package/dist-engine-src/src/version/refs.rs +12 -103
  154. package/dist-engine-src/src/version/stage_rows.rs +15 -19
  155. package/package.json +1 -1
  156. package/dist-engine-src/src/changelog/codec.rs +0 -321
  157. package/dist-engine-src/src/changelog/context.rs +0 -92
  158. package/dist-engine-src/src/changelog/materialization.rs +0 -121
  159. package/dist-engine-src/src/changelog/mod.rs +0 -13
  160. package/dist-engine-src/src/changelog/reader.rs +0 -20
  161. package/dist-engine-src/src/changelog/storage.rs +0 -220
  162. package/dist-engine-src/src/changelog/types.rs +0 -38
  163. package/dist-engine-src/src/schema/builtin/lix_change_set.json +0 -18
  164. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +0 -75
  165. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +0 -63
  166. package/dist-engine-src/src/schema_registry.rs +0 -294
  167. package/dist-engine-src/src/sql2/commit_derived_provider.rs +0 -591
  168. package/dist-engine-src/src/tracked_state/rebuild.rs +0 -771
  169. package/dist-engine-src/src/tracked_state/tree_types.rs +0 -176
@@ -14,7 +14,8 @@ use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, Scalar
14
14
  use datafusion::datasource::TableType;
15
15
  use datafusion::execution::TaskContext;
16
16
  use datafusion::logical_expr::dml::InsertOp;
17
- use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
17
+ use datafusion::logical_expr::expr::InList;
18
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
18
19
  use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
19
20
  use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
20
21
  use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
@@ -26,8 +27,9 @@ use futures_util::{stream, TryStreamExt};
26
27
  use serde::Deserialize;
27
28
 
28
29
  use crate::binary_cas::{BlobDataReader, BlobHash};
30
+ use crate::entity_identity::EntityIdentity;
29
31
  use crate::functions::FunctionProviderHandle;
30
- use crate::live_state::LiveStateRow;
32
+ use crate::live_state::MaterializedLiveStateRow;
31
33
  use crate::live_state::{
32
34
  LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
33
35
  };
@@ -35,6 +37,7 @@ use crate::sql2::dml::{InsertExec, InsertSink};
35
37
  use crate::sql2::filesystem_predicates::{
36
38
  canonicalize_filesystem_path_filters, FilesystemPathKind,
37
39
  };
40
+ use crate::sql2::predicate_typecheck::validate_json_predicate_filters;
38
41
  use crate::sql2::version_scope::{
39
42
  explicit_version_ids_from_dml_filters, resolve_provider_version_ids,
40
43
  resolve_write_version_scope, VersionBinding,
@@ -45,9 +48,9 @@ use crate::sql2::write_normalization::{
45
48
  scalar_is_binary_or_null, InsertCell, InsertColumnIntents, SqlCell, UpdateAssignmentValues,
46
49
  UpdateCell,
47
50
  };
48
- use crate::transaction::types::StageRow;
51
+ use crate::transaction::types::{TransactionJson, TransactionWriteRow};
49
52
  use crate::version::VersionRefReader;
50
- use crate::{parse_row_metadata, serialize_row_metadata, LixError, RowMetadata};
53
+ use crate::{parse_row_metadata_value, serialize_row_metadata, LixError};
51
54
 
52
55
  const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";
53
56
  const BLOB_REF_SCHEMA_KEY: &str = "lix_binary_blob_ref";
@@ -65,8 +68,8 @@ use crate::sql2::{
65
68
  SqlWriteContext, WriteAccess, WriteContextLiveStateReader, WriteContextVersionRefReader,
66
69
  };
67
70
  use crate::transaction::types::{
68
- LogicalPrimaryKey, StageFileData, StageRowOrigin, StageWrite, StageWriteMode,
69
- StageWriteOperation,
71
+ LogicalPrimaryKey, TransactionFileData, TransactionWrite, TransactionWriteMode,
72
+ TransactionWriteOperation, TransactionWriteOrigin,
70
73
  };
71
74
 
72
75
  pub(crate) async fn register_lix_file_providers(
@@ -226,9 +229,19 @@ impl TableProvider for LixFileProvider {
226
229
  &self,
227
230
  filters: &[&Expr],
228
231
  ) -> Result<Vec<TableProviderFilterPushDown>> {
232
+ let analyzer = LixFileIdFilterAnalyzer;
229
233
  Ok(filters
230
234
  .iter()
231
- .map(|_| TableProviderFilterPushDown::Exact)
235
+ .map(|filter| {
236
+ if ExactStringColumnFilterAnalyzer::new("lixcol_version_id").supports(filter)
237
+ || analyzer.supports(filter)
238
+ || contains_column(filter, "path")
239
+ {
240
+ TableProviderFilterPushDown::Exact
241
+ } else {
242
+ TableProviderFilterPushDown::Unsupported
243
+ }
244
+ })
232
245
  .collect())
233
246
  }
234
247
 
@@ -241,8 +254,11 @@ impl TableProvider for LixFileProvider {
241
254
  ) -> Result<Arc<dyn ExecutionPlan>> {
242
255
  let projected_schema = projected_schema(&self.schema, projection)?;
243
256
  let scan_limit = if filters.is_empty() { limit } else { None };
244
- let mut request =
245
- lix_file_scan_request(self.version_binding.active_version_id(), scan_limit);
257
+ let mut request = lix_file_scan_request(
258
+ self.version_binding.active_version_id(),
259
+ Some(projected_schema.as_ref()),
260
+ scan_limit,
261
+ );
246
262
  if self.write_access.is_write() && matches!(self.version_binding, VersionBinding::Explicit)
247
263
  {
248
264
  request.filter.version_ids = explicit_version_ids_from_dml_filters(filters);
@@ -261,7 +277,9 @@ impl TableProvider for LixFileProvider {
261
277
  .await
262
278
  .map_err(lix_error_to_datafusion_error)?;
263
279
  let filters = canonicalize_filesystem_path_filters(filters, FilesystemPathKind::File)?;
280
+ let target_file_ids = file_id_constraint_from_filters(&filters)?;
264
281
  let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
282
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
265
283
  let physical_filters = filters
266
284
  .iter()
267
285
  .map(|expr| create_physical_expr(expr, &df_schema, _state.execution_props()))
@@ -273,6 +291,7 @@ impl TableProvider for LixFileProvider {
273
291
  projected_schema,
274
292
  projection.cloned(),
275
293
  request,
294
+ target_file_ids,
276
295
  physical_filters,
277
296
  limit,
278
297
  )))
@@ -314,11 +333,14 @@ impl TableProvider for LixFileProvider {
314
333
 
315
334
  let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
316
335
  let filters = canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::File)?;
336
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
317
337
  let physical_filters = filters
318
338
  .iter()
319
339
  .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
320
340
  .collect::<Result<Vec<_>>>()?;
321
- let mut request = lix_file_scan_request(self.version_binding.active_version_id(), None);
341
+ let target_file_ids = file_id_constraint_from_filters(&filters)?;
342
+ let mut request =
343
+ lix_file_scan_request(self.version_binding.active_version_id(), None, None);
322
344
  if matches!(self.version_binding, VersionBinding::Explicit) {
323
345
  request.filter.version_ids = explicit_version_ids_from_dml_filters(&filters);
324
346
  if request.filter.version_ids.is_empty() {
@@ -335,6 +357,7 @@ impl TableProvider for LixFileProvider {
335
357
  Arc::clone(&self.schema),
336
358
  self.version_binding.clone(),
337
359
  request,
360
+ target_file_ids,
338
361
  physical_filters,
339
362
  )))
340
363
  }
@@ -360,11 +383,13 @@ impl TableProvider for LixFileProvider {
360
383
  })
361
384
  .collect::<Result<Vec<_>>>()?;
362
385
  let filters = canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::File)?;
386
+ let target_file_ids = file_id_constraint_from_filters(&filters)?;
387
+ validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
363
388
  let physical_filters = filters
364
389
  .iter()
365
390
  .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
366
391
  .collect::<Result<Vec<_>>>()?;
367
- let request = lix_file_scan_request(self.version_binding.active_version_id(), None);
392
+ let request = lix_file_scan_request(self.version_binding.active_version_id(), None, None);
368
393
 
369
394
  Ok(Arc::new(LixFileUpdateExec::new(
370
395
  Arc::clone(&self.blob_reader),
@@ -373,6 +398,7 @@ impl TableProvider for LixFileProvider {
373
398
  self.version_binding.clone(),
374
399
  self.functions.clone(),
375
400
  request,
401
+ target_file_ids,
376
402
  physical_assignments,
377
403
  physical_filters,
378
404
  )))
@@ -473,13 +499,13 @@ impl InsertSink for LixFileInsertSink {
473
499
 
474
500
  if !staged.state_rows.is_empty() || !staged.file_data_writes.is_empty() {
475
501
  let intent = if staged.file_data_writes.is_empty() {
476
- StageWrite::Rows {
477
- mode: StageWriteMode::Insert,
502
+ TransactionWrite::Rows {
503
+ mode: TransactionWriteMode::Insert,
478
504
  rows: staged.state_rows,
479
505
  }
480
506
  } else {
481
- StageWrite::RowsWithFileData {
482
- mode: StageWriteMode::Insert,
507
+ TransactionWrite::RowsWithFileData {
508
+ mode: TransactionWriteMode::Insert,
483
509
  rows: staged.state_rows,
484
510
  file_data: staged.file_data_writes,
485
511
  count: staged.count,
@@ -509,6 +535,7 @@ struct LixFileDeleteExec {
509
535
  table_schema: SchemaRef,
510
536
  version_binding: VersionBinding,
511
537
  request: LiveStateScanRequest,
538
+ target_file_ids: FileIdConstraint,
512
539
  filters: Vec<Arc<dyn PhysicalExpr>>,
513
540
  result_schema: SchemaRef,
514
541
  properties: Arc<PlanProperties>,
@@ -527,6 +554,7 @@ impl LixFileDeleteExec {
527
554
  table_schema: SchemaRef,
528
555
  version_binding: VersionBinding,
529
556
  request: LiveStateScanRequest,
557
+ target_file_ids: FileIdConstraint,
530
558
  filters: Vec<Arc<dyn PhysicalExpr>>,
531
559
  ) -> Self {
532
560
  let result_schema = dml_count_schema();
@@ -542,6 +570,7 @@ impl LixFileDeleteExec {
542
570
  table_schema,
543
571
  version_binding,
544
572
  request,
573
+ target_file_ids,
545
574
  filters,
546
575
  result_schema,
547
576
  properties: Arc::new(properties),
@@ -605,15 +634,19 @@ impl ExecutionPlan for LixFileDeleteExec {
605
634
  let table_schema = Arc::clone(&self.table_schema);
606
635
  let version_binding = self.version_binding.clone();
607
636
  let request = self.request.clone();
637
+ let target_file_ids = self.target_file_ids.clone();
608
638
  let filters = self.filters.clone();
609
639
  let result_schema = Arc::clone(&self.result_schema);
610
640
  let stream_schema = Arc::clone(&result_schema);
611
641
 
612
642
  let stream = stream::once(async move {
613
- let rows = write_ctx
614
- .scan_live_state(&request)
615
- .await
616
- .map_err(lix_error_to_datafusion_error)?;
643
+ let rows = scan_lix_file_live_rows(
644
+ Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
645
+ &request,
646
+ &target_file_ids,
647
+ )
648
+ .await
649
+ .map_err(lix_error_to_datafusion_error)?;
617
650
  let blob_ref_file_ids =
618
651
  blob_ref_file_ids_from_live_rows(&rows).map_err(lix_error_to_datafusion_error)?;
619
652
  let source_batch = lix_file_record_batch(&table_schema, &blob_reader, rows)
@@ -629,8 +662,8 @@ impl ExecutionPlan for LixFileDeleteExec {
629
662
 
630
663
  if count > 0 {
631
664
  write_ctx
632
- .stage_write(StageWrite::Rows {
633
- mode: StageWriteMode::Replace,
665
+ .stage_write(TransactionWrite::Rows {
666
+ mode: TransactionWriteMode::Replace,
634
667
  rows: staged.state_rows,
635
668
  })
636
669
  .await
@@ -658,6 +691,7 @@ struct LixFileUpdateExec {
658
691
  version_binding: VersionBinding,
659
692
  functions: FunctionProviderHandle,
660
693
  request: LiveStateScanRequest,
694
+ target_file_ids: FileIdConstraint,
661
695
  assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
662
696
  filters: Vec<Arc<dyn PhysicalExpr>>,
663
697
  result_schema: SchemaRef,
@@ -678,6 +712,7 @@ impl LixFileUpdateExec {
678
712
  version_binding: VersionBinding,
679
713
  functions: FunctionProviderHandle,
680
714
  request: LiveStateScanRequest,
715
+ target_file_ids: FileIdConstraint,
681
716
  assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
682
717
  filters: Vec<Arc<dyn PhysicalExpr>>,
683
718
  ) -> Self {
@@ -695,6 +730,7 @@ impl LixFileUpdateExec {
695
730
  version_binding,
696
731
  functions,
697
732
  request,
733
+ target_file_ids,
698
734
  assignments,
699
735
  filters,
700
736
  result_schema,
@@ -765,16 +801,20 @@ impl ExecutionPlan for LixFileUpdateExec {
765
801
  let version_binding = self.version_binding.clone();
766
802
  let functions = self.functions.clone();
767
803
  let request = self.request.clone();
804
+ let target_file_ids = self.target_file_ids.clone();
768
805
  let assignments = self.assignments.clone();
769
806
  let filters = self.filters.clone();
770
807
  let result_schema = Arc::clone(&self.result_schema);
771
808
  let stream_schema = Arc::clone(&result_schema);
772
809
 
773
810
  let stream = stream::once(async move {
774
- let rows = write_ctx
775
- .scan_live_state(&request)
776
- .await
777
- .map_err(lix_error_to_datafusion_error)?;
811
+ let rows = scan_lix_file_live_rows(
812
+ Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
813
+ &request,
814
+ &target_file_ids,
815
+ )
816
+ .await
817
+ .map_err(lix_error_to_datafusion_error)?;
778
818
  let source_batch = lix_file_record_batch(&table_schema, &blob_reader, rows)
779
819
  .await
780
820
  .map_err(lix_error_to_datafusion_error)?;
@@ -804,13 +844,13 @@ impl ExecutionPlan for LixFileUpdateExec {
804
844
 
805
845
  if count > 0 {
806
846
  let intent = if staged.file_data_writes.is_empty() {
807
- StageWrite::Rows {
808
- mode: StageWriteMode::Replace,
847
+ TransactionWrite::Rows {
848
+ mode: TransactionWriteMode::Replace,
809
849
  rows: staged.state_rows,
810
850
  }
811
851
  } else {
812
- StageWrite::RowsWithFileData {
813
- mode: StageWriteMode::Replace,
852
+ TransactionWrite::RowsWithFileData {
853
+ mode: TransactionWriteMode::Replace,
814
854
  rows: staged.state_rows,
815
855
  file_data: staged.file_data_writes,
816
856
  count,
@@ -842,6 +882,7 @@ struct LixFileScanExec {
842
882
  output_schema: SchemaRef,
843
883
  projection: Option<Vec<usize>>,
844
884
  request: LiveStateScanRequest,
885
+ target_file_ids: FileIdConstraint,
845
886
  filters: Vec<Arc<dyn PhysicalExpr>>,
846
887
  limit: Option<usize>,
847
888
  properties: Arc<PlanProperties>,
@@ -861,6 +902,7 @@ impl LixFileScanExec {
861
902
  output_schema: SchemaRef,
862
903
  projection: Option<Vec<usize>>,
863
904
  request: LiveStateScanRequest,
905
+ target_file_ids: FileIdConstraint,
864
906
  filters: Vec<Arc<dyn PhysicalExpr>>,
865
907
  limit: Option<usize>,
866
908
  ) -> Self {
@@ -877,6 +919,7 @@ impl LixFileScanExec {
877
919
  output_schema,
878
920
  projection,
879
921
  request,
922
+ target_file_ids,
880
923
  filters,
881
924
  limit,
882
925
  properties: Arc::new(properties),
@@ -938,15 +981,18 @@ impl ExecutionPlan for LixFileScanExec {
938
981
  let live_state = Arc::clone(&self.live_state);
939
982
  let blob_reader = Arc::clone(&self.blob_reader);
940
983
  let request = self.request.clone();
984
+ let target_file_ids = self.target_file_ids.clone();
941
985
  let filters = self.filters.clone();
942
986
  let limit = self.limit;
943
987
  let output_schema = Arc::clone(&self.output_schema);
944
988
  let batch_schema = Arc::clone(&self.batch_schema);
945
989
  let projection = self.projection.clone();
946
990
  let fut = async move {
947
- let rows = live_state.scan_rows(&request).await.map_err(|error| {
948
- DataFusionError::Execution(format!("sql2 lix_file scan failed: {error}"))
949
- })?;
991
+ let rows = scan_lix_file_live_rows(live_state, &request, &target_file_ids)
992
+ .await
993
+ .map_err(|error| {
994
+ DataFusionError::Execution(format!("sql2 lix_file scan failed: {error}"))
995
+ })?;
950
996
  let batch = lix_file_record_batch(&batch_schema, &blob_reader, rows)
951
997
  .await
952
998
  .map_err(|error| {
@@ -976,7 +1022,7 @@ struct FileDescriptorRecord {
976
1022
  directory_id: Option<String>,
977
1023
  name: String,
978
1024
  hidden: bool,
979
- live: LiveStateRow,
1025
+ live: MaterializedLiveStateRow,
980
1026
  }
981
1027
 
982
1028
  #[derive(Debug, Clone)]
@@ -1015,8 +1061,8 @@ struct DirectoryDescriptorSnapshot {
1015
1061
 
1016
1062
  #[derive(Debug, Default)]
1017
1063
  struct LixFileStagedBatch {
1018
- state_rows: Vec<StageRow>,
1019
- file_data_writes: Vec<StageFileData>,
1064
+ state_rows: Vec<TransactionWriteRow>,
1065
+ file_data_writes: Vec<TransactionFileData>,
1020
1066
  count: u64,
1021
1067
  }
1022
1068
 
@@ -1043,7 +1089,7 @@ impl LixFileStagedBatch {
1043
1089
  fn lix_file_write_rows_from_batch(
1044
1090
  batch: &RecordBatch,
1045
1091
  version_binding: Option<&str>,
1046
- ) -> Result<Vec<StageRow>> {
1092
+ ) -> Result<Vec<TransactionWriteRow>> {
1047
1093
  Ok(lix_file_insert_stage_from_batch(batch, version_binding)?.state_rows)
1048
1094
  }
1049
1095
 
@@ -1066,7 +1112,7 @@ fn lix_file_delete_stage_from_batch(
1066
1112
  }
1067
1113
 
1068
1114
  fn blob_ref_file_ids_from_live_rows(
1069
- rows: &[LiveStateRow],
1115
+ rows: &[MaterializedLiveStateRow],
1070
1116
  ) -> std::result::Result<BTreeSet<String>, LixError> {
1071
1117
  let mut file_ids = BTreeSet::new();
1072
1118
  for row in rows {
@@ -1462,7 +1508,7 @@ fn stage_lix_file_data_write(
1462
1508
  file_id: String,
1463
1509
  data: Vec<u8>,
1464
1510
  context: FilesystemRowContext,
1465
- origin: Option<StageRowOrigin>,
1511
+ origin: Option<TransactionWriteOrigin>,
1466
1512
  ) -> Result<()> {
1467
1513
  let mut row = blob_ref_row(BlobRefRowInput {
1468
1514
  file_id: file_id.clone(),
@@ -1476,7 +1522,7 @@ fn stage_lix_file_data_write(
1476
1522
  .map_err(lix_error_to_datafusion_error)?;
1477
1523
  row.origin = origin;
1478
1524
  staged.state_rows.push(row);
1479
- staged.file_data_writes.push(StageFileData {
1525
+ staged.file_data_writes.push(TransactionFileData {
1480
1526
  file_id,
1481
1527
  version_id: context.version_id,
1482
1528
  untracked: context.untracked,
@@ -1485,7 +1531,11 @@ fn stage_lix_file_data_write(
1485
1531
  Ok(())
1486
1532
  }
1487
1533
 
1488
- fn attach_lix_file_insert_origin(rows: &mut [StageRow], surface_name: &str, file_id: &str) {
1534
+ fn attach_lix_file_insert_origin(
1535
+ rows: &mut [TransactionWriteRow],
1536
+ surface_name: &str,
1537
+ file_id: &str,
1538
+ ) {
1489
1539
  let origin = lix_file_insert_origin(surface_name, file_id);
1490
1540
  for row in rows {
1491
1541
  if row.schema_key == FILE_DESCRIPTOR_SCHEMA_KEY || row.schema_key == BLOB_REF_SCHEMA_KEY {
@@ -1494,10 +1544,10 @@ fn attach_lix_file_insert_origin(rows: &mut [StageRow], surface_name: &str, file
1494
1544
  }
1495
1545
  }
1496
1546
 
1497
- fn lix_file_insert_origin(surface_name: &str, file_id: &str) -> StageRowOrigin {
1498
- StageRowOrigin {
1547
+ fn lix_file_insert_origin(surface_name: &str, file_id: &str) -> TransactionWriteOrigin {
1548
+ TransactionWriteOrigin {
1499
1549
  surface: surface_name.to_string(),
1500
- operation: StageWriteOperation::Insert,
1550
+ operation: TransactionWriteOperation::Insert,
1501
1551
  primary_key: Some(LogicalPrimaryKey {
1502
1552
  columns: vec!["id".to_string()],
1503
1553
  values: vec![file_id.to_string()],
@@ -1599,7 +1649,7 @@ async fn file_path_resolvers_from_live_state(
1599
1649
  async fn lix_file_record_batch(
1600
1650
  schema: &SchemaRef,
1601
1651
  blob_reader: &Arc<dyn BlobDataReader>,
1602
- rows: Vec<LiveStateRow>,
1652
+ rows: Vec<MaterializedLiveStateRow>,
1603
1653
  ) -> Result<RecordBatch, LixError> {
1604
1654
  let projected_columns = schema
1605
1655
  .fields()
@@ -1688,7 +1738,6 @@ async fn lix_file_record_batch(
1688
1738
  let mut entity_ids = Vec::new();
1689
1739
  let mut schema_keys = Vec::new();
1690
1740
  let mut file_ids = Vec::new();
1691
- let mut schema_versions = Vec::new();
1692
1741
  let mut globals = Vec::new();
1693
1742
  let mut change_ids = Vec::new();
1694
1743
  let mut created_ats = Vec::new();
@@ -1734,10 +1783,9 @@ async fn lix_file_record_batch(
1734
1783
  names.push(Some(file.name));
1735
1784
  hiddens.push(Some(file.hidden));
1736
1785
  data_values.push(data);
1737
- entity_ids.push(Some(file.live.entity_id.as_string()?));
1786
+ entity_ids.push(Some(file.live.entity_id.as_json_array_text()?));
1738
1787
  schema_keys.push(Some(file.live.schema_key));
1739
1788
  file_ids.push(file.live.file_id);
1740
- schema_versions.push(file.live.schema_version);
1741
1789
  globals.push(Some(file.live.global));
1742
1790
  change_ids.push(file.live.change_id);
1743
1791
  created_ats.push(file.live.created_at);
@@ -1765,7 +1813,6 @@ async fn lix_file_record_batch(
1765
1813
  "lixcol_entity_id" => Arc::new(StringArray::from(entity_ids.clone())),
1766
1814
  "lixcol_schema_key" => Arc::new(StringArray::from(schema_keys.clone())),
1767
1815
  "lixcol_file_id" => Arc::new(StringArray::from(file_ids.clone())),
1768
- "lixcol_schema_version" => Arc::new(StringArray::from(schema_versions.clone())),
1769
1816
  "lixcol_global" => Arc::new(BooleanArray::from(globals.clone())),
1770
1817
  "lixcol_change_id" => Arc::new(StringArray::from(change_ids.clone())),
1771
1818
  "lixcol_created_at" => Arc::new(StringArray::from(created_ats.clone())),
@@ -1896,6 +1943,7 @@ fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) ->
1896
1943
 
1897
1944
  fn lix_file_scan_request(
1898
1945
  version_binding: Option<&str>,
1946
+ projected_schema: Option<&Schema>,
1899
1947
  limit: Option<usize>,
1900
1948
  ) -> LiveStateScanRequest {
1901
1949
  LiveStateScanRequest {
@@ -1910,11 +1958,261 @@ fn lix_file_scan_request(
1910
1958
  .unwrap_or_default(),
1911
1959
  ..LiveStateFilter::default()
1912
1960
  },
1913
- projection: LiveStateProjection::default(),
1961
+ projection: lix_file_live_state_projection(projected_schema),
1914
1962
  limit,
1915
1963
  }
1916
1964
  }
1917
1965
 
1966
+ fn lix_file_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
1967
+ let Some(schema) = projected_schema else {
1968
+ return LiveStateProjection::default();
1969
+ };
1970
+ let mut columns = Vec::new();
1971
+ let needs_snapshot = schema.fields().iter().any(|field| {
1972
+ matches!(
1973
+ field.name().as_str(),
1974
+ "path" | "directory_id" | "name" | "hidden" | "data"
1975
+ )
1976
+ });
1977
+ if needs_snapshot {
1978
+ columns.push("snapshot_content".to_string());
1979
+ }
1980
+ if schema
1981
+ .fields()
1982
+ .iter()
1983
+ .any(|field| field.name() == "lixcol_metadata")
1984
+ {
1985
+ columns.push("metadata".to_string());
1986
+ }
1987
+ LiveStateProjection { columns }
1988
+ }
1989
+
1990
+ async fn scan_lix_file_live_rows(
1991
+ live_state: Arc<dyn LiveStateReader>,
1992
+ request: &LiveStateScanRequest,
1993
+ target_file_ids: &FileIdConstraint,
1994
+ ) -> std::result::Result<Vec<MaterializedLiveStateRow>, LixError> {
1995
+ let target_file_ids = match target_file_ids {
1996
+ FileIdConstraint::All => return live_state.scan_rows(request).await,
1997
+ FileIdConstraint::None => return Ok(Vec::new()),
1998
+ FileIdConstraint::Ids(target_file_ids) => target_file_ids,
1999
+ };
2000
+
2001
+ let mut file_request = request.clone();
2002
+ file_request.filter.schema_keys = vec![
2003
+ FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
2004
+ BLOB_REF_SCHEMA_KEY.to_string(),
2005
+ ];
2006
+ file_request.filter.entity_ids = target_file_ids
2007
+ .iter()
2008
+ .map(|file_id| EntityIdentity::single(file_id.clone()))
2009
+ .collect();
2010
+
2011
+ let mut rows = live_state.scan_rows(&file_request).await?;
2012
+
2013
+ let mut directory_request = request.clone();
2014
+ directory_request.filter.schema_keys = vec![DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string()];
2015
+ directory_request.filter.entity_ids.clear();
2016
+ directory_request.limit = None;
2017
+ rows.extend(live_state.scan_rows(&directory_request).await?);
2018
+
2019
+ Ok(rows)
2020
+ }
2021
+
2022
+ #[derive(Debug, Clone, PartialEq, Eq)]
2023
+ enum FileIdConstraint {
2024
+ All,
2025
+ None,
2026
+ Ids(BTreeSet<String>),
2027
+ }
2028
+
2029
+ impl FileIdConstraint {
2030
+ fn from_ids(ids: Vec<String>) -> Self {
2031
+ let ids = ids.into_iter().collect::<BTreeSet<_>>();
2032
+ if ids.is_empty() {
2033
+ Self::None
2034
+ } else {
2035
+ Self::Ids(ids)
2036
+ }
2037
+ }
2038
+
2039
+ fn intersect(self, other: Self) -> Self {
2040
+ match (self, other) {
2041
+ (Self::None, _) | (_, Self::None) => Self::None,
2042
+ (Self::All, constraint) | (constraint, Self::All) => constraint,
2043
+ (Self::Ids(left), Self::Ids(right)) => {
2044
+ let ids = left.intersection(&right).cloned().collect::<BTreeSet<_>>();
2045
+ if ids.is_empty() {
2046
+ Self::None
2047
+ } else {
2048
+ Self::Ids(ids)
2049
+ }
2050
+ }
2051
+ }
2052
+ }
2053
+
2054
+ fn union(self, other: Self) -> Self {
2055
+ match (self, other) {
2056
+ (Self::All, _) | (_, Self::All) => Self::All,
2057
+ (Self::None, constraint) | (constraint, Self::None) => constraint,
2058
+ (Self::Ids(mut left), Self::Ids(right)) => {
2059
+ left.extend(right);
2060
+ Self::Ids(left)
2061
+ }
2062
+ }
2063
+ }
2064
+ }
2065
+
2066
+ fn file_id_constraint_from_filters(filters: &[Expr]) -> Result<FileIdConstraint> {
2067
+ let analyzer = LixFileIdFilterAnalyzer;
2068
+ let mut constraint = FileIdConstraint::All;
2069
+ for filter in filters {
2070
+ if let Some(filter_constraint) = analyzer.analyze(filter)? {
2071
+ constraint = constraint.intersect(filter_constraint);
2072
+ }
2073
+ }
2074
+ Ok(constraint)
2075
+ }
2076
+
2077
+ struct LixFileIdFilterAnalyzer;
2078
+
2079
+ impl LixFileIdFilterAnalyzer {
2080
+ fn supports(&self, expr: &Expr) -> bool {
2081
+ self.analyze(expr)
2082
+ .is_ok_and(|constraint| constraint.is_some())
2083
+ }
2084
+
2085
+ fn analyze(&self, expr: &Expr) -> Result<Option<FileIdConstraint>> {
2086
+ ExactStringColumnFilterAnalyzer::new("id").analyze(expr)
2087
+ }
2088
+ }
2089
+
2090
+ struct ExactStringColumnFilterAnalyzer {
2091
+ column_name: &'static str,
2092
+ }
2093
+
2094
+ impl ExactStringColumnFilterAnalyzer {
2095
+ fn new(column_name: &'static str) -> Self {
2096
+ Self { column_name }
2097
+ }
2098
+
2099
+ fn supports(&self, expr: &Expr) -> bool {
2100
+ self.analyze(expr)
2101
+ .is_ok_and(|constraint| constraint.is_some())
2102
+ }
2103
+
2104
+ fn analyze(&self, expr: &Expr) -> Result<Option<FileIdConstraint>> {
2105
+ match expr {
2106
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
2107
+ let Some(left) = self.analyze(&binary_expr.left)? else {
2108
+ return Ok(None);
2109
+ };
2110
+ let Some(right) = self.analyze(&binary_expr.right)? else {
2111
+ return Ok(None);
2112
+ };
2113
+ Ok(Some(left.intersect(right)))
2114
+ }
2115
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
2116
+ let Some(left) = self.analyze(&binary_expr.left)? else {
2117
+ return Ok(None);
2118
+ };
2119
+ let Some(right) = self.analyze(&binary_expr.right)? else {
2120
+ return Ok(None);
2121
+ };
2122
+ Ok(Some(left.union(right)))
2123
+ }
2124
+ Expr::BinaryExpr(binary_expr) => Ok(self
2125
+ .value_from_binary_filter(binary_expr)
2126
+ .map(|value| FileIdConstraint::Ids(BTreeSet::from([value])))),
2127
+ Expr::InList(in_list) => Ok(self
2128
+ .values_from_in_list_filter(in_list)
2129
+ .map(FileIdConstraint::from_ids)),
2130
+ _ => Ok(None),
2131
+ }
2132
+ }
2133
+
2134
+ fn value_from_binary_filter(&self, binary_expr: &BinaryExpr) -> Option<String> {
2135
+ if binary_expr.op != Operator::Eq {
2136
+ return None;
2137
+ }
2138
+ self.value_from_column_literal_filter(&binary_expr.left, &binary_expr.right)
2139
+ .or_else(|| {
2140
+ self.value_from_column_literal_filter(&binary_expr.right, &binary_expr.left)
2141
+ })
2142
+ }
2143
+
2144
+ fn values_from_in_list_filter(&self, in_list: &InList) -> Option<Vec<String>> {
2145
+ if in_list.negated {
2146
+ return None;
2147
+ }
2148
+ let Expr::Column(column) = in_list.expr.as_ref() else {
2149
+ return None;
2150
+ };
2151
+ if column.name != self.column_name {
2152
+ return None;
2153
+ }
2154
+ let values = in_list
2155
+ .list
2156
+ .iter()
2157
+ .map(string_expr_literal)
2158
+ .collect::<Option<Vec<_>>>()?;
2159
+ Some(values)
2160
+ }
2161
+
2162
+ fn value_from_column_literal_filter(
2163
+ &self,
2164
+ column_expr: &Expr,
2165
+ literal_expr: &Expr,
2166
+ ) -> Option<String> {
2167
+ let Expr::Column(column) = column_expr else {
2168
+ return None;
2169
+ };
2170
+ if column.name != self.column_name {
2171
+ return None;
2172
+ }
2173
+ string_expr_literal(literal_expr)
2174
+ }
2175
+ }
2176
+
2177
+ fn string_expr_literal(expr: &Expr) -> Option<String> {
2178
+ let Expr::Literal(literal, _) = expr else {
2179
+ return None;
2180
+ };
2181
+ match literal {
2182
+ ScalarValue::Utf8(Some(value))
2183
+ | ScalarValue::Utf8View(Some(value))
2184
+ | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
2185
+ _ => None,
2186
+ }
2187
+ }
2188
+
2189
+ fn contains_column(expr: &Expr, column_name: &str) -> bool {
2190
+ match expr {
2191
+ Expr::Column(column) => column.name == column_name,
2192
+ Expr::BinaryExpr(binary_expr) => {
2193
+ contains_column(&binary_expr.left, column_name)
2194
+ || contains_column(&binary_expr.right, column_name)
2195
+ }
2196
+ Expr::InList(in_list) => {
2197
+ contains_column(&in_list.expr, column_name)
2198
+ || in_list
2199
+ .list
2200
+ .iter()
2201
+ .any(|expr| contains_column(expr, column_name))
2202
+ }
2203
+ Expr::Between(between) => {
2204
+ contains_column(&between.expr, column_name)
2205
+ || contains_column(&between.low, column_name)
2206
+ || contains_column(&between.high, column_name)
2207
+ }
2208
+ Expr::Not(expr) | Expr::IsNull(expr) | Expr::IsNotNull(expr) => {
2209
+ contains_column(expr, column_name)
2210
+ }
2211
+ Expr::Negative(expr) => contains_column(expr, column_name),
2212
+ _ => false,
2213
+ }
2214
+ }
2215
+
1918
2216
  fn validate_lix_file_update_assignments(
1919
2217
  schema: &SchemaRef,
1920
2218
  assignments: &[(String, Expr)],
@@ -2098,10 +2396,13 @@ fn update_optional_metadata_value(
2098
2396
  row_index: usize,
2099
2397
  column_name: &str,
2100
2398
  context: &str,
2101
- ) -> Result<Option<RowMetadata>> {
2399
+ ) -> Result<Option<TransactionJson>> {
2102
2400
  update_optional_string_value(batch, assignment_values, row_index, column_name)?
2103
2401
  .map(|value| {
2104
- parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
2402
+ let metadata = parse_row_metadata_value(&value, context)
2403
+ .map_err(super::error::lix_error_to_datafusion_error)?;
2404
+ TransactionJson::from_value(metadata, &format!("{context} metadata"))
2405
+ .map_err(super::error::lix_error_to_datafusion_error)
2105
2406
  })
2106
2407
  .transpose()
2107
2408
  }
@@ -2174,10 +2475,13 @@ fn optional_metadata_value(
2174
2475
  row_index: usize,
2175
2476
  column_name: &str,
2176
2477
  context: &str,
2177
- ) -> Result<Option<RowMetadata>> {
2478
+ ) -> Result<Option<TransactionJson>> {
2178
2479
  optional_string_value(batch, row_index, column_name)?
2179
2480
  .map(|value| {
2180
- parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
2481
+ let metadata = parse_row_metadata_value(&value, context)
2482
+ .map_err(super::error::lix_error_to_datafusion_error)?;
2483
+ TransactionJson::from_value(metadata, &format!("{context} metadata"))
2484
+ .map_err(super::error::lix_error_to_datafusion_error)
2181
2485
  })
2182
2486
  .transpose()
2183
2487
  }
@@ -2257,10 +2561,9 @@ fn lix_file_schema() -> SchemaRef {
2257
2561
  Field::new("name", DataType::Utf8, false),
2258
2562
  Field::new("hidden", DataType::Boolean, true),
2259
2563
  Field::new("data", DataType::Binary, true),
2260
- Field::new("lixcol_entity_id", DataType::Utf8, false),
2564
+ json_field("lixcol_entity_id", false),
2261
2565
  Field::new("lixcol_schema_key", DataType::Utf8, false),
2262
2566
  Field::new("lixcol_file_id", DataType::Utf8, true),
2263
- Field::new("lixcol_schema_version", DataType::Utf8, false),
2264
2567
  Field::new("lixcol_global", DataType::Boolean, true),
2265
2568
  Field::new("lixcol_change_id", DataType::Utf8, true),
2266
2569
  Field::new("lixcol_created_at", DataType::Utf8, true),
@@ -2298,19 +2601,24 @@ mod tests {
2298
2601
  use datafusion::arrow::array::{ArrayRef, BinaryArray, BooleanArray, StringArray};
2299
2602
  use datafusion::arrow::datatypes::{DataType, Field, Schema};
2300
2603
  use datafusion::arrow::record_batch::RecordBatch;
2604
+ use datafusion::common::{Column, ScalarValue};
2301
2605
  use datafusion::execution::TaskContext;
2606
+ use datafusion::logical_expr::expr::InList;
2302
2607
  use datafusion::logical_expr::lit;
2303
- use serde_json::{json, Value as JsonValue};
2608
+ use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
2609
+ use serde_json::Value as JsonValue;
2304
2610
 
2305
2611
  use crate::binary_cas::BlobDataReader;
2306
2612
  use crate::functions::{
2307
2613
  FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
2308
2614
  };
2309
- use crate::live_state::LiveStateRow;
2615
+ use crate::live_state::MaterializedLiveStateRow;
2310
2616
  use crate::live_state::{LiveStateReader, LiveStateRowRequest, LiveStateScanRequest};
2311
2617
  use crate::sql2::dml::InsertSink;
2312
2618
  use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
2313
- use crate::transaction::types::{StageWrite, StageWriteMode, StageWriteOutcome};
2619
+ use crate::transaction::types::{
2620
+ TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
2621
+ };
2314
2622
  use crate::LixError;
2315
2623
 
2316
2624
  use super::{
@@ -2331,6 +2639,120 @@ mod tests {
2331
2639
  )
2332
2640
  }
2333
2641
 
2642
+ fn string_literal(value: &str) -> Expr {
2643
+ Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
2644
+ }
2645
+
2646
+ fn column(name: &str) -> Expr {
2647
+ Expr::Column(Column::from_name(name))
2648
+ }
2649
+
2650
+ fn eq_filter(column_name: &str, value: &str) -> Expr {
2651
+ Expr::BinaryExpr(BinaryExpr::new(
2652
+ Box::new(column(column_name)),
2653
+ Operator::Eq,
2654
+ Box::new(string_literal(value)),
2655
+ ))
2656
+ }
2657
+
2658
+ #[test]
2659
+ fn file_id_filters_support_string_id_predicates() {
2660
+ let analyzer = super::LixFileIdFilterAnalyzer;
2661
+ let constraint = analyzer
2662
+ .analyze(&Expr::InList(InList::new(
2663
+ Box::new(column("id")),
2664
+ vec![string_literal("file-b"), string_literal("file-a")],
2665
+ false,
2666
+ )))
2667
+ .unwrap()
2668
+ .unwrap();
2669
+
2670
+ assert_eq!(
2671
+ constraint,
2672
+ super::FileIdConstraint::Ids(BTreeSet::from([
2673
+ "file-a".to_string(),
2674
+ "file-b".to_string()
2675
+ ]))
2676
+ );
2677
+ assert!(analyzer.supports(&eq_filter("id", "file-a")));
2678
+ assert!(analyzer.supports(&Expr::BinaryExpr(BinaryExpr::new(
2679
+ Box::new(string_literal("file-a")),
2680
+ Operator::Eq,
2681
+ Box::new(column("id")),
2682
+ ))));
2683
+ }
2684
+
2685
+ #[test]
2686
+ fn file_id_filters_intersect_and_union_boolean_predicates() {
2687
+ let analyzer = super::LixFileIdFilterAnalyzer;
2688
+ let left = Expr::InList(InList::new(
2689
+ Box::new(column("id")),
2690
+ vec![string_literal("file-a"), string_literal("file-b")],
2691
+ false,
2692
+ ));
2693
+ let right = Expr::InList(InList::new(
2694
+ Box::new(column("id")),
2695
+ vec![string_literal("file-b"), string_literal("file-c")],
2696
+ false,
2697
+ ));
2698
+
2699
+ let and_constraint = analyzer
2700
+ .analyze(&Expr::BinaryExpr(BinaryExpr::new(
2701
+ Box::new(left.clone()),
2702
+ Operator::And,
2703
+ Box::new(right.clone()),
2704
+ )))
2705
+ .unwrap()
2706
+ .unwrap();
2707
+ assert_eq!(
2708
+ and_constraint,
2709
+ super::FileIdConstraint::Ids(BTreeSet::from(["file-b".to_string()]))
2710
+ );
2711
+
2712
+ let or_constraint = analyzer
2713
+ .analyze(&Expr::BinaryExpr(BinaryExpr::new(
2714
+ Box::new(left),
2715
+ Operator::Or,
2716
+ Box::new(right),
2717
+ )))
2718
+ .unwrap()
2719
+ .unwrap();
2720
+ assert_eq!(
2721
+ or_constraint,
2722
+ super::FileIdConstraint::Ids(BTreeSet::from([
2723
+ "file-a".to_string(),
2724
+ "file-b".to_string(),
2725
+ "file-c".to_string()
2726
+ ]))
2727
+ );
2728
+ }
2729
+
2730
+ #[test]
2731
+ fn file_id_filters_detect_contradictions() {
2732
+ let filters = vec![Expr::BinaryExpr(BinaryExpr::new(
2733
+ Box::new(eq_filter("id", "file-a")),
2734
+ Operator::And,
2735
+ Box::new(eq_filter("id", "file-b")),
2736
+ ))];
2737
+
2738
+ assert_eq!(
2739
+ super::file_id_constraint_from_filters(&filters).unwrap(),
2740
+ super::FileIdConstraint::None
2741
+ );
2742
+ }
2743
+
2744
+ #[test]
2745
+ fn file_id_filters_ignore_non_id_and_negated_predicates() {
2746
+ let analyzer = super::LixFileIdFilterAnalyzer;
2747
+
2748
+ assert!(!analyzer.supports(&eq_filter("name", "readme.md")));
2749
+ assert!(!analyzer.supports(&Expr::InList(InList::new(
2750
+ Box::new(column("id")),
2751
+ vec![string_literal("file-a")],
2752
+ true,
2753
+ ))));
2754
+ }
2755
+
2334
2756
  fn lix_file_update_stage_from_batch_for_test(
2335
2757
  batch: &RecordBatch,
2336
2758
  version_binding: Option<&str>,
@@ -2361,8 +2783,8 @@ mod tests {
2361
2783
 
2362
2784
  #[derive(Default)]
2363
2785
  struct CapturingWriteContext {
2364
- rows: Vec<LiveStateRow>,
2365
- writes: Vec<StageWrite>,
2786
+ rows: Vec<MaterializedLiveStateRow>,
2787
+ writes: Vec<TransactionWrite>,
2366
2788
  }
2367
2789
 
2368
2790
  #[async_trait]
@@ -2371,7 +2793,10 @@ mod tests {
2371
2793
  &self,
2372
2794
  hashes: &[crate::binary_cas::BlobHash],
2373
2795
  ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2374
- Ok(crate::binary_cas::BlobBytesBatch::missing(hashes.len()))
2796
+ Ok(crate::binary_cas::BlobBytesBatch::new(vec![
2797
+ None;
2798
+ hashes.len()
2799
+ ]))
2375
2800
  }
2376
2801
  }
2377
2802
 
@@ -2399,7 +2824,7 @@ mod tests {
2399
2824
  async fn scan_live_state(
2400
2825
  &mut self,
2401
2826
  _request: &LiveStateScanRequest,
2402
- ) -> Result<Vec<LiveStateRow>, LixError> {
2827
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2403
2828
  Ok(self.rows.clone())
2404
2829
  }
2405
2830
 
@@ -2413,15 +2838,18 @@ mod tests {
2413
2838
  Ok(Some(format!("commit-{version_id}")))
2414
2839
  }
2415
2840
 
2416
- async fn stage_write(&mut self, write: StageWrite) -> Result<StageWriteOutcome, LixError> {
2841
+ async fn stage_write(
2842
+ &mut self,
2843
+ write: TransactionWrite,
2844
+ ) -> Result<TransactionWriteOutcome, LixError> {
2417
2845
  self.writes.push(write);
2418
- Ok(StageWriteOutcome { count: 0 })
2846
+ Ok(TransactionWriteOutcome { count: 0 })
2419
2847
  }
2420
2848
  }
2421
2849
 
2422
2850
  #[derive(Default)]
2423
2851
  struct RowsLiveStateReader {
2424
- rows: Vec<LiveStateRow>,
2852
+ rows: Vec<MaterializedLiveStateRow>,
2425
2853
  }
2426
2854
 
2427
2855
  #[async_trait]
@@ -2429,14 +2857,14 @@ mod tests {
2429
2857
  async fn scan_rows(
2430
2858
  &self,
2431
2859
  _request: &LiveStateScanRequest,
2432
- ) -> Result<Vec<LiveStateRow>, LixError> {
2860
+ ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2433
2861
  Ok(self.rows.clone())
2434
2862
  }
2435
2863
 
2436
2864
  async fn load_row(
2437
2865
  &self,
2438
2866
  _request: &LiveStateRowRequest,
2439
- ) -> Result<Option<LiveStateRow>, LixError> {
2867
+ ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
2440
2868
  Ok(None)
2441
2869
  }
2442
2870
  }
@@ -2445,15 +2873,14 @@ mod tests {
2445
2873
  entity_id: &str,
2446
2874
  version_id: &str,
2447
2875
  snapshot_content: &str,
2448
- ) -> LiveStateRow {
2449
- LiveStateRow {
2450
- entity_id: crate::entity_identity::EntityIdentity::from_string(entity_id)
2451
- .expect("entity id should decode"),
2876
+ ) -> MaterializedLiveStateRow {
2877
+ MaterializedLiveStateRow {
2878
+ entity_id: crate::entity_identity::EntityIdentity::single(entity_id),
2452
2879
  schema_key: super::DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
2453
2880
  file_id: None,
2454
2881
  snapshot_content: Some(snapshot_content.to_string()),
2455
2882
  metadata: None,
2456
- schema_version: "1".to_string(),
2883
+ deleted: false,
2457
2884
  version_id: version_id.to_string(),
2458
2885
  change_id: Some(format!("change-{entity_id}")),
2459
2886
  commit_id: Some(format!("commit-{entity_id}")),
@@ -2464,15 +2891,18 @@ mod tests {
2464
2891
  }
2465
2892
  }
2466
2893
 
2467
- fn live_file_row(entity_id: &str, version_id: &str, snapshot_content: &str) -> LiveStateRow {
2468
- LiveStateRow {
2469
- entity_id: crate::entity_identity::EntityIdentity::from_string(entity_id)
2470
- .expect("entity id should decode"),
2894
+ fn live_file_row(
2895
+ entity_id: &str,
2896
+ version_id: &str,
2897
+ snapshot_content: &str,
2898
+ ) -> MaterializedLiveStateRow {
2899
+ MaterializedLiveStateRow {
2900
+ entity_id: crate::entity_identity::EntityIdentity::single(entity_id),
2471
2901
  schema_key: super::FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
2472
2902
  file_id: None,
2473
2903
  snapshot_content: Some(snapshot_content.to_string()),
2474
2904
  metadata: None,
2475
- schema_version: "1".to_string(),
2905
+ deleted: false,
2476
2906
  version_id: version_id.to_string(),
2477
2907
  change_id: Some(format!("change-{entity_id}")),
2478
2908
  commit_id: Some(format!("commit-{entity_id}")),
@@ -2653,11 +3083,13 @@ mod tests {
2653
3083
  );
2654
3084
  assert_eq!(rows[0].schema_key, "lix_file_descriptor");
2655
3085
  assert_eq!(rows[0].version_id, "version-b");
2656
- assert_eq!(rows[0].schema_version.as_str(), "1");
2657
- assert_eq!(rows[0].metadata.as_ref(), Some(&json!({"source": "file"})));
2658
- let snapshot: JsonValue =
2659
- serde_json::from_str(rows[0].snapshot_content.as_deref().unwrap())
2660
- .expect("descriptor snapshot JSON");
3086
+ assert_eq!(
3087
+ rows[0].metadata.as_ref(),
3088
+ Some(&TransactionJson::from_value_for_test(
3089
+ serde_json::json!({"source": "file"})
3090
+ ))
3091
+ );
3092
+ let snapshot = rows[0].snapshot.as_ref().expect("descriptor snapshot JSON");
2661
3093
  assert_eq!(snapshot["id"], "file-readme");
2662
3094
  assert_eq!(snapshot["directory_id"], "dir-docs");
2663
3095
  assert_eq!(snapshot["name"], "readme.md");
@@ -2743,9 +3175,7 @@ mod tests {
2743
3175
  .iter()
2744
3176
  .find(|row| row.schema_key == "lix_file_descriptor")
2745
3177
  .expect("file descriptor row should be staged");
2746
- let snapshot: JsonValue =
2747
- serde_json::from_str(descriptor.snapshot_content.as_deref().unwrap())
2748
- .expect("descriptor snapshot JSON");
3178
+ let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
2749
3179
  assert_eq!(snapshot["id"], "file-readme");
2750
3180
  assert_eq!(snapshot["directory_id"], "dir-docs");
2751
3181
  assert_eq!(snapshot["name"], "renamed.md");
@@ -2825,9 +3255,12 @@ mod tests {
2825
3255
  .iter()
2826
3256
  .all(|row| row.schema_key != "lix_directory_descriptor"));
2827
3257
 
2828
- let snapshot: JsonValue =
2829
- serde_json::from_str(staged.state_rows[0].snapshot_content.as_deref().unwrap())
2830
- .expect("descriptor snapshot JSON");
3258
+ let snapshot: JsonValue = staged.state_rows[0]
3259
+ .snapshot
3260
+ .as_ref()
3261
+ .unwrap()
3262
+ .value()
3263
+ .clone();
2831
3264
  assert_eq!(snapshot["directory_id"], "dir-docs");
2832
3265
  assert_eq!(snapshot["name"], "renamed.md");
2833
3266
  }
@@ -2882,9 +3315,7 @@ mod tests {
2882
3315
  .iter()
2883
3316
  .find(|row| row.schema_key == "lix_file_descriptor")
2884
3317
  .expect("file descriptor should be staged");
2885
- let snapshot: JsonValue =
2886
- serde_json::from_str(descriptor.snapshot_content.as_deref().unwrap())
2887
- .expect("descriptor snapshot JSON");
3318
+ let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
2888
3319
  assert_eq!(snapshot["directory_id"], "dir-generated-docs");
2889
3320
  }
2890
3321
 
@@ -3003,7 +3434,7 @@ mod tests {
3003
3434
  ))
3004
3435
  );
3005
3436
  assert_eq!(descriptor.file_id, None);
3006
- assert_eq!(descriptor.snapshot_content, None);
3437
+ assert_eq!(descriptor.snapshot, None);
3007
3438
 
3008
3439
  let blob_ref = staged
3009
3440
  .state_rows
@@ -3017,7 +3448,7 @@ mod tests {
3017
3448
  ))
3018
3449
  );
3019
3450
  assert_eq!(blob_ref.file_id.as_deref(), Some("file-readme"));
3020
- assert_eq!(blob_ref.snapshot_content, None);
3451
+ assert_eq!(blob_ref.snapshot, None);
3021
3452
  }
3022
3453
 
3023
3454
  #[test]
@@ -3035,7 +3466,7 @@ mod tests {
3035
3466
  "file-readme"
3036
3467
  ))
3037
3468
  );
3038
- assert_eq!(staged.state_rows[0].snapshot_content, None);
3469
+ assert_eq!(staged.state_rows[0].snapshot, None);
3039
3470
  }
3040
3471
 
3041
3472
  #[test]
@@ -3069,9 +3500,7 @@ mod tests {
3069
3500
  .iter()
3070
3501
  .find(|row| row.schema_key == "lix_file_descriptor")
3071
3502
  .expect("file descriptor row should be staged");
3072
- let snapshot: JsonValue =
3073
- serde_json::from_str(descriptor.snapshot_content.as_deref().unwrap())
3074
- .expect("descriptor snapshot JSON");
3503
+ let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3075
3504
  assert_eq!(snapshot["id"], "file-readme");
3076
3505
  assert_eq!(snapshot["directory_id"], "dir-guides");
3077
3506
  assert_eq!(snapshot["name"], "readme.md");
@@ -3105,9 +3534,7 @@ mod tests {
3105
3534
  .iter()
3106
3535
  .find(|row| row.schema_key == "lix_file_descriptor")
3107
3536
  .expect("file descriptor row should be staged");
3108
- let snapshot: JsonValue =
3109
- serde_json::from_str(descriptor.snapshot_content.as_deref().unwrap())
3110
- .expect("descriptor snapshot JSON");
3537
+ let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3111
3538
  assert_eq!(snapshot["directory_id"], "dir-generated-guides");
3112
3539
  }
3113
3540
 
@@ -3133,8 +3560,8 @@ mod tests {
3133
3560
  let writes = &write_context.writes;
3134
3561
  assert_eq!(writes.len(), 1);
3135
3562
  match &writes[0] {
3136
- StageWrite::Rows { mode, rows } => {
3137
- assert_eq!(*mode, StageWriteMode::Insert);
3563
+ TransactionWrite::Rows { mode, rows } => {
3564
+ assert_eq!(*mode, TransactionWriteMode::Insert);
3138
3565
  assert_eq!(rows.len(), 1);
3139
3566
  assert_eq!(
3140
3567
  rows[0].entity_id.as_ref(),
@@ -3170,14 +3597,14 @@ mod tests {
3170
3597
  let writes = &write_context.writes;
3171
3598
  assert_eq!(writes.len(), 1);
3172
3599
  match &writes[0] {
3173
- StageWrite::RowsWithFileData {
3600
+ TransactionWrite::RowsWithFileData {
3174
3601
  mode,
3175
3602
  rows,
3176
3603
  file_data,
3177
3604
  count,
3178
3605
  ..
3179
3606
  } => {
3180
- assert_eq!(*mode, StageWriteMode::Insert);
3607
+ assert_eq!(*mode, TransactionWriteMode::Insert);
3181
3608
  assert_eq!(*count, 1);
3182
3609
  assert_eq!(rows.len(), 2);
3183
3610
  assert!(rows
@@ -3230,7 +3657,7 @@ mod tests {
3230
3657
  let writes = &write_context.writes;
3231
3658
  assert_eq!(writes.len(), 1);
3232
3659
  match &writes[0] {
3233
- StageWrite::RowsWithFileData {
3660
+ TransactionWrite::RowsWithFileData {
3234
3661
  rows,
3235
3662
  file_data,
3236
3663
  count,
@@ -3243,9 +3670,7 @@ mod tests {
3243
3670
  .iter()
3244
3671
  .find(|row| row.schema_key == "lix_file_descriptor")
3245
3672
  .expect("file descriptor row should be staged");
3246
- let snapshot: JsonValue =
3247
- serde_json::from_str(descriptor.snapshot_content.as_deref().unwrap())
3248
- .expect("descriptor snapshot JSON");
3673
+ let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3249
3674
  assert_eq!(snapshot["directory_id"], "dir-guides");
3250
3675
  }
3251
3676
  other => panic!("expected insert with file data staged write, got {other:?}"),