@lix-js/sdk 0.6.0-preview.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/README.md +76 -4
  2. package/dist/errors.d.ts +7 -0
  3. package/dist/errors.js +19 -0
  4. package/dist/index.d.ts +4 -5
  5. package/dist/index.js +3 -3
  6. package/dist/native.d.ts +1 -0
  7. package/dist/native.js +47 -0
  8. package/dist/open-lix.d.ts +39 -201
  9. package/dist/open-lix.js +59 -284
  10. package/dist/result.d.ts +18 -0
  11. package/dist/result.js +48 -0
  12. package/dist/types.d.ts +114 -1
  13. package/dist/value.d.ts +28 -0
  14. package/dist/value.js +245 -0
  15. package/package.json +20 -50
  16. package/SKILL.md +0 -506
  17. package/dist/builtin-schemas.d.ts +0 -1
  18. package/dist/builtin-schemas.js +0 -1
  19. package/dist/engine-wasm/index.d.ts +0 -87
  20. package/dist/engine-wasm/index.js +0 -339
  21. package/dist/engine-wasm/wasm/lix_engine.d.ts +0 -79
  22. package/dist/engine-wasm/wasm/lix_engine.js +0 -821
  23. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  24. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +0 -26
  25. package/dist/generated/builtin-schemas.d.ts +0 -427
  26. package/dist/generated/builtin-schemas.js +0 -643
  27. package/dist/sqlite/index.d.ts +0 -12
  28. package/dist/sqlite/index.js +0 -303
  29. package/dist-engine-src/README.md +0 -18
  30. package/dist-engine-src/src/backend/kv.rs +0 -358
  31. package/dist-engine-src/src/backend/mod.rs +0 -12
  32. package/dist-engine-src/src/backend/testing.rs +0 -658
  33. package/dist-engine-src/src/backend/types.rs +0 -96
  34. package/dist-engine-src/src/binary_cas/chunking.rs +0 -31
  35. package/dist-engine-src/src/binary_cas/codec.rs +0 -346
  36. package/dist-engine-src/src/binary_cas/context.rs +0 -139
  37. package/dist-engine-src/src/binary_cas/kv.rs +0 -1063
  38. package/dist-engine-src/src/binary_cas/mod.rs +0 -11
  39. package/dist-engine-src/src/binary_cas/types.rs +0 -121
  40. package/dist-engine-src/src/catalog/context.rs +0 -412
  41. package/dist-engine-src/src/catalog/mod.rs +0 -10
  42. package/dist-engine-src/src/catalog/schema.rs +0 -4
  43. package/dist-engine-src/src/catalog/snapshot.rs +0 -1114
  44. package/dist-engine-src/src/cel/context.rs +0 -86
  45. package/dist-engine-src/src/cel/error.rs +0 -19
  46. package/dist-engine-src/src/cel/mod.rs +0 -8
  47. package/dist-engine-src/src/cel/provider.rs +0 -9
  48. package/dist-engine-src/src/cel/runtime.rs +0 -167
  49. package/dist-engine-src/src/cel/value.rs +0 -50
  50. package/dist-engine-src/src/commit_graph/context.rs +0 -901
  51. package/dist-engine-src/src/commit_graph/mod.rs +0 -11
  52. package/dist-engine-src/src/commit_graph/types.rs +0 -109
  53. package/dist-engine-src/src/commit_graph/walker.rs +0 -756
  54. package/dist-engine-src/src/commit_store/codec.rs +0 -887
  55. package/dist-engine-src/src/commit_store/context.rs +0 -944
  56. package/dist-engine-src/src/commit_store/materialization.rs +0 -84
  57. package/dist-engine-src/src/commit_store/mod.rs +0 -16
  58. package/dist-engine-src/src/commit_store/storage.rs +0 -600
  59. package/dist-engine-src/src/commit_store/types.rs +0 -215
  60. package/dist-engine-src/src/common/error.rs +0 -313
  61. package/dist-engine-src/src/common/fingerprint.rs +0 -3
  62. package/dist-engine-src/src/common/fs_path.rs +0 -1336
  63. package/dist-engine-src/src/common/identity.rs +0 -145
  64. package/dist-engine-src/src/common/json_pointer.rs +0 -67
  65. package/dist-engine-src/src/common/metadata.rs +0 -40
  66. package/dist-engine-src/src/common/mod.rs +0 -23
  67. package/dist-engine-src/src/common/types.rs +0 -105
  68. package/dist-engine-src/src/common/wire.rs +0 -222
  69. package/dist-engine-src/src/domain.rs +0 -324
  70. package/dist-engine-src/src/engine.rs +0 -225
  71. package/dist-engine-src/src/entity_identity.rs +0 -405
  72. package/dist-engine-src/src/functions/context.rs +0 -292
  73. package/dist-engine-src/src/functions/deterministic.rs +0 -113
  74. package/dist-engine-src/src/functions/mod.rs +0 -18
  75. package/dist-engine-src/src/functions/provider.rs +0 -130
  76. package/dist-engine-src/src/functions/state.rs +0 -336
  77. package/dist-engine-src/src/functions/types.rs +0 -37
  78. package/dist-engine-src/src/init.rs +0 -558
  79. package/dist-engine-src/src/json_store/compression.rs +0 -77
  80. package/dist-engine-src/src/json_store/context.rs +0 -423
  81. package/dist-engine-src/src/json_store/encoded.rs +0 -15
  82. package/dist-engine-src/src/json_store/mod.rs +0 -12
  83. package/dist-engine-src/src/json_store/store.rs +0 -1109
  84. package/dist-engine-src/src/json_store/types.rs +0 -217
  85. package/dist-engine-src/src/lib.rs +0 -62
  86. package/dist-engine-src/src/live_state/context.rs +0 -2019
  87. package/dist-engine-src/src/live_state/mod.rs +0 -15
  88. package/dist-engine-src/src/live_state/overlay.rs +0 -75
  89. package/dist-engine-src/src/live_state/reader.rs +0 -23
  90. package/dist-engine-src/src/live_state/types.rs +0 -222
  91. package/dist-engine-src/src/live_state/visibility.rs +0 -223
  92. package/dist-engine-src/src/plugin/archive.rs +0 -438
  93. package/dist-engine-src/src/plugin/component.rs +0 -183
  94. package/dist-engine-src/src/plugin/install.rs +0 -619
  95. package/dist-engine-src/src/plugin/manifest.rs +0 -516
  96. package/dist-engine-src/src/plugin/materializer.rs +0 -477
  97. package/dist-engine-src/src/plugin/mod.rs +0 -33
  98. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -118
  99. package/dist-engine-src/src/plugin/storage.rs +0 -74
  100. package/dist-engine-src/src/schema/annotations/defaults.rs +0 -275
  101. package/dist-engine-src/src/schema/annotations/mod.rs +0 -1
  102. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -21
  103. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -29
  104. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -29
  105. package/dist-engine-src/src/schema/builtin/lix_change.json +0 -63
  106. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -45
  107. package/dist-engine-src/src/schema/builtin/lix_commit.json +0 -24
  108. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +0 -53
  109. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -52
  110. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -52
  111. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -40
  112. package/dist-engine-src/src/schema/builtin/lix_label.json +0 -29
  113. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +0 -74
  114. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +0 -25
  115. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -34
  116. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -48
  117. package/dist-engine-src/src/schema/builtin/mod.rs +0 -222
  118. package/dist-engine-src/src/schema/compatibility.rs +0 -787
  119. package/dist-engine-src/src/schema/definition.json +0 -187
  120. package/dist-engine-src/src/schema/definition.rs +0 -742
  121. package/dist-engine-src/src/schema/key.rs +0 -138
  122. package/dist-engine-src/src/schema/mod.rs +0 -20
  123. package/dist-engine-src/src/schema/seed.rs +0 -14
  124. package/dist-engine-src/src/schema/tests.rs +0 -780
  125. package/dist-engine-src/src/session/context.rs +0 -404
  126. package/dist-engine-src/src/session/create_version.rs +0 -88
  127. package/dist-engine-src/src/session/execute.rs +0 -541
  128. package/dist-engine-src/src/session/merge/analysis.rs +0 -102
  129. package/dist-engine-src/src/session/merge/apply.rs +0 -23
  130. package/dist-engine-src/src/session/merge/conflicts.rs +0 -63
  131. package/dist-engine-src/src/session/merge/mod.rs +0 -11
  132. package/dist-engine-src/src/session/merge/stats.rs +0 -65
  133. package/dist-engine-src/src/session/merge/version.rs +0 -427
  134. package/dist-engine-src/src/session/mod.rs +0 -27
  135. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +0 -100
  136. package/dist-engine-src/src/session/switch_version.rs +0 -110
  137. package/dist-engine-src/src/session/transaction.rs +0 -76
  138. package/dist-engine-src/src/sql2/change_provider.rs +0 -331
  139. package/dist-engine-src/src/sql2/classify.rs +0 -174
  140. package/dist-engine-src/src/sql2/context.rs +0 -311
  141. package/dist-engine-src/src/sql2/directory_history_provider.rs +0 -631
  142. package/dist-engine-src/src/sql2/directory_provider.rs +0 -2453
  143. package/dist-engine-src/src/sql2/dml.rs +0 -148
  144. package/dist-engine-src/src/sql2/entity_history_provider.rs +0 -440
  145. package/dist-engine-src/src/sql2/entity_provider.rs +0 -3211
  146. package/dist-engine-src/src/sql2/error.rs +0 -215
  147. package/dist-engine-src/src/sql2/execute.rs +0 -3533
  148. package/dist-engine-src/src/sql2/file_history_provider.rs +0 -910
  149. package/dist-engine-src/src/sql2/file_provider.rs +0 -3679
  150. package/dist-engine-src/src/sql2/filesystem_planner.rs +0 -1490
  151. package/dist-engine-src/src/sql2/filesystem_predicates.rs +0 -159
  152. package/dist-engine-src/src/sql2/filesystem_visibility.rs +0 -383
  153. package/dist-engine-src/src/sql2/history_projection.rs +0 -56
  154. package/dist-engine-src/src/sql2/history_provider.rs +0 -412
  155. package/dist-engine-src/src/sql2/history_route.rs +0 -657
  156. package/dist-engine-src/src/sql2/lix_state_provider.rs +0 -2512
  157. package/dist-engine-src/src/sql2/mod.rs +0 -47
  158. package/dist-engine-src/src/sql2/predicate_typecheck.rs +0 -246
  159. package/dist-engine-src/src/sql2/public_bind/assignment.rs +0 -46
  160. package/dist-engine-src/src/sql2/public_bind/capability.rs +0 -41
  161. package/dist-engine-src/src/sql2/public_bind/dml.rs +0 -172
  162. package/dist-engine-src/src/sql2/public_bind/mod.rs +0 -26
  163. package/dist-engine-src/src/sql2/public_bind/table.rs +0 -168
  164. package/dist-engine-src/src/sql2/read_only.rs +0 -63
  165. package/dist-engine-src/src/sql2/record_batch.rs +0 -17
  166. package/dist-engine-src/src/sql2/result_metadata.rs +0 -29
  167. package/dist-engine-src/src/sql2/runtime.rs +0 -60
  168. package/dist-engine-src/src/sql2/session.rs +0 -132
  169. package/dist-engine-src/src/sql2/udfs/common.rs +0 -295
  170. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +0 -53
  171. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +0 -47
  172. package/dist-engine-src/src/sql2/udfs/lix_json.rs +0 -100
  173. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +0 -99
  174. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +0 -99
  175. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +0 -82
  176. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +0 -85
  177. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +0 -76
  178. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +0 -76
  179. package/dist-engine-src/src/sql2/udfs/mod.rs +0 -89
  180. package/dist-engine-src/src/sql2/udfs/public_call.rs +0 -238
  181. package/dist-engine-src/src/sql2/version_provider.rs +0 -1202
  182. package/dist-engine-src/src/sql2/version_scope.rs +0 -394
  183. package/dist-engine-src/src/sql2/write_normalization.rs +0 -345
  184. package/dist-engine-src/src/storage/context.rs +0 -356
  185. package/dist-engine-src/src/storage/mod.rs +0 -14
  186. package/dist-engine-src/src/storage/read_scope.rs +0 -88
  187. package/dist-engine-src/src/storage/types.rs +0 -501
  188. package/dist-engine-src/src/storage_bench.rs +0 -4863
  189. package/dist-engine-src/src/test_support.rs +0 -228
  190. package/dist-engine-src/src/tracked_state/by_file_index.rs +0 -98
  191. package/dist-engine-src/src/tracked_state/codec.rs +0 -2085
  192. package/dist-engine-src/src/tracked_state/context.rs +0 -1867
  193. package/dist-engine-src/src/tracked_state/diff.rs +0 -686
  194. package/dist-engine-src/src/tracked_state/materialization.rs +0 -403
  195. package/dist-engine-src/src/tracked_state/materializer.rs +0 -488
  196. package/dist-engine-src/src/tracked_state/merge.rs +0 -492
  197. package/dist-engine-src/src/tracked_state/mod.rs +0 -32
  198. package/dist-engine-src/src/tracked_state/storage.rs +0 -375
  199. package/dist-engine-src/src/tracked_state/tree.rs +0 -3187
  200. package/dist-engine-src/src/tracked_state/types.rs +0 -231
  201. package/dist-engine-src/src/transaction/commit.rs +0 -1484
  202. package/dist-engine-src/src/transaction/context.rs +0 -1548
  203. package/dist-engine-src/src/transaction/live_state_overlay.rs +0 -35
  204. package/dist-engine-src/src/transaction/mod.rs +0 -13
  205. package/dist-engine-src/src/transaction/normalization.rs +0 -890
  206. package/dist-engine-src/src/transaction/prep.rs +0 -37
  207. package/dist-engine-src/src/transaction/schema_resolver.rs +0 -149
  208. package/dist-engine-src/src/transaction/staging.rs +0 -1731
  209. package/dist-engine-src/src/transaction/types.rs +0 -460
  210. package/dist-engine-src/src/transaction/validation.rs +0 -5830
  211. package/dist-engine-src/src/untracked_state/codec.rs +0 -307
  212. package/dist-engine-src/src/untracked_state/context.rs +0 -98
  213. package/dist-engine-src/src/untracked_state/materialization.rs +0 -63
  214. package/dist-engine-src/src/untracked_state/mod.rs +0 -15
  215. package/dist-engine-src/src/untracked_state/storage.rs +0 -396
  216. package/dist-engine-src/src/untracked_state/types.rs +0 -146
  217. package/dist-engine-src/src/version/context.rs +0 -40
  218. package/dist-engine-src/src/version/lifecycle.rs +0 -221
  219. package/dist-engine-src/src/version/mod.rs +0 -13
  220. package/dist-engine-src/src/version/refs.rs +0 -330
  221. package/dist-engine-src/src/version/stage_rows.rs +0 -67
  222. package/dist-engine-src/src/version/types.rs +0 -21
  223. package/dist-engine-src/src/wasm/mod.rs +0 -60
@@ -1,3679 +0,0 @@
1
- use std::any::Any;
2
- use std::collections::{BTreeMap, BTreeSet};
3
- use std::sync::Arc;
4
-
5
- use async_trait::async_trait;
6
- use datafusion::arrow::array::{
7
- ArrayRef, BinaryArray, BooleanArray, RecordBatchOptions, StringArray, UInt64Array,
8
- };
9
- use datafusion::arrow::compute::{and, filter_record_batch};
10
- use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
11
- use datafusion::arrow::record_batch::RecordBatch;
12
- use datafusion::catalog::{Session, TableProvider};
13
- use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue};
14
- use datafusion::datasource::TableType;
15
- use datafusion::execution::TaskContext;
16
- use datafusion::logical_expr::dml::InsertOp;
17
- use datafusion::logical_expr::expr::InList;
18
- use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
19
- use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
20
- use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
21
- use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
22
- use datafusion::physical_plan::{
23
- DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
24
- };
25
- use datafusion::prelude::SessionContext;
26
- use futures_util::{stream, TryStreamExt};
27
- use serde::Deserialize;
28
-
29
- use crate::binary_cas::{BlobDataReader, BlobHash};
30
- use crate::entity_identity::EntityIdentity;
31
- use crate::functions::FunctionProviderHandle;
32
- use crate::live_state::MaterializedLiveStateRow;
33
- use crate::live_state::{
34
- LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
35
- };
36
- use crate::sql2::dml::{InsertExec, InsertSink};
37
- use crate::sql2::filesystem_predicates::{
38
- canonicalize_filesystem_path_filters, FilesystemPathKind,
39
- };
40
- use crate::sql2::predicate_typecheck::validate_json_predicate_filters;
41
- use crate::sql2::version_scope::{
42
- explicit_version_ids_from_dml_filters, resolve_provider_version_ids,
43
- resolve_write_version_scope, VersionBinding,
44
- };
45
- use crate::sql2::write_normalization::{
46
- is_binary_type, lix_file_data_type_error, lix_file_data_type_error_with_value,
47
- logical_expr_is_binary_or_null, reject_non_binary_casts_for_insert_column,
48
- scalar_is_binary_or_null, InsertCell, InsertColumnIntents, SqlCell, UpdateAssignmentValues,
49
- UpdateCell,
50
- };
51
- use crate::transaction::types::{TransactionJson, TransactionWriteRow};
52
- use crate::version::VersionRefReader;
53
- use crate::{parse_row_metadata_value, serialize_row_metadata, LixError};
54
-
55
- const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";
56
- const BLOB_REF_SCHEMA_KEY: &str = "lix_binary_blob_ref";
57
- const DIRECTORY_DESCRIPTOR_SCHEMA_KEY: &str = "lix_directory_descriptor";
58
-
59
- use super::filesystem_planner::{
60
- blob_ref_row, directory_path_resolvers_from_state_rows, file_descriptor_row,
61
- file_descriptor_write_row, filesystem_storage_scope_key, plan_file_delete,
62
- plan_file_path_update, BlobRefRowInput, DirectoryPathResolver, FileDeleteInput,
63
- FileDescriptorRowInput, FileDescriptorWriteIntent, FilePathWriteInput, FilesystemDeletePlan,
64
- FilesystemRowContext,
65
- };
66
- use super::result_metadata::json_field;
67
- use crate::sql2::{
68
- SqlWriteContext, WriteAccess, WriteContextLiveStateReader, WriteContextVersionRefReader,
69
- };
70
- use crate::transaction::types::{
71
- LogicalPrimaryKey, TransactionFileData, TransactionWrite, TransactionWriteMode,
72
- TransactionWriteOperation, TransactionWriteOrigin,
73
- };
74
-
75
- pub(crate) async fn register_lix_file_providers(
76
- session: &SessionContext,
77
- active_version_id: &str,
78
- live_state: Arc<dyn LiveStateReader>,
79
- version_ref: Arc<dyn VersionRefReader>,
80
- blob_reader: Arc<dyn BlobDataReader>,
81
- functions: FunctionProviderHandle,
82
- ) -> Result<(), LixError> {
83
- session
84
- .register_table(
85
- "lix_file_by_version",
86
- Arc::new(LixFileProvider::by_version(
87
- Arc::clone(&live_state),
88
- Arc::clone(&version_ref),
89
- Arc::clone(&blob_reader),
90
- functions.clone(),
91
- )),
92
- )
93
- .map_err(datafusion_error_to_lix_error)?;
94
- session
95
- .register_table(
96
- "lix_file",
97
- Arc::new(LixFileProvider::active_version(
98
- active_version_id,
99
- live_state,
100
- version_ref,
101
- Arc::clone(&blob_reader),
102
- functions,
103
- )),
104
- )
105
- .map_err(datafusion_error_to_lix_error)?;
106
- Ok(())
107
- }
108
-
109
- pub(crate) async fn register_lix_file_write_providers(
110
- session: &SessionContext,
111
- write_ctx: SqlWriteContext,
112
- ) -> Result<(), LixError> {
113
- session
114
- .register_table(
115
- "lix_file_by_version",
116
- Arc::new(LixFileProvider::by_version_with_write(write_ctx.clone())),
117
- )
118
- .map_err(datafusion_error_to_lix_error)?;
119
- session
120
- .register_table(
121
- "lix_file",
122
- Arc::new(LixFileProvider::active_version_with_write(write_ctx)),
123
- )
124
- .map_err(datafusion_error_to_lix_error)?;
125
- Ok(())
126
- }
127
-
128
- pub(crate) struct LixFileProvider {
129
- schema: SchemaRef,
130
- live_state: Arc<dyn LiveStateReader>,
131
- version_ref: Arc<dyn VersionRefReader>,
132
- blob_reader: Arc<dyn BlobDataReader>,
133
- write_access: WriteAccess,
134
- functions: FunctionProviderHandle,
135
- version_binding: VersionBinding,
136
- }
137
-
138
- impl std::fmt::Debug for LixFileProvider {
139
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
140
- f.debug_struct("LixFileProvider").finish()
141
- }
142
- }
143
-
144
- impl LixFileProvider {
145
- pub(crate) fn active_version(
146
- active_version_id: impl Into<String>,
147
- live_state: Arc<dyn LiveStateReader>,
148
- version_ref: Arc<dyn VersionRefReader>,
149
- blob_reader: Arc<dyn BlobDataReader>,
150
- functions: FunctionProviderHandle,
151
- ) -> Self {
152
- Self {
153
- schema: lix_file_schema(),
154
- live_state,
155
- version_ref,
156
- blob_reader,
157
- write_access: WriteAccess::read_only(),
158
- functions,
159
- version_binding: VersionBinding::active(active_version_id),
160
- }
161
- }
162
-
163
- pub(crate) fn active_version_with_write(write_ctx: SqlWriteContext) -> Self {
164
- let active_version_id = write_ctx.active_version_id();
165
- let functions = write_ctx.functions();
166
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
167
- let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
168
- let blob_reader = write_ctx.blob_reader();
169
- Self {
170
- schema: lix_file_schema(),
171
- live_state,
172
- version_ref,
173
- blob_reader,
174
- write_access: WriteAccess::write(write_ctx),
175
- functions,
176
- version_binding: VersionBinding::active(active_version_id),
177
- }
178
- }
179
-
180
- pub(crate) fn by_version(
181
- live_state: Arc<dyn LiveStateReader>,
182
- version_ref: Arc<dyn VersionRefReader>,
183
- blob_reader: Arc<dyn BlobDataReader>,
184
- functions: FunctionProviderHandle,
185
- ) -> Self {
186
- Self {
187
- schema: lix_file_by_version_schema(),
188
- live_state,
189
- version_ref,
190
- blob_reader,
191
- write_access: WriteAccess::read_only(),
192
- functions,
193
- version_binding: VersionBinding::explicit(),
194
- }
195
- }
196
-
197
- pub(crate) fn by_version_with_write(write_ctx: SqlWriteContext) -> Self {
198
- let functions = write_ctx.functions();
199
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
200
- let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
201
- let blob_reader = write_ctx.blob_reader();
202
- Self {
203
- schema: lix_file_by_version_schema(),
204
- live_state,
205
- version_ref,
206
- blob_reader,
207
- write_access: WriteAccess::write(write_ctx),
208
- functions,
209
- version_binding: VersionBinding::explicit(),
210
- }
211
- }
212
- }
213
-
214
- #[async_trait]
215
- impl TableProvider for LixFileProvider {
216
- fn as_any(&self) -> &dyn Any {
217
- self
218
- }
219
-
220
- fn schema(&self) -> SchemaRef {
221
- Arc::clone(&self.schema)
222
- }
223
-
224
- fn table_type(&self) -> TableType {
225
- TableType::Base
226
- }
227
-
228
- fn supports_filters_pushdown(
229
- &self,
230
- filters: &[&Expr],
231
- ) -> Result<Vec<TableProviderFilterPushDown>> {
232
- let analyzer = LixFileIdFilterAnalyzer;
233
- Ok(filters
234
- .iter()
235
- .map(|filter| {
236
- if ExactStringColumnFilterAnalyzer::new("lixcol_version_id").supports(filter)
237
- || analyzer.supports(filter)
238
- || contains_column(filter, "path")
239
- {
240
- TableProviderFilterPushDown::Exact
241
- } else {
242
- TableProviderFilterPushDown::Unsupported
243
- }
244
- })
245
- .collect())
246
- }
247
-
248
- async fn scan(
249
- &self,
250
- _state: &dyn Session,
251
- projection: Option<&Vec<usize>>,
252
- filters: &[Expr],
253
- limit: Option<usize>,
254
- ) -> Result<Arc<dyn ExecutionPlan>> {
255
- let projected_schema = projected_schema(&self.schema, projection)?;
256
- let scan_limit = if filters.is_empty() { limit } else { None };
257
- let mut request = lix_file_scan_request(
258
- self.version_binding.active_version_id(),
259
- Some(projected_schema.as_ref()),
260
- scan_limit,
261
- );
262
- if self.write_access.is_write() && matches!(self.version_binding, VersionBinding::Explicit)
263
- {
264
- request.filter.version_ids = explicit_version_ids_from_dml_filters(filters);
265
- if request.filter.version_ids.is_empty() {
266
- return Err(DataFusionError::Plan(
267
- "DELETE FROM lix_file_by_version requires an explicit lixcol_version_id predicate"
268
- .to_string(),
269
- ));
270
- }
271
- }
272
- request.filter.version_ids = resolve_provider_version_ids(
273
- self.version_ref.as_ref(),
274
- &self.version_binding,
275
- request.filter.version_ids,
276
- )
277
- .await
278
- .map_err(lix_error_to_datafusion_error)?;
279
- let filters = canonicalize_filesystem_path_filters(filters, FilesystemPathKind::File)?;
280
- let target_file_ids = file_id_constraint_from_filters(&filters)?;
281
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
282
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
283
- let physical_filters = filters
284
- .iter()
285
- .map(|expr| create_physical_expr(expr, &df_schema, _state.execution_props()))
286
- .collect::<Result<Vec<_>>>()?;
287
- Ok(Arc::new(LixFileScanExec::new(
288
- Arc::clone(&self.live_state),
289
- Arc::clone(&self.blob_reader),
290
- Arc::clone(&self.schema),
291
- projected_schema,
292
- projection.cloned(),
293
- request,
294
- target_file_ids,
295
- physical_filters,
296
- limit,
297
- )))
298
- }
299
-
300
- async fn insert_into(
301
- &self,
302
- _state: &dyn Session,
303
- input: Arc<dyn ExecutionPlan>,
304
- insert_op: InsertOp,
305
- ) -> Result<Arc<dyn ExecutionPlan>> {
306
- if insert_op != InsertOp::Append {
307
- return not_impl_err!("{insert_op} not implemented for lix_file yet");
308
- }
309
-
310
- let write_ctx = self.write_access.require_write("INSERT into lix_file")?;
311
- let insert_column_intents = InsertColumnIntents::from_input(&input);
312
- let include_data_writes = insert_column_intents.includes_column("data");
313
- if include_data_writes {
314
- reject_non_binary_casts_for_insert_column(&input, "data", "INSERT into lix_file")?;
315
- }
316
-
317
- let sink = LixFileInsertSink::new(
318
- input.schema(),
319
- write_ctx.clone(),
320
- self.functions.clone(),
321
- self.version_binding.clone(),
322
- include_data_writes,
323
- );
324
- Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
325
- }
326
-
327
- async fn delete_from(
328
- &self,
329
- state: &dyn Session,
330
- filters: Vec<Expr>,
331
- ) -> Result<Arc<dyn ExecutionPlan>> {
332
- let write_ctx = self.write_access.require_write("DELETE FROM lix_file")?;
333
-
334
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
335
- let filters = canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::File)?;
336
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
337
- let physical_filters = filters
338
- .iter()
339
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
340
- .collect::<Result<Vec<_>>>()?;
341
- let target_file_ids = file_id_constraint_from_filters(&filters)?;
342
- let mut request =
343
- lix_file_scan_request(self.version_binding.active_version_id(), None, None);
344
- if matches!(self.version_binding, VersionBinding::Explicit) {
345
- request.filter.version_ids = explicit_version_ids_from_dml_filters(&filters);
346
- if request.filter.version_ids.is_empty() {
347
- return Err(DataFusionError::Plan(
348
- "DELETE FROM lix_file_by_version requires an explicit lixcol_version_id predicate"
349
- .to_string(),
350
- ));
351
- }
352
- }
353
-
354
- Ok(Arc::new(LixFileDeleteExec::new(
355
- Arc::clone(&self.blob_reader),
356
- write_ctx.clone(),
357
- Arc::clone(&self.schema),
358
- self.version_binding.clone(),
359
- request,
360
- target_file_ids,
361
- physical_filters,
362
- )))
363
- }
364
-
365
- async fn update(
366
- &self,
367
- state: &dyn Session,
368
- assignments: Vec<(String, Expr)>,
369
- filters: Vec<Expr>,
370
- ) -> Result<Arc<dyn ExecutionPlan>> {
371
- let write_ctx = self.write_access.require_write("UPDATE lix_file")?;
372
-
373
- validate_lix_file_update_assignments(&self.schema, &assignments)?;
374
-
375
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
376
- let physical_assignments = assignments
377
- .iter()
378
- .map(|(column_name, expr)| {
379
- Ok((
380
- column_name.clone(),
381
- create_physical_expr(expr, &df_schema, state.execution_props())?,
382
- ))
383
- })
384
- .collect::<Result<Vec<_>>>()?;
385
- let filters = canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::File)?;
386
- let target_file_ids = file_id_constraint_from_filters(&filters)?;
387
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
388
- let physical_filters = filters
389
- .iter()
390
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
391
- .collect::<Result<Vec<_>>>()?;
392
- let request = lix_file_scan_request(self.version_binding.active_version_id(), None, None);
393
-
394
- Ok(Arc::new(LixFileUpdateExec::new(
395
- Arc::clone(&self.blob_reader),
396
- write_ctx.clone(),
397
- Arc::clone(&self.schema),
398
- self.version_binding.clone(),
399
- self.functions.clone(),
400
- request,
401
- target_file_ids,
402
- physical_assignments,
403
- physical_filters,
404
- )))
405
- }
406
- }
407
-
408
- #[allow(dead_code)]
409
- struct LixFileInsertSink {
410
- write_ctx: SqlWriteContext,
411
- functions: FunctionProviderHandle,
412
- version_binding: VersionBinding,
413
- surface_name: &'static str,
414
- include_data_writes: bool,
415
- }
416
-
417
- impl std::fmt::Debug for LixFileInsertSink {
418
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
419
- f.debug_struct("LixFileInsertSink").finish()
420
- }
421
- }
422
-
423
- impl LixFileInsertSink {
424
- fn new(
425
- _schema: SchemaRef,
426
- write_ctx: SqlWriteContext,
427
- functions: FunctionProviderHandle,
428
- version_binding: VersionBinding,
429
- include_data_writes: bool,
430
- ) -> Self {
431
- let surface_name = lix_file_surface_name(&version_binding);
432
- Self {
433
- write_ctx,
434
- functions,
435
- version_binding,
436
- surface_name,
437
- include_data_writes,
438
- }
439
- }
440
- }
441
-
442
- impl DisplayAs for LixFileInsertSink {
443
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444
- match t {
445
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
446
- write!(f, "LixFileInsertSink")
447
- }
448
- DisplayFormatType::TreeRender => write!(f, "LixFileInsertSink"),
449
- }
450
- }
451
- }
452
-
453
- #[async_trait]
454
- impl InsertSink for LixFileInsertSink {
455
- async fn write_batches(
456
- &self,
457
- batches: Vec<RecordBatch>,
458
- _context: &Arc<TaskContext>,
459
- ) -> Result<u64> {
460
- let mut staged = LixFileStagedBatch::default();
461
- let mut path_resolvers = None;
462
- for batch in batches {
463
- if path_resolvers.is_none() {
464
- path_resolvers = Some(
465
- file_path_resolvers_from_live_state(
466
- Arc::new(WriteContextLiveStateReader::new(self.write_ctx.clone())),
467
- self.version_binding.active_version_id(),
468
- )
469
- .await
470
- .map_err(lix_error_to_datafusion_error)?,
471
- );
472
- }
473
- if record_batch_has_non_null_column(&batch, "path")? {
474
- staged.extend(lix_file_insert_stage_from_batch_with_path_resolvers(
475
- &batch,
476
- self.version_binding.active_version_id(),
477
- self.surface_name,
478
- path_resolvers
479
- .as_mut()
480
- .expect("path resolver should be initialized"),
481
- &mut || self.functions.call_uuid_v7(),
482
- self.include_data_writes,
483
- )?);
484
- } else {
485
- staged.extend(
486
- lix_file_insert_stage_from_batch_with_id_generator_and_path_resolvers(
487
- &batch,
488
- self.version_binding.active_version_id(),
489
- self.surface_name,
490
- path_resolvers
491
- .as_mut()
492
- .expect("path resolver should be initialized"),
493
- &mut || self.functions.call_uuid_v7(),
494
- self.include_data_writes,
495
- )?,
496
- );
497
- }
498
- }
499
-
500
- if !staged.state_rows.is_empty() || !staged.file_data_writes.is_empty() {
501
- let intent = if staged.file_data_writes.is_empty() {
502
- TransactionWrite::Rows {
503
- mode: TransactionWriteMode::Insert,
504
- rows: staged.state_rows,
505
- }
506
- } else {
507
- TransactionWrite::RowsWithFileData {
508
- mode: TransactionWriteMode::Insert,
509
- rows: staged.state_rows,
510
- file_data: staged.file_data_writes,
511
- count: staged.count,
512
- }
513
- };
514
- self.write_ctx
515
- .stage_write(intent)
516
- .await
517
- .map_err(lix_error_to_datafusion_error)?;
518
- }
519
-
520
- Ok(staged.count)
521
- }
522
- }
523
-
524
- fn lix_file_surface_name(version_binding: &VersionBinding) -> &'static str {
525
- match version_binding {
526
- VersionBinding::Active { .. } => "lix_file",
527
- VersionBinding::Explicit => "lix_file_by_version",
528
- }
529
- }
530
-
531
- #[allow(dead_code)]
532
- struct LixFileDeleteExec {
533
- blob_reader: Arc<dyn BlobDataReader>,
534
- write_ctx: SqlWriteContext,
535
- table_schema: SchemaRef,
536
- version_binding: VersionBinding,
537
- request: LiveStateScanRequest,
538
- target_file_ids: FileIdConstraint,
539
- filters: Vec<Arc<dyn PhysicalExpr>>,
540
- result_schema: SchemaRef,
541
- properties: Arc<PlanProperties>,
542
- }
543
-
544
- impl std::fmt::Debug for LixFileDeleteExec {
545
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
546
- f.debug_struct("LixFileDeleteExec").finish()
547
- }
548
- }
549
-
550
- impl LixFileDeleteExec {
551
- fn new(
552
- blob_reader: Arc<dyn BlobDataReader>,
553
- write_ctx: SqlWriteContext,
554
- table_schema: SchemaRef,
555
- version_binding: VersionBinding,
556
- request: LiveStateScanRequest,
557
- target_file_ids: FileIdConstraint,
558
- filters: Vec<Arc<dyn PhysicalExpr>>,
559
- ) -> Self {
560
- let result_schema = dml_count_schema();
561
- let properties = PlanProperties::new(
562
- EquivalenceProperties::new(Arc::clone(&result_schema)),
563
- Partitioning::UnknownPartitioning(1),
564
- EmissionType::Final,
565
- Boundedness::Bounded,
566
- );
567
- Self {
568
- blob_reader,
569
- write_ctx,
570
- table_schema,
571
- version_binding,
572
- request,
573
- target_file_ids,
574
- filters,
575
- result_schema,
576
- properties: Arc::new(properties),
577
- }
578
- }
579
- }
580
-
581
- impl DisplayAs for LixFileDeleteExec {
582
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
583
- match t {
584
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
585
- write!(f, "LixFileDeleteExec(filters={})", self.filters.len())
586
- }
587
- DisplayFormatType::TreeRender => write!(f, "LixFileDeleteExec"),
588
- }
589
- }
590
- }
591
-
592
- impl ExecutionPlan for LixFileDeleteExec {
593
- fn name(&self) -> &str {
594
- "LixFileDeleteExec"
595
- }
596
-
597
- fn as_any(&self) -> &dyn Any {
598
- self
599
- }
600
-
601
- fn properties(&self) -> &Arc<PlanProperties> {
602
- &self.properties
603
- }
604
-
605
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
606
- Vec::new()
607
- }
608
-
609
- fn with_new_children(
610
- self: Arc<Self>,
611
- children: Vec<Arc<dyn ExecutionPlan>>,
612
- ) -> Result<Arc<dyn ExecutionPlan>> {
613
- if !children.is_empty() {
614
- return Err(DataFusionError::Execution(
615
- "LixFileDeleteExec does not accept children".to_string(),
616
- ));
617
- }
618
- Ok(self)
619
- }
620
-
621
- fn execute(
622
- &self,
623
- partition: usize,
624
- _context: Arc<TaskContext>,
625
- ) -> Result<SendableRecordBatchStream> {
626
- if partition != 0 {
627
- return Err(DataFusionError::Execution(format!(
628
- "LixFileDeleteExec only exposes one partition, got {partition}"
629
- )));
630
- }
631
-
632
- let blob_reader = Arc::clone(&self.blob_reader);
633
- let write_ctx = self.write_ctx.clone();
634
- let table_schema = Arc::clone(&self.table_schema);
635
- let version_binding = self.version_binding.clone();
636
- let request = self.request.clone();
637
- let target_file_ids = self.target_file_ids.clone();
638
- let filters = self.filters.clone();
639
- let result_schema = Arc::clone(&self.result_schema);
640
- let stream_schema = Arc::clone(&result_schema);
641
-
642
- let stream = stream::once(async move {
643
- let rows = scan_lix_file_live_rows(
644
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
645
- &request,
646
- &target_file_ids,
647
- )
648
- .await
649
- .map_err(lix_error_to_datafusion_error)?;
650
- let blob_ref_file_ids =
651
- blob_ref_file_ids_from_live_rows(&rows).map_err(lix_error_to_datafusion_error)?;
652
- let source_batch = lix_file_record_batch(&table_schema, &blob_reader, rows)
653
- .await
654
- .map_err(lix_error_to_datafusion_error)?;
655
- let matched_batch = filter_lix_file_batch(source_batch, &filters)?;
656
- let staged = lix_file_delete_stage_from_batch(
657
- &matched_batch,
658
- version_binding.active_version_id(),
659
- &blob_ref_file_ids,
660
- )?;
661
- let count = staged.count;
662
-
663
- if count > 0 {
664
- write_ctx
665
- .stage_write(TransactionWrite::Rows {
666
- mode: TransactionWriteMode::Replace,
667
- rows: staged.state_rows,
668
- })
669
- .await
670
- .map_err(lix_error_to_datafusion_error)?;
671
- }
672
-
673
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
674
- dml_count_batch(Arc::clone(&stream_schema), count)?,
675
- )]))
676
- })
677
- .try_flatten();
678
-
679
- Ok(Box::pin(RecordBatchStreamAdapter::new(
680
- result_schema,
681
- stream,
682
- )))
683
- }
684
- }
685
-
686
- #[allow(dead_code)]
687
- struct LixFileUpdateExec {
688
- blob_reader: Arc<dyn BlobDataReader>,
689
- write_ctx: SqlWriteContext,
690
- table_schema: SchemaRef,
691
- version_binding: VersionBinding,
692
- functions: FunctionProviderHandle,
693
- request: LiveStateScanRequest,
694
- target_file_ids: FileIdConstraint,
695
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
696
- filters: Vec<Arc<dyn PhysicalExpr>>,
697
- result_schema: SchemaRef,
698
- properties: Arc<PlanProperties>,
699
- }
700
-
701
- impl std::fmt::Debug for LixFileUpdateExec {
702
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
703
- f.debug_struct("LixFileUpdateExec").finish()
704
- }
705
- }
706
-
707
- impl LixFileUpdateExec {
708
- fn new(
709
- blob_reader: Arc<dyn BlobDataReader>,
710
- write_ctx: SqlWriteContext,
711
- table_schema: SchemaRef,
712
- version_binding: VersionBinding,
713
- functions: FunctionProviderHandle,
714
- request: LiveStateScanRequest,
715
- target_file_ids: FileIdConstraint,
716
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
717
- filters: Vec<Arc<dyn PhysicalExpr>>,
718
- ) -> Self {
719
- let result_schema = dml_count_schema();
720
- let properties = PlanProperties::new(
721
- EquivalenceProperties::new(Arc::clone(&result_schema)),
722
- Partitioning::UnknownPartitioning(1),
723
- EmissionType::Final,
724
- Boundedness::Bounded,
725
- );
726
- Self {
727
- blob_reader,
728
- write_ctx,
729
- table_schema,
730
- version_binding,
731
- functions,
732
- request,
733
- target_file_ids,
734
- assignments,
735
- filters,
736
- result_schema,
737
- properties: Arc::new(properties),
738
- }
739
- }
740
- }
741
-
742
- impl DisplayAs for LixFileUpdateExec {
743
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
744
- match t {
745
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
746
- write!(
747
- f,
748
- "LixFileUpdateExec(assignments={}, filters={})",
749
- self.assignments.len(),
750
- self.filters.len()
751
- )
752
- }
753
- DisplayFormatType::TreeRender => write!(f, "LixFileUpdateExec"),
754
- }
755
- }
756
- }
757
-
758
- impl ExecutionPlan for LixFileUpdateExec {
759
- fn name(&self) -> &str {
760
- "LixFileUpdateExec"
761
- }
762
-
763
- fn as_any(&self) -> &dyn Any {
764
- self
765
- }
766
-
767
- fn properties(&self) -> &Arc<PlanProperties> {
768
- &self.properties
769
- }
770
-
771
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
772
- Vec::new()
773
- }
774
-
775
- fn with_new_children(
776
- self: Arc<Self>,
777
- children: Vec<Arc<dyn ExecutionPlan>>,
778
- ) -> Result<Arc<dyn ExecutionPlan>> {
779
- if !children.is_empty() {
780
- return Err(DataFusionError::Execution(
781
- "LixFileUpdateExec does not accept children".to_string(),
782
- ));
783
- }
784
- Ok(self)
785
- }
786
-
787
- fn execute(
788
- &self,
789
- partition: usize,
790
- _context: Arc<TaskContext>,
791
- ) -> Result<SendableRecordBatchStream> {
792
- if partition != 0 {
793
- return Err(DataFusionError::Execution(format!(
794
- "LixFileUpdateExec only exposes one partition, got {partition}"
795
- )));
796
- }
797
-
798
- let blob_reader = Arc::clone(&self.blob_reader);
799
- let write_ctx = self.write_ctx.clone();
800
- let table_schema = Arc::clone(&self.table_schema);
801
- let version_binding = self.version_binding.clone();
802
- let functions = self.functions.clone();
803
- let request = self.request.clone();
804
- let target_file_ids = self.target_file_ids.clone();
805
- let assignments = self.assignments.clone();
806
- let filters = self.filters.clone();
807
- let result_schema = Arc::clone(&self.result_schema);
808
- let stream_schema = Arc::clone(&result_schema);
809
-
810
- let stream = stream::once(async move {
811
- let rows = scan_lix_file_live_rows(
812
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
813
- &request,
814
- &target_file_ids,
815
- )
816
- .await
817
- .map_err(lix_error_to_datafusion_error)?;
818
- let source_batch = lix_file_record_batch(&table_schema, &blob_reader, rows)
819
- .await
820
- .map_err(lix_error_to_datafusion_error)?;
821
- let matched_batch = filter_lix_file_batch(source_batch, &filters)?;
822
- let assignment_values = UpdateAssignmentValues::evaluate(&matched_batch, &assignments)?;
823
- let update_columns = LixFileUpdateColumns::from_assignments(&assignments);
824
- let mut path_resolvers = None;
825
- if update_columns.path || update_columns.descriptor {
826
- path_resolvers = Some(
827
- file_path_resolvers_from_live_state(
828
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
829
- version_binding.active_version_id(),
830
- )
831
- .await
832
- .map_err(lix_error_to_datafusion_error)?,
833
- );
834
- }
835
- let staged = lix_file_update_stage_from_batch(
836
- &matched_batch,
837
- &assignment_values,
838
- version_binding.active_version_id(),
839
- update_columns,
840
- path_resolvers.as_mut(),
841
- &mut || functions.call_uuid_v7(),
842
- )?;
843
- let count = staged.count;
844
-
845
- if count > 0 {
846
- let intent = if staged.file_data_writes.is_empty() {
847
- TransactionWrite::Rows {
848
- mode: TransactionWriteMode::Replace,
849
- rows: staged.state_rows,
850
- }
851
- } else {
852
- TransactionWrite::RowsWithFileData {
853
- mode: TransactionWriteMode::Replace,
854
- rows: staged.state_rows,
855
- file_data: staged.file_data_writes,
856
- count,
857
- }
858
- };
859
- write_ctx
860
- .stage_write(intent)
861
- .await
862
- .map_err(lix_error_to_datafusion_error)?;
863
- }
864
-
865
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
866
- dml_count_batch(Arc::clone(&stream_schema), count)?,
867
- )]))
868
- })
869
- .try_flatten();
870
-
871
- Ok(Box::pin(RecordBatchStreamAdapter::new(
872
- result_schema,
873
- stream,
874
- )))
875
- }
876
- }
877
-
878
- struct LixFileScanExec {
879
- live_state: Arc<dyn LiveStateReader>,
880
- blob_reader: Arc<dyn BlobDataReader>,
881
- batch_schema: SchemaRef,
882
- output_schema: SchemaRef,
883
- projection: Option<Vec<usize>>,
884
- request: LiveStateScanRequest,
885
- target_file_ids: FileIdConstraint,
886
- filters: Vec<Arc<dyn PhysicalExpr>>,
887
- limit: Option<usize>,
888
- properties: Arc<PlanProperties>,
889
- }
890
-
891
- impl std::fmt::Debug for LixFileScanExec {
892
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
893
- f.debug_struct("LixFileScanExec").finish()
894
- }
895
- }
896
-
897
- impl LixFileScanExec {
898
- fn new(
899
- live_state: Arc<dyn LiveStateReader>,
900
- blob_reader: Arc<dyn BlobDataReader>,
901
- batch_schema: SchemaRef,
902
- output_schema: SchemaRef,
903
- projection: Option<Vec<usize>>,
904
- request: LiveStateScanRequest,
905
- target_file_ids: FileIdConstraint,
906
- filters: Vec<Arc<dyn PhysicalExpr>>,
907
- limit: Option<usize>,
908
- ) -> Self {
909
- let properties = PlanProperties::new(
910
- EquivalenceProperties::new(output_schema.clone()),
911
- Partitioning::UnknownPartitioning(1),
912
- EmissionType::Incremental,
913
- Boundedness::Bounded,
914
- );
915
- Self {
916
- live_state,
917
- blob_reader,
918
- batch_schema,
919
- output_schema,
920
- projection,
921
- request,
922
- target_file_ids,
923
- filters,
924
- limit,
925
- properties: Arc::new(properties),
926
- }
927
- }
928
- }
929
-
930
- impl DisplayAs for LixFileScanExec {
931
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
932
- match t {
933
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
934
- write!(f, "LixFileScanExec(limit={:?})", self.limit)
935
- }
936
- DisplayFormatType::TreeRender => write!(f, "LixFileScanExec"),
937
- }
938
- }
939
- }
940
-
941
- impl ExecutionPlan for LixFileScanExec {
942
- fn name(&self) -> &str {
943
- "LixFileScanExec"
944
- }
945
-
946
- fn as_any(&self) -> &dyn Any {
947
- self
948
- }
949
-
950
- fn properties(&self) -> &Arc<PlanProperties> {
951
- &self.properties
952
- }
953
-
954
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
955
- Vec::new()
956
- }
957
-
958
- fn with_new_children(
959
- self: Arc<Self>,
960
- children: Vec<Arc<dyn ExecutionPlan>>,
961
- ) -> Result<Arc<dyn ExecutionPlan>> {
962
- if !children.is_empty() {
963
- return Err(DataFusionError::Execution(
964
- "LixFileScanExec does not accept children".to_string(),
965
- ));
966
- }
967
- Ok(self)
968
- }
969
-
970
- fn execute(
971
- &self,
972
- partition: usize,
973
- _context: Arc<TaskContext>,
974
- ) -> Result<SendableRecordBatchStream> {
975
- if partition != 0 {
976
- return Err(DataFusionError::Execution(format!(
977
- "LixFileScanExec only supports partition 0, got {partition}"
978
- )));
979
- }
980
-
981
- let live_state = Arc::clone(&self.live_state);
982
- let blob_reader = Arc::clone(&self.blob_reader);
983
- let request = self.request.clone();
984
- let target_file_ids = self.target_file_ids.clone();
985
- let filters = self.filters.clone();
986
- let limit = self.limit;
987
- let output_schema = Arc::clone(&self.output_schema);
988
- let batch_schema = Arc::clone(&self.batch_schema);
989
- let projection = self.projection.clone();
990
- let fut = async move {
991
- let rows = scan_lix_file_live_rows(live_state, &request, &target_file_ids)
992
- .await
993
- .map_err(|error| {
994
- DataFusionError::Execution(format!("sql2 lix_file scan failed: {error}"))
995
- })?;
996
- let batch = lix_file_record_batch(&batch_schema, &blob_reader, rows)
997
- .await
998
- .map_err(|error| {
999
- DataFusionError::Execution(format!("sql2 lix_file batch build failed: {error}"))
1000
- })?;
1001
- let filtered = filter_lix_file_batch(batch, &filters)?;
1002
- let projected = match projection {
1003
- Some(indices) => filtered.project(&indices).map_err(DataFusionError::from),
1004
- None => Ok(filtered),
1005
- }?;
1006
- match limit {
1007
- Some(limit) => Ok(projected.slice(0, limit.min(projected.num_rows()))),
1008
- None => Ok(projected),
1009
- }
1010
- };
1011
-
1012
- Ok(Box::pin(RecordBatchStreamAdapter::new(
1013
- output_schema,
1014
- stream::once(fut).map_ok(|batch| batch),
1015
- )))
1016
- }
1017
- }
1018
-
1019
- #[derive(Debug, Clone)]
1020
- struct FileDescriptorRecord {
1021
- id: String,
1022
- directory_id: Option<String>,
1023
- name: String,
1024
- hidden: bool,
1025
- live: MaterializedLiveStateRow,
1026
- }
1027
-
1028
/// Blob reference reduced to the hash of the blob it points at.
#[derive(Debug, Clone)]
struct BlobRefRecord {
    /// Hash identifying the referenced blob.
    blob_hash: String,
}
1032
-
1033
/// Directory descriptor fields together with a version id.
// NOTE(review): `version_id` is presumably the version the descriptor was read from —
// confirm against the code that constructs this record.
#[derive(Debug, Clone)]
struct DirectoryDescriptorRecord {
    /// Directory id.
    id: String,
    /// Id of the parent directory, if one is set.
    parent_id: Option<String>,
    /// Directory name.
    name: String,
    /// Version id associated with this descriptor.
    version_id: String,
}
1040
-
1041
- #[derive(Debug, Deserialize)]
1042
- struct FileDescriptorSnapshot {
1043
- id: String,
1044
- directory_id: Option<String>,
1045
- name: String,
1046
- hidden: bool,
1047
- }
1048
-
1049
- #[derive(Debug, Deserialize)]
1050
- struct BlobRefSnapshot {
1051
- id: String,
1052
- blob_hash: String,
1053
- }
1054
-
1055
- #[derive(Debug, Deserialize)]
1056
- struct DirectoryDescriptorSnapshot {
1057
- id: String,
1058
- parent_id: Option<String>,
1059
- name: String,
1060
- }
1061
-
1062
- #[derive(Debug, Default)]
1063
- struct LixFileStagedBatch {
1064
- state_rows: Vec<TransactionWriteRow>,
1065
- file_data_writes: Vec<TransactionFileData>,
1066
- count: u64,
1067
- }
1068
-
1069
- impl LixFileStagedBatch {
1070
- fn extend(&mut self, other: LixFileStagedBatch) {
1071
- self.state_rows.extend(other.state_rows);
1072
- self.file_data_writes.extend(other.file_data_writes);
1073
- self.count += other.count;
1074
- }
1075
-
1076
- fn extend_filesystem_plan(&mut self, plan: super::filesystem_planner::FilesystemWritePlan) {
1077
- self.state_rows.extend(plan.rows);
1078
- self.file_data_writes.extend(plan.file_data);
1079
- self.count += plan.count;
1080
- }
1081
-
1082
- fn extend_filesystem_delete_plan(&mut self, plan: FilesystemDeletePlan) {
1083
- self.state_rows.extend(plan.rows);
1084
- self.count += plan.count;
1085
- }
1086
- }
1087
-
1088
/// Test helper: stages `batch` as an insert and returns only the resulting state rows.
#[cfg(test)]
fn lix_file_write_rows_from_batch(
    batch: &RecordBatch,
    version_binding: Option<&str>,
) -> Result<Vec<TransactionWriteRow>> {
    lix_file_insert_stage_from_batch(batch, version_binding).map(|staged| staged.state_rows)
}
1095
-
1096
- fn lix_file_delete_stage_from_batch(
1097
- batch: &RecordBatch,
1098
- version_binding: Option<&str>,
1099
- blob_ref_file_ids: &BTreeSet<String>,
1100
- ) -> Result<LixFileStagedBatch> {
1101
- let mut staged = LixFileStagedBatch::default();
1102
- for row_index in 0..batch.num_rows() {
1103
- let file_id = required_string_value(batch, row_index, "id")?;
1104
- let context = file_row_context_from_batch(batch, row_index, version_binding)?;
1105
- staged.extend_filesystem_delete_plan(plan_file_delete(FileDeleteInput {
1106
- file_id: file_id.clone(),
1107
- has_blob_ref: blob_ref_file_ids.contains(&file_id),
1108
- context,
1109
- }));
1110
- }
1111
- Ok(staged)
1112
- }
1113
-
1114
- fn blob_ref_file_ids_from_live_rows(
1115
- rows: &[MaterializedLiveStateRow],
1116
- ) -> std::result::Result<BTreeSet<String>, LixError> {
1117
- let mut file_ids = BTreeSet::new();
1118
- for row in rows {
1119
- if row.schema_key != BLOB_REF_SCHEMA_KEY {
1120
- continue;
1121
- }
1122
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1123
- continue;
1124
- };
1125
- let snapshot: BlobRefSnapshot =
1126
- serde_json::from_str(snapshot_content).map_err(|error| {
1127
- LixError::new(
1128
- "LIX_ERROR_UNKNOWN",
1129
- format!("invalid lix_binary_blob_ref snapshot JSON: {error}"),
1130
- )
1131
- })?;
1132
- file_ids.insert(snapshot.id);
1133
- }
1134
- Ok(file_ids)
1135
- }
1136
-
1137
/// Test helper: stages `batch` as a `lix_file` insert with read-only field rejection,
/// descriptor writes and data writes all enabled.
#[cfg(test)]
fn lix_file_insert_stage_from_batch(
    batch: &RecordBatch,
    version_binding: Option<&str>,
) -> Result<LixFileStagedBatch> {
    let reject_read_only_fields = true;
    let include_descriptor_writes = true;
    let include_data_writes = true;
    lix_file_stage_from_batch_with_options(
        batch,
        version_binding,
        "lix_file",
        reject_read_only_fields,
        include_descriptor_writes,
        include_data_writes,
    )
}
1144
-
1145
- fn lix_file_insert_stage_from_batch_with_id_generator_and_path_resolvers(
1146
- batch: &RecordBatch,
1147
- version_binding: Option<&str>,
1148
- surface_name: &str,
1149
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
1150
- generate_id: &mut dyn FnMut() -> String,
1151
- include_data_writes: bool,
1152
- ) -> Result<LixFileStagedBatch> {
1153
- lix_file_stage_from_batch_with_options_and_path_resolvers(
1154
- batch,
1155
- version_binding,
1156
- surface_name,
1157
- true,
1158
- true,
1159
- include_data_writes,
1160
- Some(path_resolvers),
1161
- Some(generate_id),
1162
- )
1163
- }
1164
-
1165
- fn lix_file_insert_stage_from_batch_with_path_resolvers(
1166
- batch: &RecordBatch,
1167
- version_binding: Option<&str>,
1168
- surface_name: &str,
1169
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
1170
- generate_directory_id: &mut dyn FnMut() -> String,
1171
- include_data_writes: bool,
1172
- ) -> Result<LixFileStagedBatch> {
1173
- lix_file_stage_from_batch_with_options_and_path_resolvers(
1174
- batch,
1175
- version_binding,
1176
- surface_name,
1177
- true,
1178
- true,
1179
- include_data_writes,
1180
- Some(path_resolvers),
1181
- Some(generate_directory_id),
1182
- )
1183
- }
1184
-
1185
- fn lix_file_existing_update_stage_from_batch(
1186
- batch: &RecordBatch,
1187
- assignment_values: &UpdateAssignmentValues,
1188
- version_binding: Option<&str>,
1189
- include_descriptor_writes: bool,
1190
- include_data_writes: bool,
1191
- path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1192
- ) -> Result<LixFileStagedBatch> {
1193
- let mut staged = LixFileStagedBatch::default();
1194
- let mut path_resolvers = path_resolvers;
1195
-
1196
- for row_index in 0..batch.num_rows() {
1197
- let id = required_string_value(batch, row_index, "id")?;
1198
- let hidden = update_optional_bool_value(batch, assignment_values, row_index, "hidden")?
1199
- .unwrap_or(false);
1200
- let context =
1201
- file_row_context_from_update(batch, assignment_values, row_index, version_binding)?;
1202
-
1203
- if include_descriptor_writes {
1204
- let directory_id =
1205
- update_optional_string_value(batch, assignment_values, row_index, "directory_id")?;
1206
- let name = update_required_string_value(batch, assignment_values, row_index, "name")?;
1207
- if let Some(path_resolvers) = path_resolvers.as_deref_mut() {
1208
- let resolver = path_resolvers
1209
- .entry(file_path_resolver_key(&context))
1210
- .or_insert_with(DirectoryPathResolver::default);
1211
- resolver
1212
- .reserve_file(directory_id.clone(), name.clone(), id.clone())
1213
- .map_err(lix_error_to_datafusion_error)?;
1214
- }
1215
- staged
1216
- .state_rows
1217
- .push(file_descriptor_row(FileDescriptorRowInput {
1218
- id: id.clone(),
1219
- directory_id,
1220
- name,
1221
- hidden,
1222
- context: context.clone(),
1223
- }));
1224
- }
1225
-
1226
- if include_data_writes {
1227
- let data = update_required_binary_value(batch, assignment_values, row_index, "data")?;
1228
- stage_lix_file_data_write(&mut staged, id, data, context, None)?;
1229
- }
1230
-
1231
- staged.count = staged
1232
- .count
1233
- .checked_add(1)
1234
- .ok_or_else(|| DataFusionError::Execution("lix_file row count overflow".into()))?;
1235
- }
1236
-
1237
- Ok(staged)
1238
- }
1239
-
1240
/// Flags describing which kinds of `lix_file` columns an UPDATE assigns.
#[derive(Debug, Clone, Copy)]
struct LixFileUpdateColumns {
    /// The `path` column is assigned.
    path: bool,
    /// The `data` column is assigned.
    data: bool,
    /// At least one column other than `path` and `data` is assigned.
    descriptor: bool,
}
1246
-
1247
- impl LixFileUpdateColumns {
1248
- fn from_assignments(assignments: &[(String, Arc<dyn PhysicalExpr>)]) -> Self {
1249
- let path = assignments
1250
- .iter()
1251
- .any(|(column_name, _)| column_name == "path");
1252
- let data = assignments
1253
- .iter()
1254
- .any(|(column_name, _)| column_name == "data");
1255
- let descriptor = assignments
1256
- .iter()
1257
- .any(|(column_name, _)| column_name != "path" && column_name != "data");
1258
- Self {
1259
- path,
1260
- data,
1261
- descriptor,
1262
- }
1263
- }
1264
- }
1265
-
1266
- fn lix_file_update_stage_from_batch(
1267
- batch: &RecordBatch,
1268
- assignment_values: &UpdateAssignmentValues,
1269
- version_binding: Option<&str>,
1270
- update_columns: LixFileUpdateColumns,
1271
- path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1272
- generate_directory_id: &mut dyn FnMut() -> String,
1273
- ) -> Result<LixFileStagedBatch> {
1274
- if update_columns.path || update_columns.descriptor {
1275
- let Some(path_resolvers) = path_resolvers else {
1276
- return Err(DataFusionError::Execution(
1277
- "UPDATE lix_file requires filesystem path resolver".to_string(),
1278
- ));
1279
- };
1280
- return if update_columns.path {
1281
- lix_file_path_update_stage_from_batch(
1282
- batch,
1283
- assignment_values,
1284
- version_binding,
1285
- update_columns,
1286
- path_resolvers,
1287
- generate_directory_id,
1288
- )
1289
- } else {
1290
- lix_file_existing_update_stage_from_batch(
1291
- batch,
1292
- assignment_values,
1293
- version_binding,
1294
- update_columns.descriptor,
1295
- update_columns.data,
1296
- Some(path_resolvers),
1297
- )
1298
- };
1299
- }
1300
-
1301
- lix_file_existing_update_stage_from_batch(
1302
- batch,
1303
- assignment_values,
1304
- version_binding,
1305
- update_columns.descriptor,
1306
- update_columns.data,
1307
- None,
1308
- )
1309
- }
1310
-
1311
- fn lix_file_path_update_stage_from_batch(
1312
- batch: &RecordBatch,
1313
- assignment_values: &UpdateAssignmentValues,
1314
- version_binding: Option<&str>,
1315
- update_columns: LixFileUpdateColumns,
1316
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
1317
- generate_directory_id: &mut dyn FnMut() -> String,
1318
- ) -> Result<LixFileStagedBatch> {
1319
- let mut staged = LixFileStagedBatch::default();
1320
-
1321
- for row_index in 0..batch.num_rows() {
1322
- let id = required_string_value(batch, row_index, "id")?;
1323
- let path = update_required_string_value(batch, assignment_values, row_index, "path")?;
1324
- let hidden = update_optional_bool_value(batch, assignment_values, row_index, "hidden")?
1325
- .unwrap_or(false);
1326
- let context =
1327
- file_row_context_from_update(batch, assignment_values, row_index, version_binding)?;
1328
- let assigned_data = if update_columns.data {
1329
- Some(update_required_binary_value(
1330
- batch,
1331
- assignment_values,
1332
- row_index,
1333
- "data",
1334
- )?)
1335
- } else {
1336
- None
1337
- };
1338
-
1339
- let resolver = path_resolvers
1340
- .entry(file_path_resolver_key(&context))
1341
- .or_insert_with(DirectoryPathResolver::default);
1342
- let plan = plan_file_path_update(
1343
- resolver,
1344
- id.clone(),
1345
- path,
1346
- hidden,
1347
- None,
1348
- context.clone(),
1349
- generate_directory_id,
1350
- )
1351
- .map_err(lix_error_to_datafusion_error)?;
1352
- staged.extend_filesystem_plan(plan);
1353
-
1354
- if let Some(data) = assigned_data {
1355
- stage_lix_file_data_write(&mut staged, id, data, context, None)?;
1356
- }
1357
- }
1358
-
1359
- Ok(staged)
1360
- }
1361
-
1362
/// Test helper: runs the shared staging routine with neither path resolvers nor an id
/// generator.
#[cfg(test)]
fn lix_file_stage_from_batch_with_options(
    batch: &RecordBatch,
    version_binding: Option<&str>,
    surface_name: &str,
    reject_read_only_fields: bool,
    include_descriptor_writes: bool,
    include_data_writes: bool,
) -> Result<LixFileStagedBatch> {
    let no_path_resolvers = None;
    let no_id_generator = None;
    lix_file_stage_from_batch_with_options_and_path_resolvers(
        batch,
        version_binding,
        surface_name,
        reject_read_only_fields,
        include_descriptor_writes,
        include_data_writes,
        no_path_resolvers,
        no_id_generator,
    )
}
1382
-
1383
- fn lix_file_stage_from_batch_with_options_and_path_resolvers(
1384
- batch: &RecordBatch,
1385
- version_binding: Option<&str>,
1386
- surface_name: &str,
1387
- reject_read_only_fields: bool,
1388
- include_descriptor_writes: bool,
1389
- include_data_writes: bool,
1390
- mut path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1391
- mut generate_directory_id: Option<&mut dyn FnMut() -> String>,
1392
- ) -> Result<LixFileStagedBatch> {
1393
- let mut staged = LixFileStagedBatch::default();
1394
-
1395
- for row_index in 0..batch.num_rows() {
1396
- if reject_read_only_fields {
1397
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_entity_id")?;
1398
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_schema_key")?;
1399
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_change_id")?;
1400
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_created_at")?;
1401
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_updated_at")?;
1402
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_commit_id")?;
1403
- }
1404
-
1405
- let path = optional_string_value(batch, row_index, "path")?;
1406
- let id = optional_string_value(batch, row_index, "id")?;
1407
- let hidden = optional_bool_value(batch, row_index, "hidden")?;
1408
- let context = file_row_context_from_batch(batch, row_index, version_binding)?;
1409
- let data = if include_data_writes {
1410
- insert_optional_binary_value(batch, row_index, "data")?
1411
- } else {
1412
- None
1413
- };
1414
-
1415
- if let Some(path) = path {
1416
- reject_read_only_lix_file_insert_field(batch, row_index, "directory_id")?;
1417
- reject_read_only_lix_file_insert_field(batch, row_index, "name")?;
1418
-
1419
- let Some(path_resolvers) = path_resolvers.as_deref_mut() else {
1420
- return Err(DataFusionError::Execution(
1421
- "INSERT into lix_file with path requires directory path resolver".to_string(),
1422
- ));
1423
- };
1424
- let resolver = path_resolvers
1425
- .entry(file_path_resolver_key(&context))
1426
- .or_insert_with(DirectoryPathResolver::default);
1427
- let Some(generate_directory_id) = generate_directory_id.as_deref_mut() else {
1428
- return Err(DataFusionError::Execution(
1429
- "INSERT into lix_file with path requires directory id generator".to_string(),
1430
- ));
1431
- };
1432
- let file_id = id.unwrap_or_else(|| generate_directory_id());
1433
- let mut plan = super::filesystem_planner::plan_file_path_write(
1434
- resolver,
1435
- FilePathWriteInput {
1436
- id: Some(file_id.clone()),
1437
- path,
1438
- data,
1439
- hidden,
1440
- context,
1441
- },
1442
- generate_directory_id,
1443
- )
1444
- .map_err(lix_error_to_datafusion_error)?;
1445
- attach_lix_file_insert_origin(&mut plan.rows, surface_name, &file_id);
1446
- staged.extend_filesystem_plan(plan);
1447
- continue;
1448
- }
1449
-
1450
- let directory_id = optional_string_value(batch, row_index, "directory_id")?;
1451
- let name = required_string_value(batch, row_index, "name")?;
1452
-
1453
- let id = if data.is_some() {
1454
- match id {
1455
- Some(id) => Some(id),
1456
- None => {
1457
- let Some(generate_id) = generate_directory_id.as_deref_mut() else {
1458
- return Err(DataFusionError::Execution(
1459
- "INSERT into lix_file with data requires id generator".to_string(),
1460
- ));
1461
- };
1462
- Some(generate_id())
1463
- }
1464
- }
1465
- } else {
1466
- id
1467
- };
1468
-
1469
- if include_descriptor_writes {
1470
- if let Some(path_resolvers) = path_resolvers.as_deref_mut() {
1471
- if let Some(file_id) = id.as_ref() {
1472
- let resolver = path_resolvers
1473
- .entry(file_path_resolver_key(&context))
1474
- .or_insert_with(DirectoryPathResolver::default);
1475
- resolver
1476
- .reserve_file(directory_id.clone(), name.clone(), file_id.clone())
1477
- .map_err(lix_error_to_datafusion_error)?;
1478
- }
1479
- }
1480
- let mut row = file_descriptor_write_row(FileDescriptorWriteIntent {
1481
- id: id.clone(),
1482
- directory_id: directory_id.clone(),
1483
- name: name.clone(),
1484
- hidden,
1485
- context: context.clone(),
1486
- });
1487
- if let Some(file_id) = id.as_ref() {
1488
- row.origin = Some(lix_file_insert_origin(surface_name, file_id));
1489
- }
1490
- staged.state_rows.push(row);
1491
- }
1492
-
1493
- if let (Some(id), Some(data)) = (id, data) {
1494
- let origin = Some(lix_file_insert_origin(surface_name, &id));
1495
- stage_lix_file_data_write(&mut staged, id, data, context, origin)?;
1496
- }
1497
- staged.count = staged
1498
- .count
1499
- .checked_add(1)
1500
- .ok_or_else(|| DataFusionError::Execution("lix_file row count overflow".into()))?;
1501
- }
1502
-
1503
- Ok(staged)
1504
- }
1505
-
1506
- fn stage_lix_file_data_write(
1507
- staged: &mut LixFileStagedBatch,
1508
- file_id: String,
1509
- data: Vec<u8>,
1510
- context: FilesystemRowContext,
1511
- origin: Option<TransactionWriteOrigin>,
1512
- ) -> Result<()> {
1513
- let mut row = blob_ref_row(BlobRefRowInput {
1514
- file_id: file_id.clone(),
1515
- data: data.clone(),
1516
- context: FilesystemRowContext {
1517
- file_id: None,
1518
- metadata: None,
1519
- ..context.clone()
1520
- },
1521
- })
1522
- .map_err(lix_error_to_datafusion_error)?;
1523
- row.origin = origin;
1524
- staged.state_rows.push(row);
1525
- staged.file_data_writes.push(TransactionFileData {
1526
- file_id,
1527
- version_id: context.version_id,
1528
- untracked: context.untracked,
1529
- data,
1530
- });
1531
- Ok(())
1532
- }
1533
-
1534
- fn attach_lix_file_insert_origin(
1535
- rows: &mut [TransactionWriteRow],
1536
- surface_name: &str,
1537
- file_id: &str,
1538
- ) {
1539
- let origin = lix_file_insert_origin(surface_name, file_id);
1540
- for row in rows {
1541
- if row.schema_key == FILE_DESCRIPTOR_SCHEMA_KEY || row.schema_key == BLOB_REF_SCHEMA_KEY {
1542
- row.origin = Some(origin.clone());
1543
- }
1544
- }
1545
- }
1546
-
1547
- fn lix_file_insert_origin(surface_name: &str, file_id: &str) -> TransactionWriteOrigin {
1548
- TransactionWriteOrigin {
1549
- surface: surface_name.to_string(),
1550
- operation: TransactionWriteOperation::Insert,
1551
- primary_key: Some(LogicalPrimaryKey {
1552
- columns: vec!["id".to_string()],
1553
- values: vec![file_id.to_string()],
1554
- }),
1555
- }
1556
- }
1557
-
1558
- fn file_row_context_from_batch(
1559
- batch: &RecordBatch,
1560
- row_index: usize,
1561
- version_binding: Option<&str>,
1562
- ) -> Result<FilesystemRowContext> {
1563
- let explicit_version_id = optional_string_value(batch, row_index, "lixcol_version_id")?;
1564
- let scope = resolve_write_version_scope(
1565
- optional_bool_value(batch, row_index, "lixcol_global")?,
1566
- explicit_version_id,
1567
- version_binding,
1568
- "INSERT into lix_file_by_version",
1569
- "lix_file",
1570
- )?;
1571
-
1572
- Ok(FilesystemRowContext {
1573
- version_id: scope.version_id,
1574
- global: scope.global,
1575
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1576
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1577
- metadata: optional_metadata_value(batch, row_index, "lixcol_metadata", "lix_file")?,
1578
- })
1579
- }
1580
-
1581
- fn file_row_context_from_update(
1582
- batch: &RecordBatch,
1583
- assignment_values: &UpdateAssignmentValues,
1584
- row_index: usize,
1585
- version_binding: Option<&str>,
1586
- ) -> Result<FilesystemRowContext> {
1587
- let explicit_version_id = optional_string_value(batch, row_index, "lixcol_version_id")?;
1588
- let scope = resolve_write_version_scope(
1589
- optional_bool_value(batch, row_index, "lixcol_global")?,
1590
- explicit_version_id,
1591
- version_binding,
1592
- "UPDATE into lix_file_by_version",
1593
- "lix_file",
1594
- )?;
1595
-
1596
- Ok(FilesystemRowContext {
1597
- version_id: scope.version_id,
1598
- global: scope.global,
1599
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1600
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1601
- metadata: update_optional_metadata_value(
1602
- batch,
1603
- assignment_values,
1604
- row_index,
1605
- "lixcol_metadata",
1606
- "lix_file",
1607
- )?,
1608
- })
1609
- }
1610
-
1611
- fn file_path_resolver_key(context: &FilesystemRowContext) -> String {
1612
- filesystem_storage_scope_key(
1613
- &context.version_id,
1614
- context.global,
1615
- context.untracked,
1616
- context.file_id.as_deref(),
1617
- )
1618
- }
1619
-
1620
- async fn file_path_resolvers_from_live_state(
1621
- live_state: Arc<dyn LiveStateReader>,
1622
- version_binding: Option<&str>,
1623
- ) -> std::result::Result<BTreeMap<String, DirectoryPathResolver>, LixError> {
1624
- let rows = live_state
1625
- .scan_rows(&LiveStateScanRequest {
1626
- filter: LiveStateFilter {
1627
- schema_keys: vec![
1628
- DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
1629
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
1630
- ],
1631
- version_ids: version_binding
1632
- .map(|version_id| vec![version_id.to_string()])
1633
- .unwrap_or_default(),
1634
- ..Default::default()
1635
- },
1636
- ..Default::default()
1637
- })
1638
- .await?;
1639
- let mut resolvers = directory_path_resolvers_from_state_rows(rows)?;
1640
- if let Some(version_id) = version_binding {
1641
- let key = filesystem_storage_scope_key(version_id, false, false, None);
1642
- resolvers
1643
- .entry(key)
1644
- .or_insert_with(DirectoryPathResolver::default);
1645
- }
1646
- Ok(resolvers)
1647
- }
1648
-
1649
- async fn lix_file_record_batch(
1650
- schema: &SchemaRef,
1651
- blob_reader: &Arc<dyn BlobDataReader>,
1652
- rows: Vec<MaterializedLiveStateRow>,
1653
- ) -> Result<RecordBatch, LixError> {
1654
- let projected_columns = schema
1655
- .fields()
1656
- .iter()
1657
- .map(|field| field.name().as_str())
1658
- .collect::<Vec<_>>();
1659
- let needs_data = projected_columns
1660
- .iter()
1661
- .any(|column_name| *column_name == "data");
1662
-
1663
- let mut file_rows = BTreeMap::<(String, String), FileDescriptorRecord>::new();
1664
- let mut blob_rows = BTreeMap::<(String, String), BlobRefRecord>::new();
1665
- let mut directory_rows = Vec::<DirectoryDescriptorRecord>::new();
1666
-
1667
- for row in rows {
1668
- match row.schema_key.as_str() {
1669
- FILE_DESCRIPTOR_SCHEMA_KEY => {
1670
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1671
- continue;
1672
- };
1673
- let snapshot: FileDescriptorSnapshot = serde_json::from_str(snapshot_content)
1674
- .map_err(|error| {
1675
- LixError::new(
1676
- "LIX_ERROR_UNKNOWN",
1677
- format!("invalid lix_file_descriptor snapshot JSON: {error}"),
1678
- )
1679
- })?;
1680
- file_rows.insert(
1681
- (row.version_id.clone(), snapshot.id.clone()),
1682
- FileDescriptorRecord {
1683
- id: snapshot.id,
1684
- directory_id: snapshot.directory_id,
1685
- name: snapshot.name,
1686
- hidden: snapshot.hidden,
1687
- live: row,
1688
- },
1689
- );
1690
- }
1691
- BLOB_REF_SCHEMA_KEY => {
1692
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1693
- continue;
1694
- };
1695
- let snapshot: BlobRefSnapshot =
1696
- serde_json::from_str(snapshot_content).map_err(|error| {
1697
- LixError::new(
1698
- "LIX_ERROR_UNKNOWN",
1699
- format!("invalid lix_binary_blob_ref snapshot JSON: {error}"),
1700
- )
1701
- })?;
1702
- blob_rows.insert(
1703
- (row.version_id.clone(), snapshot.id.clone()),
1704
- BlobRefRecord {
1705
- blob_hash: snapshot.blob_hash,
1706
- },
1707
- );
1708
- }
1709
- DIRECTORY_DESCRIPTOR_SCHEMA_KEY => {
1710
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1711
- continue;
1712
- };
1713
- let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(snapshot_content)
1714
- .map_err(|error| {
1715
- LixError::new(
1716
- "LIX_ERROR_UNKNOWN",
1717
- format!("invalid lix_directory_descriptor snapshot JSON: {error}"),
1718
- )
1719
- })?;
1720
- directory_rows.push(DirectoryDescriptorRecord {
1721
- id: snapshot.id,
1722
- parent_id: snapshot.parent_id,
1723
- name: snapshot.name,
1724
- version_id: row.version_id,
1725
- });
1726
- }
1727
- _ => {}
1728
- }
1729
- }
1730
-
1731
- let directory_paths = derive_directory_paths(&directory_rows)?;
1732
- let mut ids = Vec::new();
1733
- let mut paths = Vec::new();
1734
- let mut directory_ids = Vec::new();
1735
- let mut names = Vec::new();
1736
- let mut hiddens = Vec::new();
1737
- let mut data_values = Vec::new();
1738
- let mut entity_ids = Vec::new();
1739
- let mut schema_keys = Vec::new();
1740
- let mut file_ids = Vec::new();
1741
- let mut globals = Vec::new();
1742
- let mut change_ids = Vec::new();
1743
- let mut created_ats = Vec::new();
1744
- let mut updated_ats = Vec::new();
1745
- let mut commit_ids = Vec::new();
1746
- let mut untracked_values = Vec::new();
1747
- let mut metadata_values = Vec::new();
1748
- let mut version_ids = Vec::new();
1749
-
1750
- for ((version_id, _), file) in file_rows {
1751
- let directory_path = match file.directory_id.as_ref() {
1752
- Some(directory_id) => {
1753
- let key = (version_id.clone(), directory_id.clone());
1754
- let Some(path) = directory_paths.get(&key).cloned() else {
1755
- return Err(LixError::new(
1756
- LixError::CODE_FOREIGN_KEY,
1757
- format!(
1758
- "lix_file_descriptor '{}' references missing directory_id '{}' in version '{}'",
1759
- file.id, directory_id, version_id
1760
- ),
1761
- ));
1762
- };
1763
- Some(path)
1764
- }
1765
- None => None,
1766
- };
1767
- let path = match directory_path {
1768
- Some(directory_path) => format!("{directory_path}{}", file.name),
1769
- None => format!("/{}", file.name),
1770
- };
1771
- let data = if needs_data {
1772
- match blob_rows.get(&(version_id.clone(), file.id.clone())) {
1773
- Some(blob_ref) => load_single_blob_bytes(blob_reader, &blob_ref.blob_hash).await?,
1774
- None => None,
1775
- }
1776
- } else {
1777
- None
1778
- };
1779
-
1780
- ids.push(Some(file.id));
1781
- paths.push(Some(path));
1782
- directory_ids.push(file.directory_id);
1783
- names.push(Some(file.name));
1784
- hiddens.push(Some(file.hidden));
1785
- data_values.push(data);
1786
- entity_ids.push(Some(file.live.entity_id.as_json_array_text()?));
1787
- schema_keys.push(Some(file.live.schema_key));
1788
- file_ids.push(file.live.file_id);
1789
- globals.push(Some(file.live.global));
1790
- change_ids.push(file.live.change_id);
1791
- created_ats.push(file.live.created_at);
1792
- updated_ats.push(file.live.updated_at);
1793
- commit_ids.push(file.live.commit_id);
1794
- untracked_values.push(Some(file.live.untracked));
1795
- metadata_values.push(file.live.metadata.as_ref().map(serialize_row_metadata));
1796
- version_ids.push(Some(version_id));
1797
- }
1798
-
1799
- let mut columns = Vec::<ArrayRef>::with_capacity(schema.fields().len());
1800
- for field in schema.fields() {
1801
- let array: ArrayRef = match field.name().as_str() {
1802
- "id" => Arc::new(StringArray::from(ids.clone())),
1803
- "path" => Arc::new(StringArray::from(paths.clone())),
1804
- "directory_id" => Arc::new(StringArray::from(directory_ids.clone())),
1805
- "name" => Arc::new(StringArray::from(names.clone())),
1806
- "hidden" => Arc::new(BooleanArray::from(hiddens.clone())),
1807
- "data" => Arc::new(BinaryArray::from(
1808
- data_values
1809
- .iter()
1810
- .map(|value| value.as_deref())
1811
- .collect::<Vec<_>>(),
1812
- )),
1813
- "lixcol_entity_id" => Arc::new(StringArray::from(entity_ids.clone())),
1814
- "lixcol_schema_key" => Arc::new(StringArray::from(schema_keys.clone())),
1815
- "lixcol_file_id" => Arc::new(StringArray::from(file_ids.clone())),
1816
- "lixcol_global" => Arc::new(BooleanArray::from(globals.clone())),
1817
- "lixcol_change_id" => Arc::new(StringArray::from(change_ids.clone())),
1818
- "lixcol_created_at" => Arc::new(StringArray::from(created_ats.clone())),
1819
- "lixcol_updated_at" => Arc::new(StringArray::from(updated_ats.clone())),
1820
- "lixcol_commit_id" => Arc::new(StringArray::from(commit_ids.clone())),
1821
- "lixcol_untracked" => Arc::new(BooleanArray::from(untracked_values.clone())),
1822
- "lixcol_metadata" => Arc::new(StringArray::from(metadata_values.clone())),
1823
- "lixcol_version_id" => Arc::new(StringArray::from(version_ids.clone())),
1824
- other => {
1825
- return Err(LixError::new(
1826
- "LIX_ERROR_UNKNOWN",
1827
- format!("sql2 lix_file provider does not support projected column '{other}'"),
1828
- ))
1829
- }
1830
- };
1831
- columns.push(array);
1832
- }
1833
-
1834
- let options = RecordBatchOptions::new().with_row_count(Some(ids.len()));
1835
- RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
1836
- LixError::new(
1837
- "LIX_ERROR_UNKNOWN",
1838
- format!("sql2 failed to build lix_file record batch: {error}"),
1839
- )
1840
- })
1841
- }
1842
-
1843
- async fn load_single_blob_bytes(
1844
- blob_reader: &Arc<dyn BlobDataReader>,
1845
- blob_hash: &str,
1846
- ) -> Result<Option<Vec<u8>>, LixError> {
1847
- let hash = BlobHash::from_hex(blob_hash)?;
1848
- Ok(blob_reader
1849
- .load_bytes_many(&[hash])
1850
- .await?
1851
- .into_vec()
1852
- .into_iter()
1853
- .next()
1854
- .flatten())
1855
- }
1856
-
1857
- fn derive_directory_paths(
1858
- rows: &[DirectoryDescriptorRecord],
1859
- ) -> Result<BTreeMap<(String, String), String>, LixError> {
1860
- let mut by_version = BTreeMap::<String, BTreeMap<String, &DirectoryDescriptorRecord>>::new();
1861
- for row in rows {
1862
- by_version
1863
- .entry(row.version_id.clone())
1864
- .or_default()
1865
- .insert(row.id.clone(), row);
1866
- }
1867
-
1868
- let mut paths = BTreeMap::<(String, String), String>::new();
1869
- for (version_id, records) in by_version {
1870
- for directory_id in records.keys() {
1871
- derive_directory_path_for(
1872
- &version_id,
1873
- directory_id,
1874
- &records,
1875
- &mut paths,
1876
- &mut BTreeSet::new(),
1877
- )?;
1878
- }
1879
- }
1880
- Ok(paths)
1881
- }
1882
-
1883
- fn derive_directory_path_for(
1884
- version_id: &str,
1885
- directory_id: &str,
1886
- records: &BTreeMap<String, &DirectoryDescriptorRecord>,
1887
- paths: &mut BTreeMap<(String, String), String>,
1888
- visiting: &mut BTreeSet<String>,
1889
- ) -> Result<Option<String>, LixError> {
1890
- if let Some(path) = paths.get(&(version_id.to_string(), directory_id.to_string())) {
1891
- return Ok(Some(path.clone()));
1892
- }
1893
- if !visiting.insert(directory_id.to_string()) {
1894
- return Err(directory_parent_cycle_error(version_id, directory_id));
1895
- }
1896
- let Some(row) = records.get(directory_id) else {
1897
- visiting.remove(directory_id);
1898
- return Ok(None);
1899
- };
1900
- let path = match row.parent_id.as_deref() {
1901
- Some(parent_id) => {
1902
- let Some(parent_path) =
1903
- derive_directory_path_for(version_id, parent_id, records, paths, visiting)?
1904
- else {
1905
- visiting.remove(directory_id);
1906
- return Ok(None);
1907
- };
1908
- format!("{parent_path}{}/", row.name)
1909
- }
1910
- None => format!("/{}/", row.name),
1911
- };
1912
- visiting.remove(directory_id);
1913
- paths.insert(
1914
- (version_id.to_string(), directory_id.to_string()),
1915
- path.clone(),
1916
- );
1917
- Ok(Some(path))
1918
- }
1919
-
1920
- fn directory_parent_cycle_error(version_id: &str, directory_id: &str) -> LixError {
1921
- LixError::new(
1922
- LixError::CODE_CONSTRAINT_VIOLATION,
1923
- format!(
1924
- "lix_directory_descriptor parent_id cycle in version '{version_id}' while resolving directory '{directory_id}'"
1925
- ),
1926
- )
1927
- }
1928
-
1929
- fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1930
- let fields = match projection {
1931
- Some(indices) => indices
1932
- .iter()
1933
- .map(|index| base_schema.field(*index).as_ref().clone())
1934
- .collect::<Vec<_>>(),
1935
- None => base_schema
1936
- .fields()
1937
- .iter()
1938
- .map(|field| field.as_ref().clone())
1939
- .collect::<Vec<_>>(),
1940
- };
1941
- Ok(Arc::new(Schema::new(fields)))
1942
- }
1943
-
1944
- fn lix_file_scan_request(
1945
- version_binding: Option<&str>,
1946
- projected_schema: Option<&Schema>,
1947
- limit: Option<usize>,
1948
- ) -> LiveStateScanRequest {
1949
- LiveStateScanRequest {
1950
- filter: LiveStateFilter {
1951
- schema_keys: vec![
1952
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
1953
- BLOB_REF_SCHEMA_KEY.to_string(),
1954
- DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
1955
- ],
1956
- version_ids: version_binding
1957
- .map(|version_id| vec![version_id.to_string()])
1958
- .unwrap_or_default(),
1959
- ..LiveStateFilter::default()
1960
- },
1961
- projection: lix_file_live_state_projection(projected_schema),
1962
- limit,
1963
- }
1964
- }
1965
-
1966
- fn lix_file_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
1967
- let Some(schema) = projected_schema else {
1968
- return LiveStateProjection::default();
1969
- };
1970
- let mut columns = Vec::new();
1971
- let needs_snapshot = schema.fields().iter().any(|field| {
1972
- matches!(
1973
- field.name().as_str(),
1974
- "path" | "directory_id" | "name" | "hidden" | "data"
1975
- )
1976
- });
1977
- if needs_snapshot {
1978
- columns.push("snapshot_content".to_string());
1979
- }
1980
- if schema
1981
- .fields()
1982
- .iter()
1983
- .any(|field| field.name() == "lixcol_metadata")
1984
- {
1985
- columns.push("metadata".to_string());
1986
- }
1987
- LiveStateProjection { columns }
1988
- }
1989
-
1990
- async fn scan_lix_file_live_rows(
1991
- live_state: Arc<dyn LiveStateReader>,
1992
- request: &LiveStateScanRequest,
1993
- target_file_ids: &FileIdConstraint,
1994
- ) -> std::result::Result<Vec<MaterializedLiveStateRow>, LixError> {
1995
- let target_file_ids = match target_file_ids {
1996
- FileIdConstraint::All => return live_state.scan_rows(request).await,
1997
- FileIdConstraint::None => return Ok(Vec::new()),
1998
- FileIdConstraint::Ids(target_file_ids) => target_file_ids,
1999
- };
2000
-
2001
- let mut file_request = request.clone();
2002
- file_request.filter.schema_keys = vec![
2003
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
2004
- BLOB_REF_SCHEMA_KEY.to_string(),
2005
- ];
2006
- file_request.filter.entity_ids = target_file_ids
2007
- .iter()
2008
- .map(|file_id| EntityIdentity::single(file_id.clone()))
2009
- .collect();
2010
-
2011
- let mut rows = live_state.scan_rows(&file_request).await?;
2012
-
2013
- let mut directory_request = request.clone();
2014
- directory_request.filter.schema_keys = vec![DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string()];
2015
- directory_request.filter.entity_ids.clear();
2016
- directory_request.limit = None;
2017
- rows.extend(live_state.scan_rows(&directory_request).await?);
2018
-
2019
- Ok(rows)
2020
- }
2021
-
2022
- #[derive(Debug, Clone, PartialEq, Eq)]
2023
- enum FileIdConstraint {
2024
- All,
2025
- None,
2026
- Ids(BTreeSet<String>),
2027
- }
2028
-
2029
- impl FileIdConstraint {
2030
- fn from_ids(ids: Vec<String>) -> Self {
2031
- let ids = ids.into_iter().collect::<BTreeSet<_>>();
2032
- if ids.is_empty() {
2033
- Self::None
2034
- } else {
2035
- Self::Ids(ids)
2036
- }
2037
- }
2038
-
2039
- fn intersect(self, other: Self) -> Self {
2040
- match (self, other) {
2041
- (Self::None, _) | (_, Self::None) => Self::None,
2042
- (Self::All, constraint) | (constraint, Self::All) => constraint,
2043
- (Self::Ids(left), Self::Ids(right)) => {
2044
- let ids = left.intersection(&right).cloned().collect::<BTreeSet<_>>();
2045
- if ids.is_empty() {
2046
- Self::None
2047
- } else {
2048
- Self::Ids(ids)
2049
- }
2050
- }
2051
- }
2052
- }
2053
-
2054
- fn union(self, other: Self) -> Self {
2055
- match (self, other) {
2056
- (Self::All, _) | (_, Self::All) => Self::All,
2057
- (Self::None, constraint) | (constraint, Self::None) => constraint,
2058
- (Self::Ids(mut left), Self::Ids(right)) => {
2059
- left.extend(right);
2060
- Self::Ids(left)
2061
- }
2062
- }
2063
- }
2064
- }
2065
-
2066
- fn file_id_constraint_from_filters(filters: &[Expr]) -> Result<FileIdConstraint> {
2067
- let analyzer = LixFileIdFilterAnalyzer;
2068
- let mut constraint = FileIdConstraint::All;
2069
- for filter in filters {
2070
- if let Some(filter_constraint) = analyzer.analyze(filter)? {
2071
- constraint = constraint.intersect(filter_constraint);
2072
- }
2073
- }
2074
- Ok(constraint)
2075
- }
2076
-
2077
- struct LixFileIdFilterAnalyzer;
2078
-
2079
- impl LixFileIdFilterAnalyzer {
2080
- fn supports(&self, expr: &Expr) -> bool {
2081
- self.analyze(expr)
2082
- .is_ok_and(|constraint| constraint.is_some())
2083
- }
2084
-
2085
- fn analyze(&self, expr: &Expr) -> Result<Option<FileIdConstraint>> {
2086
- ExactStringColumnFilterAnalyzer::new("id").analyze(expr)
2087
- }
2088
- }
2089
-
2090
- struct ExactStringColumnFilterAnalyzer {
2091
- column_name: &'static str,
2092
- }
2093
-
2094
- impl ExactStringColumnFilterAnalyzer {
2095
- fn new(column_name: &'static str) -> Self {
2096
- Self { column_name }
2097
- }
2098
-
2099
- fn supports(&self, expr: &Expr) -> bool {
2100
- self.analyze(expr)
2101
- .is_ok_and(|constraint| constraint.is_some())
2102
- }
2103
-
2104
- fn analyze(&self, expr: &Expr) -> Result<Option<FileIdConstraint>> {
2105
- match expr {
2106
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
2107
- let Some(left) = self.analyze(&binary_expr.left)? else {
2108
- return Ok(None);
2109
- };
2110
- let Some(right) = self.analyze(&binary_expr.right)? else {
2111
- return Ok(None);
2112
- };
2113
- Ok(Some(left.intersect(right)))
2114
- }
2115
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
2116
- let Some(left) = self.analyze(&binary_expr.left)? else {
2117
- return Ok(None);
2118
- };
2119
- let Some(right) = self.analyze(&binary_expr.right)? else {
2120
- return Ok(None);
2121
- };
2122
- Ok(Some(left.union(right)))
2123
- }
2124
- Expr::BinaryExpr(binary_expr) => Ok(self
2125
- .value_from_binary_filter(binary_expr)
2126
- .map(|value| FileIdConstraint::Ids(BTreeSet::from([value])))),
2127
- Expr::InList(in_list) => Ok(self
2128
- .values_from_in_list_filter(in_list)
2129
- .map(FileIdConstraint::from_ids)),
2130
- _ => Ok(None),
2131
- }
2132
- }
2133
-
2134
- fn value_from_binary_filter(&self, binary_expr: &BinaryExpr) -> Option<String> {
2135
- if binary_expr.op != Operator::Eq {
2136
- return None;
2137
- }
2138
- self.value_from_column_literal_filter(&binary_expr.left, &binary_expr.right)
2139
- .or_else(|| {
2140
- self.value_from_column_literal_filter(&binary_expr.right, &binary_expr.left)
2141
- })
2142
- }
2143
-
2144
- fn values_from_in_list_filter(&self, in_list: &InList) -> Option<Vec<String>> {
2145
- if in_list.negated {
2146
- return None;
2147
- }
2148
- let Expr::Column(column) = in_list.expr.as_ref() else {
2149
- return None;
2150
- };
2151
- if column.name != self.column_name {
2152
- return None;
2153
- }
2154
- let values = in_list
2155
- .list
2156
- .iter()
2157
- .map(string_expr_literal)
2158
- .collect::<Option<Vec<_>>>()?;
2159
- Some(values)
2160
- }
2161
-
2162
- fn value_from_column_literal_filter(
2163
- &self,
2164
- column_expr: &Expr,
2165
- literal_expr: &Expr,
2166
- ) -> Option<String> {
2167
- let Expr::Column(column) = column_expr else {
2168
- return None;
2169
- };
2170
- if column.name != self.column_name {
2171
- return None;
2172
- }
2173
- string_expr_literal(literal_expr)
2174
- }
2175
- }
2176
-
2177
- fn string_expr_literal(expr: &Expr) -> Option<String> {
2178
- let Expr::Literal(literal, _) = expr else {
2179
- return None;
2180
- };
2181
- match literal {
2182
- ScalarValue::Utf8(Some(value))
2183
- | ScalarValue::Utf8View(Some(value))
2184
- | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
2185
- _ => None,
2186
- }
2187
- }
2188
-
2189
- fn contains_column(expr: &Expr, column_name: &str) -> bool {
2190
- match expr {
2191
- Expr::Column(column) => column.name == column_name,
2192
- Expr::BinaryExpr(binary_expr) => {
2193
- contains_column(&binary_expr.left, column_name)
2194
- || contains_column(&binary_expr.right, column_name)
2195
- }
2196
- Expr::InList(in_list) => {
2197
- contains_column(&in_list.expr, column_name)
2198
- || in_list
2199
- .list
2200
- .iter()
2201
- .any(|expr| contains_column(expr, column_name))
2202
- }
2203
- Expr::Between(between) => {
2204
- contains_column(&between.expr, column_name)
2205
- || contains_column(&between.low, column_name)
2206
- || contains_column(&between.high, column_name)
2207
- }
2208
- Expr::Not(expr) | Expr::IsNull(expr) | Expr::IsNotNull(expr) => {
2209
- contains_column(expr, column_name)
2210
- }
2211
- Expr::Negative(expr) => contains_column(expr, column_name),
2212
- _ => false,
2213
- }
2214
- }
2215
-
2216
- fn validate_lix_file_update_assignments(
2217
- schema: &SchemaRef,
2218
- assignments: &[(String, Expr)],
2219
- ) -> Result<()> {
2220
- for (column_name, expr) in assignments {
2221
- schema.field_with_name(column_name).map_err(|_| {
2222
- DataFusionError::Plan(format!(
2223
- "UPDATE lix_file failed: column '{column_name}' does not exist"
2224
- ))
2225
- })?;
2226
- if !matches!(
2227
- column_name.as_str(),
2228
- "path" | "directory_id" | "name" | "hidden" | "data" | "lixcol_metadata"
2229
- ) {
2230
- return Err(DataFusionError::Execution(format!(
2231
- "UPDATE lix_file cannot stage read-only column '{column_name}'"
2232
- )));
2233
- }
2234
- if column_name == "data" {
2235
- reject_non_binary_lix_file_data_assignment(expr)?;
2236
- }
2237
- }
2238
- Ok(())
2239
- }
2240
-
2241
- fn reject_non_binary_lix_file_data_assignment(expr: &Expr) -> Result<()> {
2242
- match expr {
2243
- Expr::Literal(value, _) => {
2244
- if !scalar_is_binary_or_null(value) {
2245
- return Err(non_binary_lix_file_data_assignment_error());
2246
- }
2247
- }
2248
- Expr::Cast(cast) if is_binary_type(&cast.data_type) => {
2249
- if !logical_expr_is_binary_or_null(&cast.expr) {
2250
- return Err(non_binary_lix_file_data_assignment_error());
2251
- }
2252
- }
2253
- _ => {}
2254
- }
2255
-
2256
- Ok(())
2257
- }
2258
-
2259
- fn non_binary_lix_file_data_assignment_error() -> DataFusionError {
2260
- lix_file_data_type_error(
2261
- "UPDATE lix_file",
2262
- "data",
2263
- "use X'...' or a binary parameter for file contents",
2264
- )
2265
- }
2266
-
2267
- fn filter_lix_file_batch(
2268
- batch: RecordBatch,
2269
- filters: &[Arc<dyn PhysicalExpr>],
2270
- ) -> Result<RecordBatch> {
2271
- let Some(mask) = evaluate_lix_file_filters(&batch, filters)? else {
2272
- return Ok(batch);
2273
- };
2274
- Ok(filter_record_batch(&batch, &mask)?)
2275
- }
2276
-
2277
- fn evaluate_lix_file_filters(
2278
- batch: &RecordBatch,
2279
- filters: &[Arc<dyn PhysicalExpr>],
2280
- ) -> Result<Option<BooleanArray>> {
2281
- if filters.is_empty() {
2282
- return Ok(None);
2283
- }
2284
-
2285
- let mut combined_mask: Option<BooleanArray> = None;
2286
- for filter in filters {
2287
- let result = filter.evaluate(batch)?;
2288
- let array = result.into_array(batch.num_rows())?;
2289
- let bool_array = array
2290
- .as_any()
2291
- .downcast_ref::<BooleanArray>()
2292
- .ok_or_else(|| {
2293
- DataFusionError::Execution("lix_file filter was not boolean".to_string())
2294
- })?;
2295
- let normalized = bool_array
2296
- .iter()
2297
- .map(|value| Some(value == Some(true)))
2298
- .collect::<BooleanArray>();
2299
- combined_mask = Some(match combined_mask {
2300
- Some(existing) => and(&existing, &normalized)?,
2301
- None => normalized,
2302
- });
2303
- }
2304
- Ok(combined_mask)
2305
- }
2306
-
2307
- fn dml_count_schema() -> SchemaRef {
2308
- Arc::new(Schema::new(vec![Field::new(
2309
- "count",
2310
- DataType::UInt64,
2311
- false,
2312
- )]))
2313
- }
2314
-
2315
- fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
2316
- RecordBatch::try_new(
2317
- schema,
2318
- vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
2319
- )
2320
- .map_err(DataFusionError::from)
2321
- }
2322
-
2323
- fn record_batch_has_non_null_column(batch: &RecordBatch, column_name: &str) -> Result<bool> {
2324
- for row_index in 0..batch.num_rows() {
2325
- if optional_scalar_value(batch, row_index, column_name)?
2326
- .is_some_and(|value| !value.is_null())
2327
- {
2328
- return Ok(true);
2329
- }
2330
- }
2331
- Ok(false)
2332
- }
2333
-
2334
- fn reject_read_only_lix_file_insert_field(
2335
- batch: &RecordBatch,
2336
- row_index: usize,
2337
- column_name: &str,
2338
- ) -> Result<()> {
2339
- if optional_scalar_value(batch, row_index, column_name)?.is_some_and(|value| !value.is_null()) {
2340
- return Err(DataFusionError::Execution(format!(
2341
- "INSERT into lix_file cannot stage read-only column '{column_name}'"
2342
- )));
2343
- }
2344
- Ok(())
2345
- }
2346
-
2347
- fn required_string_value(
2348
- batch: &RecordBatch,
2349
- row_index: usize,
2350
- column_name: &str,
2351
- ) -> Result<String> {
2352
- optional_string_value(batch, row_index, column_name)?.ok_or_else(|| {
2353
- DataFusionError::Execution(format!(
2354
- "INSERT into lix_file requires non-null text column '{column_name}'"
2355
- ))
2356
- })
2357
- }
2358
-
2359
- fn update_required_string_value(
2360
- batch: &RecordBatch,
2361
- assignment_values: &UpdateAssignmentValues,
2362
- row_index: usize,
2363
- column_name: &str,
2364
- ) -> Result<String> {
2365
- update_optional_string_value(batch, assignment_values, row_index, column_name)?.ok_or_else(
2366
- || {
2367
- DataFusionError::Execution(format!(
2368
- "UPDATE lix_file requires non-null text column '{column_name}'"
2369
- ))
2370
- },
2371
- )
2372
- }
2373
-
2374
- fn update_optional_string_value(
2375
- batch: &RecordBatch,
2376
- assignment_values: &UpdateAssignmentValues,
2377
- row_index: usize,
2378
- column_name: &str,
2379
- ) -> Result<Option<String>> {
2380
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
2381
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
2382
- InsertCell::Provided(SqlCell::Value(
2383
- ScalarValue::Utf8(Some(value))
2384
- | ScalarValue::Utf8View(Some(value))
2385
- | ScalarValue::LargeUtf8(Some(value)),
2386
- )) => Ok(Some(value)),
2387
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
2388
- "UPDATE lix_file expected text-compatible column '{column_name}', got {other:?}"
2389
- ))),
2390
- }
2391
- }
2392
-
2393
- fn update_optional_metadata_value(
2394
- batch: &RecordBatch,
2395
- assignment_values: &UpdateAssignmentValues,
2396
- row_index: usize,
2397
- column_name: &str,
2398
- context: &str,
2399
- ) -> Result<Option<TransactionJson>> {
2400
- update_optional_string_value(batch, assignment_values, row_index, column_name)?
2401
- .map(|value| {
2402
- let metadata = parse_row_metadata_value(&value, context)
2403
- .map_err(super::error::lix_error_to_datafusion_error)?;
2404
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
2405
- .map_err(super::error::lix_error_to_datafusion_error)
2406
- })
2407
- .transpose()
2408
- }
2409
-
2410
- fn update_optional_bool_value(
2411
- batch: &RecordBatch,
2412
- assignment_values: &UpdateAssignmentValues,
2413
- row_index: usize,
2414
- column_name: &str,
2415
- ) -> Result<Option<bool>> {
2416
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
2417
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
2418
- InsertCell::Provided(SqlCell::Value(ScalarValue::Boolean(Some(value)))) => Ok(Some(value)),
2419
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
2420
- "UPDATE lix_file expected boolean column '{column_name}', got {other:?}"
2421
- ))),
2422
- }
2423
- }
2424
-
2425
- fn update_required_binary_value(
2426
- _batch: &RecordBatch,
2427
- assignment_values: &UpdateAssignmentValues,
2428
- row_index: usize,
2429
- column_name: &str,
2430
- ) -> Result<Vec<u8>> {
2431
- match assignment_values.assigned_cell(row_index, column_name)? {
2432
- UpdateCell::Unassigned | UpdateCell::Assigned(SqlCell::Null) => {
2433
- Err(lix_file_data_type_error(
2434
- "UPDATE lix_file",
2435
- column_name,
2436
- "use X'' for an empty file or omit data to leave contents unchanged",
2437
- ))
2438
- }
2439
- UpdateCell::Assigned(SqlCell::Value(ScalarValue::Binary(Some(value))))
2440
- | UpdateCell::Assigned(SqlCell::Value(ScalarValue::LargeBinary(Some(value)))) => Ok(value),
2441
- UpdateCell::Assigned(SqlCell::Value(ScalarValue::FixedSizeBinary(_, Some(value)))) => {
2442
- Ok(value)
2443
- }
2444
- UpdateCell::Assigned(SqlCell::Value(other)) => Err(lix_file_data_type_error_with_value(
2445
- "UPDATE lix_file",
2446
- column_name,
2447
- &other,
2448
- "use X'...' or a binary parameter for file contents",
2449
- )),
2450
- }
2451
- }
2452
-
2453
- fn optional_string_value(
2454
- batch: &RecordBatch,
2455
- row_index: usize,
2456
- column_name: &str,
2457
- ) -> Result<Option<String>> {
2458
- match optional_scalar_value(batch, row_index, column_name)? {
2459
- None
2460
- | Some(ScalarValue::Null)
2461
- | Some(ScalarValue::Utf8(None))
2462
- | Some(ScalarValue::Utf8View(None))
2463
- | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
2464
- Some(ScalarValue::Utf8(Some(value)))
2465
- | Some(ScalarValue::Utf8View(Some(value)))
2466
- | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
2467
- Some(other) => Err(DataFusionError::Execution(format!(
2468
- "INSERT into lix_file expected text-compatible column '{column_name}', got {other:?}"
2469
- ))),
2470
- }
2471
- }
2472
-
2473
- fn optional_metadata_value(
2474
- batch: &RecordBatch,
2475
- row_index: usize,
2476
- column_name: &str,
2477
- context: &str,
2478
- ) -> Result<Option<TransactionJson>> {
2479
- optional_string_value(batch, row_index, column_name)?
2480
- .map(|value| {
2481
- let metadata = parse_row_metadata_value(&value, context)
2482
- .map_err(super::error::lix_error_to_datafusion_error)?;
2483
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
2484
- .map_err(super::error::lix_error_to_datafusion_error)
2485
- })
2486
- .transpose()
2487
- }
2488
-
2489
- fn optional_bool_value(
2490
- batch: &RecordBatch,
2491
- row_index: usize,
2492
- column_name: &str,
2493
- ) -> Result<Option<bool>> {
2494
- match optional_scalar_value(batch, row_index, column_name)? {
2495
- None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
2496
- Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
2497
- Some(other) => Err(DataFusionError::Execution(format!(
2498
- "INSERT into lix_file expected boolean column '{column_name}', got {other:?}"
2499
- ))),
2500
- }
2501
- }
2502
-
2503
- fn insert_optional_binary_value(
2504
- batch: &RecordBatch,
2505
- row_index: usize,
2506
- column_name: &str,
2507
- ) -> Result<Option<Vec<u8>>> {
2508
- match optional_scalar_value(batch, row_index, column_name)? {
2509
- None => Ok(None),
2510
- Some(ScalarValue::Null)
2511
- | Some(ScalarValue::Binary(None))
2512
- | Some(ScalarValue::LargeBinary(None))
2513
- | Some(ScalarValue::FixedSizeBinary(_, None)) => Err(lix_file_data_type_error(
2514
- "INSERT into lix_file",
2515
- column_name,
2516
- "use X'' for an empty file or omit data to create a descriptor without contents",
2517
- )),
2518
- Some(ScalarValue::Binary(Some(value))) | Some(ScalarValue::LargeBinary(Some(value))) => {
2519
- Ok(Some(value))
2520
- }
2521
- Some(ScalarValue::FixedSizeBinary(_, Some(value))) => Ok(Some(value)),
2522
- Some(other) => Err(lix_file_data_type_error_with_value(
2523
- "INSERT into lix_file",
2524
- column_name,
2525
- &other,
2526
- "use X'...' or a binary parameter for file contents",
2527
- )),
2528
- }
2529
- }
2530
-
2531
- fn optional_scalar_value(
2532
- batch: &RecordBatch,
2533
- row_index: usize,
2534
- column_name: &str,
2535
- ) -> Result<Option<ScalarValue>> {
2536
- let schema = batch.schema();
2537
- let column_index = match schema.index_of(column_name) {
2538
- Ok(column_index) => column_index,
2539
- Err(_) => return Ok(None),
2540
- };
2541
- if row_index >= batch.num_rows() {
2542
- return Err(DataFusionError::Execution(format!(
2543
- "row index {row_index} out of bounds for lix_file batch with {} rows",
2544
- batch.num_rows()
2545
- )));
2546
- }
2547
- ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
2548
- .map(Some)
2549
- .map_err(|error| {
2550
- DataFusionError::Execution(format!(
2551
- "failed to decode lix_file column '{column_name}' at row {row_index}: {error}"
2552
- ))
2553
- })
2554
- }
2555
-
2556
- fn lix_file_schema() -> SchemaRef {
2557
- Arc::new(Schema::new(vec![
2558
- Field::new("id", DataType::Utf8, true),
2559
- Field::new("path", DataType::Utf8, false),
2560
- Field::new("directory_id", DataType::Utf8, true),
2561
- Field::new("name", DataType::Utf8, false),
2562
- Field::new("hidden", DataType::Boolean, true),
2563
- Field::new("data", DataType::Binary, true),
2564
- json_field("lixcol_entity_id", false),
2565
- Field::new("lixcol_schema_key", DataType::Utf8, false),
2566
- Field::new("lixcol_file_id", DataType::Utf8, true),
2567
- Field::new("lixcol_global", DataType::Boolean, true),
2568
- Field::new("lixcol_change_id", DataType::Utf8, true),
2569
- Field::new("lixcol_created_at", DataType::Utf8, true),
2570
- Field::new("lixcol_updated_at", DataType::Utf8, true),
2571
- Field::new("lixcol_commit_id", DataType::Utf8, true),
2572
- Field::new("lixcol_untracked", DataType::Boolean, true),
2573
- json_field("lixcol_metadata", true),
2574
- ]))
2575
- }
2576
-
2577
- fn lix_file_by_version_schema() -> SchemaRef {
2578
- let mut fields = lix_file_schema()
2579
- .fields()
2580
- .iter()
2581
- .map(|field| field.as_ref().clone())
2582
- .collect::<Vec<_>>();
2583
- fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
2584
- Arc::new(Schema::new(fields))
2585
- }
2586
-
2587
- fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
2588
- super::error::datafusion_error_to_lix_error(error)
2589
- }
2590
-
2591
- fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
2592
- super::error::lix_error_to_datafusion_error(error)
2593
- }
2594
-
2595
- #[cfg(test)]
2596
- mod tests {
2597
- use std::collections::{BTreeMap, BTreeSet};
2598
- use std::sync::Arc;
2599
-
2600
- use async_trait::async_trait;
2601
- use datafusion::arrow::array::{ArrayRef, BinaryArray, BooleanArray, StringArray};
2602
- use datafusion::arrow::datatypes::{DataType, Field, Schema};
2603
- use datafusion::arrow::record_batch::RecordBatch;
2604
- use datafusion::common::{Column, ScalarValue};
2605
- use datafusion::execution::TaskContext;
2606
- use datafusion::logical_expr::expr::InList;
2607
- use datafusion::logical_expr::lit;
2608
- use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
2609
- use serde_json::Value as JsonValue;
2610
-
2611
- use crate::binary_cas::BlobDataReader;
2612
- use crate::functions::{
2613
- FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
2614
- };
2615
- use crate::live_state::MaterializedLiveStateRow;
2616
- use crate::live_state::{LiveStateReader, LiveStateRowRequest, LiveStateScanRequest};
2617
- use crate::sql2::dml::InsertSink;
2618
- use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
2619
- use crate::transaction::types::{
2620
- TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
2621
- };
2622
- use crate::LixError;
2623
-
2624
- use super::{
2625
- derive_directory_path_for, lix_file_delete_stage_from_batch,
2626
- lix_file_insert_stage_from_batch, lix_file_insert_stage_from_batch_with_path_resolvers,
2627
- lix_file_write_rows_from_batch, DirectoryDescriptorRecord, LixFileInsertSink,
2628
- VersionBinding,
2629
- };
2630
-
2631
- fn test_id_generator(ids: &'static [&'static str]) -> impl FnMut() -> String {
2632
- let mut ids = ids.iter();
2633
- move || ids.next().expect("test id should exist").to_string()
2634
- }
2635
-
2636
- fn test_functions() -> FunctionProviderHandle {
2637
- SharedFunctionProvider::new(
2638
- Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
2639
- )
2640
- }
2641
-
2642
- fn string_literal(value: &str) -> Expr {
2643
- Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
2644
- }
2645
-
2646
- fn column(name: &str) -> Expr {
2647
- Expr::Column(Column::from_name(name))
2648
- }
2649
-
2650
- fn eq_filter(column_name: &str, value: &str) -> Expr {
2651
- Expr::BinaryExpr(BinaryExpr::new(
2652
- Box::new(column(column_name)),
2653
- Operator::Eq,
2654
- Box::new(string_literal(value)),
2655
- ))
2656
- }
2657
-
2658
- #[test]
2659
- fn file_id_filters_support_string_id_predicates() {
2660
- let analyzer = super::LixFileIdFilterAnalyzer;
2661
- let constraint = analyzer
2662
- .analyze(&Expr::InList(InList::new(
2663
- Box::new(column("id")),
2664
- vec![string_literal("file-b"), string_literal("file-a")],
2665
- false,
2666
- )))
2667
- .unwrap()
2668
- .unwrap();
2669
-
2670
- assert_eq!(
2671
- constraint,
2672
- super::FileIdConstraint::Ids(BTreeSet::from([
2673
- "file-a".to_string(),
2674
- "file-b".to_string()
2675
- ]))
2676
- );
2677
- assert!(analyzer.supports(&eq_filter("id", "file-a")));
2678
- assert!(analyzer.supports(&Expr::BinaryExpr(BinaryExpr::new(
2679
- Box::new(string_literal("file-a")),
2680
- Operator::Eq,
2681
- Box::new(column("id")),
2682
- ))));
2683
- }
2684
-
2685
- #[test]
2686
- fn file_id_filters_intersect_and_union_boolean_predicates() {
2687
- let analyzer = super::LixFileIdFilterAnalyzer;
2688
- let left = Expr::InList(InList::new(
2689
- Box::new(column("id")),
2690
- vec![string_literal("file-a"), string_literal("file-b")],
2691
- false,
2692
- ));
2693
- let right = Expr::InList(InList::new(
2694
- Box::new(column("id")),
2695
- vec![string_literal("file-b"), string_literal("file-c")],
2696
- false,
2697
- ));
2698
-
2699
- let and_constraint = analyzer
2700
- .analyze(&Expr::BinaryExpr(BinaryExpr::new(
2701
- Box::new(left.clone()),
2702
- Operator::And,
2703
- Box::new(right.clone()),
2704
- )))
2705
- .unwrap()
2706
- .unwrap();
2707
- assert_eq!(
2708
- and_constraint,
2709
- super::FileIdConstraint::Ids(BTreeSet::from(["file-b".to_string()]))
2710
- );
2711
-
2712
- let or_constraint = analyzer
2713
- .analyze(&Expr::BinaryExpr(BinaryExpr::new(
2714
- Box::new(left),
2715
- Operator::Or,
2716
- Box::new(right),
2717
- )))
2718
- .unwrap()
2719
- .unwrap();
2720
- assert_eq!(
2721
- or_constraint,
2722
- super::FileIdConstraint::Ids(BTreeSet::from([
2723
- "file-a".to_string(),
2724
- "file-b".to_string(),
2725
- "file-c".to_string()
2726
- ]))
2727
- );
2728
- }
2729
-
2730
- #[test]
2731
- fn file_id_filters_detect_contradictions() {
2732
- let filters = vec![Expr::BinaryExpr(BinaryExpr::new(
2733
- Box::new(eq_filter("id", "file-a")),
2734
- Operator::And,
2735
- Box::new(eq_filter("id", "file-b")),
2736
- ))];
2737
-
2738
- assert_eq!(
2739
- super::file_id_constraint_from_filters(&filters).unwrap(),
2740
- super::FileIdConstraint::None
2741
- );
2742
- }
2743
-
2744
- #[test]
2745
- fn file_id_filters_ignore_non_id_and_negated_predicates() {
2746
- let analyzer = super::LixFileIdFilterAnalyzer;
2747
-
2748
- assert!(!analyzer.supports(&eq_filter("name", "readme.md")));
2749
- assert!(!analyzer.supports(&Expr::InList(InList::new(
2750
- Box::new(column("id")),
2751
- vec![string_literal("file-a")],
2752
- true,
2753
- ))));
2754
- }
2755
-
2756
- fn lix_file_update_stage_from_batch_for_test(
2757
- batch: &RecordBatch,
2758
- version_binding: Option<&str>,
2759
- update_columns: super::LixFileUpdateColumns,
2760
- path_resolvers: Option<&mut BTreeMap<String, super::DirectoryPathResolver>>,
2761
- generate_directory_id: &mut dyn FnMut() -> String,
2762
- ) -> datafusion::common::Result<super::LixFileStagedBatch> {
2763
- let mut columns = Vec::new();
2764
- if update_columns.path {
2765
- columns.extend(["path", "hidden"]);
2766
- }
2767
- if update_columns.data {
2768
- columns.push("data");
2769
- }
2770
- if update_columns.descriptor {
2771
- columns.extend(["directory_id", "name", "hidden"]);
2772
- }
2773
- let assignment_values = super::UpdateAssignmentValues::from_batch_columns(batch, &columns);
2774
- super::lix_file_update_stage_from_batch(
2775
- batch,
2776
- &assignment_values,
2777
- version_binding,
2778
- update_columns,
2779
- path_resolvers,
2780
- generate_directory_id,
2781
- )
2782
- }
2783
-
2784
- #[derive(Default)]
2785
- struct CapturingWriteContext {
2786
- rows: Vec<MaterializedLiveStateRow>,
2787
- writes: Vec<TransactionWrite>,
2788
- }
2789
-
2790
- #[async_trait]
2791
- impl BlobDataReader for CapturingWriteContext {
2792
- async fn load_bytes_many(
2793
- &self,
2794
- hashes: &[crate::binary_cas::BlobHash],
2795
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2796
- Ok(crate::binary_cas::BlobBytesBatch::new(vec![
2797
- None;
2798
- hashes.len()
2799
- ]))
2800
- }
2801
- }
2802
-
2803
- #[async_trait]
2804
- impl SqlWriteExecutionContext for CapturingWriteContext {
2805
- fn active_version_id(&self) -> &str {
2806
- "version-b"
2807
- }
2808
-
2809
- fn functions(&self) -> FunctionProviderHandle {
2810
- test_functions()
2811
- }
2812
-
2813
- fn list_visible_schemas(&self) -> Result<Vec<JsonValue>, LixError> {
2814
- Ok(Vec::new())
2815
- }
2816
-
2817
- async fn load_bytes_many(
2818
- &mut self,
2819
- hashes: &[crate::binary_cas::BlobHash],
2820
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2821
- BlobDataReader::load_bytes_many(self, hashes).await
2822
- }
2823
-
2824
- async fn scan_live_state(
2825
- &mut self,
2826
- _request: &LiveStateScanRequest,
2827
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2828
- Ok(self.rows.clone())
2829
- }
2830
-
2831
- async fn load_version_head(
2832
- &mut self,
2833
- version_id: &str,
2834
- ) -> Result<Option<String>, LixError> {
2835
- if version_id == "ghost-version" {
2836
- return Ok(None);
2837
- }
2838
- Ok(Some(format!("commit-{version_id}")))
2839
- }
2840
-
2841
- async fn stage_write(
2842
- &mut self,
2843
- write: TransactionWrite,
2844
- ) -> Result<TransactionWriteOutcome, LixError> {
2845
- self.writes.push(write);
2846
- Ok(TransactionWriteOutcome { count: 0 })
2847
- }
2848
- }
2849
-
2850
- #[derive(Default)]
2851
- struct RowsLiveStateReader {
2852
- rows: Vec<MaterializedLiveStateRow>,
2853
- }
2854
-
2855
- #[async_trait]
2856
- impl LiveStateReader for RowsLiveStateReader {
2857
- async fn scan_rows(
2858
- &self,
2859
- _request: &LiveStateScanRequest,
2860
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2861
- Ok(self.rows.clone())
2862
- }
2863
-
2864
- async fn load_row(
2865
- &self,
2866
- _request: &LiveStateRowRequest,
2867
- ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
2868
- Ok(None)
2869
- }
2870
- }
2871
-
2872
- fn live_directory_row(
2873
- entity_id: &str,
2874
- version_id: &str,
2875
- snapshot_content: &str,
2876
- ) -> MaterializedLiveStateRow {
2877
- MaterializedLiveStateRow {
2878
- entity_id: crate::entity_identity::EntityIdentity::single(entity_id),
2879
- schema_key: super::DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
2880
- file_id: None,
2881
- snapshot_content: Some(snapshot_content.to_string()),
2882
- metadata: None,
2883
- deleted: false,
2884
- version_id: version_id.to_string(),
2885
- change_id: Some(format!("change-{entity_id}")),
2886
- commit_id: Some(format!("commit-{entity_id}")),
2887
- global: false,
2888
- untracked: false,
2889
- created_at: "2026-04-23T00:00:00Z".to_string(),
2890
- updated_at: "2026-04-23T01:00:00Z".to_string(),
2891
- }
2892
- }
2893
-
2894
- fn live_file_row(
2895
- entity_id: &str,
2896
- version_id: &str,
2897
- snapshot_content: &str,
2898
- ) -> MaterializedLiveStateRow {
2899
- MaterializedLiveStateRow {
2900
- entity_id: crate::entity_identity::EntityIdentity::single(entity_id),
2901
- schema_key: super::FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
2902
- file_id: None,
2903
- snapshot_content: Some(snapshot_content.to_string()),
2904
- metadata: None,
2905
- deleted: false,
2906
- version_id: version_id.to_string(),
2907
- change_id: Some(format!("change-{entity_id}")),
2908
- commit_id: Some(format!("commit-{entity_id}")),
2909
- global: false,
2910
- untracked: false,
2911
- created_at: "2026-04-23T00:00:00Z".to_string(),
2912
- updated_at: "2026-04-23T01:00:00Z".to_string(),
2913
- }
2914
- }
2915
-
2916
- fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
2917
- Arc::new(StringArray::from(values)) as ArrayRef
2918
- }
2919
-
2920
- fn file_insert_batch(include_version: bool, global: bool) -> RecordBatch {
2921
- let mut fields = vec![
2922
- Field::new("id", DataType::Utf8, false),
2923
- Field::new("directory_id", DataType::Utf8, true),
2924
- Field::new("name", DataType::Utf8, false),
2925
- Field::new("hidden", DataType::Boolean, false),
2926
- Field::new("lixcol_global", DataType::Boolean, false),
2927
- Field::new("lixcol_metadata", DataType::Utf8, true),
2928
- ];
2929
- let mut columns = vec![
2930
- string_column(vec![Some("file-readme")]),
2931
- string_column(vec![Some("dir-docs")]),
2932
- string_column(vec![Some("readme.md")]),
2933
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2934
- Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
2935
- string_column(vec![Some("{\"source\":\"file\"}")]),
2936
- ];
2937
- if include_version {
2938
- fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
2939
- columns.push(string_column(vec![Some("version-b")]));
2940
- }
2941
- RecordBatch::try_new(Arc::new(Schema::new(fields)), columns).expect("file insert batch")
2942
- }
2943
-
2944
- fn data_insert_batch() -> RecordBatch {
2945
- RecordBatch::try_new(
2946
- Arc::new(Schema::new(vec![
2947
- Field::new("id", DataType::Utf8, false),
2948
- Field::new("directory_id", DataType::Utf8, true),
2949
- Field::new("name", DataType::Utf8, false),
2950
- Field::new("hidden", DataType::Boolean, false),
2951
- Field::new("data", DataType::Binary, true),
2952
- Field::new("lixcol_version_id", DataType::Utf8, false),
2953
- ])),
2954
- vec![
2955
- string_column(vec![Some("file-readme")]),
2956
- string_column(vec![Some("dir-docs")]),
2957
- string_column(vec![Some("readme.md")]),
2958
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2959
- Arc::new(BinaryArray::from_vec(vec![b"hello"])) as ArrayRef,
2960
- string_column(vec![Some("version-b")]),
2961
- ],
2962
- )
2963
- .expect("file data batch")
2964
- }
2965
-
2966
- fn path_data_insert_batch() -> RecordBatch {
2967
- RecordBatch::try_new(
2968
- Arc::new(Schema::new(vec![
2969
- Field::new("id", DataType::Utf8, false),
2970
- Field::new("path", DataType::Utf8, false),
2971
- Field::new("hidden", DataType::Boolean, false),
2972
- Field::new("data", DataType::Binary, true),
2973
- Field::new("lixcol_version_id", DataType::Utf8, false),
2974
- ])),
2975
- vec![
2976
- string_column(vec![Some("file-readme")]),
2977
- string_column(vec![Some("/docs/guides/readme.md")]),
2978
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2979
- Arc::new(BinaryArray::from_vec(vec![b"hello"])) as ArrayRef,
2980
- string_column(vec![Some("version-b")]),
2981
- ],
2982
- )
2983
- .expect("file path data batch")
2984
- }
2985
-
2986
- fn path_update_batch() -> RecordBatch {
2987
- RecordBatch::try_new(
2988
- Arc::new(Schema::new(vec![
2989
- Field::new("id", DataType::Utf8, false),
2990
- Field::new("path", DataType::Utf8, false),
2991
- Field::new("hidden", DataType::Boolean, false),
2992
- Field::new("data", DataType::Binary, true),
2993
- Field::new("lixcol_version_id", DataType::Utf8, false),
2994
- ])),
2995
- vec![
2996
- string_column(vec![Some("file-readme")]),
2997
- string_column(vec![Some("/docs/renamed.md")]),
2998
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2999
- Arc::new(BinaryArray::from_vec(vec![b"hello"])) as ArrayRef,
3000
- string_column(vec![Some("version-b")]),
3001
- ],
3002
- )
3003
- .expect("file path update batch")
3004
- }
3005
-
3006
- fn file_delete_batch() -> RecordBatch {
3007
- RecordBatch::try_new(
3008
- Arc::new(Schema::new(vec![
3009
- Field::new("id", DataType::Utf8, false),
3010
- Field::new("lixcol_version_id", DataType::Utf8, false),
3011
- ])),
3012
- vec![
3013
- string_column(vec![Some("file-readme")]),
3014
- string_column(vec![Some("version-b")]),
3015
- ],
3016
- )
3017
- .expect("file delete batch")
3018
- }
3019
-
3020
- #[test]
3021
- fn derives_nested_directory_paths() {
3022
- let root = DirectoryDescriptorRecord {
3023
- id: "dir-docs".to_string(),
3024
- parent_id: None,
3025
- name: "docs".to_string(),
3026
- version_id: "version-a".to_string(),
3027
- };
3028
- let child = DirectoryDescriptorRecord {
3029
- id: "dir-guides".to_string(),
3030
- parent_id: Some("dir-docs".to_string()),
3031
- name: "guides".to_string(),
3032
- version_id: "version-a".to_string(),
3033
- };
3034
- let mut records = BTreeMap::new();
3035
- records.insert(root.id.clone(), &root);
3036
- records.insert(child.id.clone(), &child);
3037
- let mut paths = BTreeMap::new();
3038
-
3039
- assert_eq!(
3040
- derive_directory_path_for(
3041
- "version-a",
3042
- "dir-guides",
3043
- &records,
3044
- &mut paths,
3045
- &mut BTreeSet::new()
3046
- )
3047
- .expect("path derivation should succeed"),
3048
- Some("/docs/guides/".to_string())
3049
- );
3050
- }
3051
-
3052
- #[tokio::test]
3053
- async fn file_projection_rejects_unresolved_non_root_directory_id() {
3054
- let blob_reader = Arc::new(CapturingWriteContext::default()) as Arc<dyn BlobDataReader>;
3055
- let error = super::lix_file_record_batch(
3056
- &super::lix_file_schema(),
3057
- &blob_reader,
3058
- vec![live_file_row(
3059
- "file-readme",
3060
- "version-b",
3061
- "{\"id\":\"file-readme\",\"directory_id\":\"missing-dir\",\"name\":\"readme.md\",\"hidden\":false}",
3062
- )],
3063
- )
3064
- .await
3065
- .expect_err("unresolved non-root directory_id should not project as root path");
3066
-
3067
- assert_eq!(error.code, LixError::CODE_FOREIGN_KEY);
3068
- assert!(error.message.contains("missing-dir"));
3069
- }
3070
-
3071
- #[test]
3072
- fn decodes_file_insert_into_lix_state_write_row() {
3073
- let batch = file_insert_batch(true, false);
3074
-
3075
- let rows = lix_file_write_rows_from_batch(&batch, None).expect("decode file insert");
3076
-
3077
- assert_eq!(rows.len(), 1);
3078
- assert_eq!(
3079
- rows[0].entity_id.as_ref(),
3080
- Some(&crate::entity_identity::EntityIdentity::single(
3081
- "file-readme"
3082
- ))
3083
- );
3084
- assert_eq!(rows[0].schema_key, "lix_file_descriptor");
3085
- assert_eq!(rows[0].version_id, "version-b");
3086
- assert_eq!(
3087
- rows[0].metadata.as_ref(),
3088
- Some(&TransactionJson::from_value_for_test(
3089
- serde_json::json!({"source": "file"})
3090
- ))
3091
- );
3092
- let snapshot = rows[0].snapshot.as_ref().expect("descriptor snapshot JSON");
3093
- assert_eq!(snapshot["id"], "file-readme");
3094
- assert_eq!(snapshot["directory_id"], "dir-docs");
3095
- assert_eq!(snapshot["name"], "readme.md");
3096
- assert_eq!(snapshot["hidden"], false);
3097
- }
3098
-
3099
- #[test]
3100
- fn active_file_insert_defaults_version_id() {
3101
- let batch = file_insert_batch(false, false);
3102
-
3103
- let rows =
3104
- lix_file_write_rows_from_batch(&batch, Some("version-a")).expect("decode file insert");
3105
-
3106
- assert_eq!(rows.len(), 1);
3107
- assert_eq!(rows[0].version_id, "version-a");
3108
- }
3109
-
3110
- #[test]
3111
- fn by_version_file_insert_requires_version_id_for_non_global_rows() {
3112
- let batch = file_insert_batch(false, false);
3113
-
3114
- let error =
3115
- lix_file_write_rows_from_batch(&batch, None).expect_err("version id is required");
3116
-
3117
- assert!(
3118
- error.to_string().contains("requires lixcol_version_id"),
3119
- "unexpected error: {error}"
3120
- );
3121
- }
3122
-
3123
- #[test]
3124
- fn file_insert_rejects_global_with_non_global_version_id() {
3125
- let error = lix_file_write_rows_from_batch(&file_insert_batch(true, true), None)
3126
- .expect_err("global file write should reject conflicting version id");
3127
-
3128
- assert!(
3129
- error
3130
- .to_string()
3131
- .contains("cannot set lixcol_global=true with non-global lixcol_version_id"),
3132
- "unexpected error: {error}"
3133
- );
3134
- }
3135
-
3136
- #[test]
3137
- fn file_update_accepts_path_assignment() {
3138
- super::validate_lix_file_update_assignments(
3139
- &super::lix_file_schema(),
3140
- &[("path".to_string(), lit("/docs/renamed.md"))],
3141
- )
3142
- .expect("path should be writable for update");
3143
- }
3144
-
3145
- #[test]
3146
- fn file_path_update_stages_descriptor_from_new_path() {
3147
- let mut resolvers = BTreeMap::new();
3148
- resolvers.insert(
3149
- super::filesystem_storage_scope_key("version-b", false, false, None),
3150
- super::DirectoryPathResolver::from_existing([(
3151
- "/docs/".to_string(),
3152
- "dir-docs".to_string(),
3153
- )])
3154
- .expect("directory resolver should seed"),
3155
- );
3156
-
3157
- let staged = lix_file_update_stage_from_batch_for_test(
3158
- &path_update_batch(),
3159
- None,
3160
- super::LixFileUpdateColumns {
3161
- path: true,
3162
- data: false,
3163
- descriptor: false,
3164
- },
3165
- Some(&mut resolvers),
3166
- &mut test_id_generator(&["should-not-be-used"]),
3167
- )
3168
- .expect("decode file path update");
3169
-
3170
- assert_eq!(staged.count, 1);
3171
- assert_eq!(staged.file_data_writes.len(), 0);
3172
- assert_eq!(staged.state_rows.len(), 1);
3173
- let descriptor = staged
3174
- .state_rows
3175
- .iter()
3176
- .find(|row| row.schema_key == "lix_file_descriptor")
3177
- .expect("file descriptor row should be staged");
3178
- let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3179
- assert_eq!(snapshot["id"], "file-readme");
3180
- assert_eq!(snapshot["directory_id"], "dir-docs");
3181
- assert_eq!(snapshot["name"], "renamed.md");
3182
- assert_eq!(snapshot["hidden"], false);
3183
- }
3184
-
3185
- #[test]
3186
- fn file_path_update_preserves_existing_data_unless_data_is_assigned() {
3187
- let mut resolvers = BTreeMap::new();
3188
- resolvers.insert(
3189
- super::filesystem_storage_scope_key("version-b", false, false, None),
3190
- super::DirectoryPathResolver::from_existing([(
3191
- "/docs/".to_string(),
3192
- "dir-docs".to_string(),
3193
- )])
3194
- .expect("directory resolver should seed"),
3195
- );
3196
-
3197
- let staged = lix_file_update_stage_from_batch_for_test(
3198
- &path_update_batch(),
3199
- None,
3200
- super::LixFileUpdateColumns {
3201
- path: true,
3202
- data: false,
3203
- descriptor: false,
3204
- },
3205
- Some(&mut resolvers),
3206
- &mut test_id_generator(&["should-not-be-used"]),
3207
- )
3208
- .expect("decode file path update");
3209
-
3210
- assert!(
3211
- staged.file_data_writes.is_empty(),
3212
- "path-only update should not rewrite file data"
3213
- );
3214
- assert!(
3215
- staged
3216
- .state_rows
3217
- .iter()
3218
- .all(|row| row.schema_key != "lix_binary_blob_ref"),
3219
- "path-only update should not rewrite the blob ref"
3220
- );
3221
- }
3222
-
3223
- #[tokio::test]
3224
- async fn file_path_update_seeds_resolver_from_visible_directory_state() {
3225
- let mut resolvers = super::file_path_resolvers_from_live_state(
3226
- Arc::new(RowsLiveStateReader {
3227
- rows: vec![live_directory_row(
3228
- "dir-docs",
3229
- "version-b",
3230
- "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\"}",
3231
- )],
3232
- }) as Arc<dyn LiveStateReader>,
3233
- Some("version-b"),
3234
- )
3235
- .await
3236
- .expect("directory state should seed path resolver");
3237
-
3238
- let staged = lix_file_update_stage_from_batch_for_test(
3239
- &path_update_batch(),
3240
- None,
3241
- super::LixFileUpdateColumns {
3242
- path: true,
3243
- data: false,
3244
- descriptor: false,
3245
- },
3246
- Some(&mut resolvers),
3247
- &mut test_id_generator(&["should-not-be-used"]),
3248
- )
3249
- .expect("decode file path update");
3250
-
3251
- assert_eq!(staged.count, 1);
3252
- assert_eq!(staged.state_rows.len(), 1);
3253
- assert!(staged
3254
- .state_rows
3255
- .iter()
3256
- .all(|row| row.schema_key != "lix_directory_descriptor"));
3257
-
3258
- let snapshot: JsonValue = staged.state_rows[0]
3259
- .snapshot
3260
- .as_ref()
3261
- .unwrap()
3262
- .value()
3263
- .clone();
3264
- assert_eq!(snapshot["directory_id"], "dir-docs");
3265
- assert_eq!(snapshot["name"], "renamed.md");
3266
- }
3267
-
3268
- #[tokio::test]
3269
- async fn file_path_update_stages_only_missing_parent_directories() {
3270
- let mut resolvers = super::file_path_resolvers_from_live_state(
3271
- Arc::new(RowsLiveStateReader::default()) as Arc<dyn LiveStateReader>,
3272
- Some("version-b"),
3273
- )
3274
- .await
3275
- .expect("empty directory state should seed path resolver");
3276
-
3277
- let staged = lix_file_update_stage_from_batch_for_test(
3278
- &path_update_batch(),
3279
- None,
3280
- super::LixFileUpdateColumns {
3281
- path: true,
3282
- data: false,
3283
- descriptor: false,
3284
- },
3285
- Some(&mut resolvers),
3286
- &mut test_id_generator(&["dir-generated-docs"]),
3287
- )
3288
- .expect("decode file path update");
3289
-
3290
- assert_eq!(staged.count, 1);
3291
- assert_eq!(staged.state_rows.len(), 2);
3292
- assert_eq!(
3293
- staged
3294
- .state_rows
3295
- .iter()
3296
- .filter(|row| row.schema_key == "lix_directory_descriptor")
3297
- .count(),
3298
- 1
3299
- );
3300
-
3301
- let directory = staged
3302
- .state_rows
3303
- .iter()
3304
- .find(|row| row.schema_key == "lix_directory_descriptor")
3305
- .expect("missing /docs/ directory should be staged");
3306
- assert_eq!(
3307
- directory.entity_id.as_ref(),
3308
- Some(&crate::entity_identity::EntityIdentity::single(
3309
- "dir-generated-docs"
3310
- ))
3311
- );
3312
-
3313
- let descriptor = staged
3314
- .state_rows
3315
- .iter()
3316
- .find(|row| row.schema_key == "lix_file_descriptor")
3317
- .expect("file descriptor should be staged");
3318
- let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3319
- assert_eq!(snapshot["directory_id"], "dir-generated-docs");
3320
- }
3321
-
3322
- #[test]
3323
- fn file_path_update_with_data_assignment_stages_blob_ref_and_payload() {
3324
- let mut resolvers = BTreeMap::new();
3325
- resolvers.insert(
3326
- super::filesystem_storage_scope_key("version-b", false, false, None),
3327
- super::DirectoryPathResolver::from_existing([(
3328
- "/docs/".to_string(),
3329
- "dir-docs".to_string(),
3330
- )])
3331
- .expect("directory resolver should seed"),
3332
- );
3333
-
3334
- let staged = lix_file_update_stage_from_batch_for_test(
3335
- &path_update_batch(),
3336
- None,
3337
- super::LixFileUpdateColumns {
3338
- path: true,
3339
- data: true,
3340
- descriptor: false,
3341
- },
3342
- Some(&mut resolvers),
3343
- &mut test_id_generator(&["should-not-be-used"]),
3344
- )
3345
- .expect("decode file path and data update");
3346
-
3347
- assert_eq!(staged.count, 1);
3348
- assert_eq!(staged.file_data_writes.len(), 1);
3349
- assert_eq!(staged.file_data_writes[0].file_id, "file-readme");
3350
- assert_eq!(staged.file_data_writes[0].data, b"hello");
3351
- assert!(staged
3352
- .state_rows
3353
- .iter()
3354
- .any(|row| row.schema_key == "lix_file_descriptor"));
3355
- assert!(staged
3356
- .state_rows
3357
- .iter()
3358
- .any(|row| row.schema_key == "lix_binary_blob_ref"));
3359
- }
3360
-
3361
- #[test]
3362
- fn file_data_update_without_path_ignores_materialized_path_column() {
3363
- let staged = lix_file_update_stage_from_batch_for_test(
3364
- &path_update_batch(),
3365
- None,
3366
- super::LixFileUpdateColumns {
3367
- path: false,
3368
- data: true,
3369
- descriptor: false,
3370
- },
3371
- None,
3372
- &mut test_id_generator(&["should-not-be-used"]),
3373
- )
3374
- .expect("decode file data update");
3375
-
3376
- assert_eq!(staged.count, 1);
3377
- assert_eq!(staged.file_data_writes.len(), 1);
3378
- assert_eq!(staged.file_data_writes[0].file_id, "file-readme");
3379
- assert_eq!(staged.state_rows.len(), 1);
3380
- assert_eq!(staged.state_rows[0].schema_key, "lix_binary_blob_ref");
3381
- }
3382
-
3383
/// Stages the batch from `data_insert_batch()` and checks that it yields a
/// file descriptor row, a blob ref row keyed by `file-readme`, and one file
/// data write carrying `b"hello"` for `version-b`.
#[test]
fn file_insert_stages_non_null_data() {
    let input = data_insert_batch();

    let outcome = lix_file_insert_stage_from_batch(&input, None).expect("decode file data");

    assert_eq!(outcome.count, 1);

    // Two state rows are expected: the descriptor and the blob ref.
    let rows = &outcome.state_rows;
    assert_eq!(rows.len(), 2);
    assert!(rows
        .iter()
        .any(|candidate| candidate.schema_key == "lix_file_descriptor"));

    let blob_ref = rows
        .iter()
        .find(|candidate| candidate.schema_key == "lix_binary_blob_ref")
        .expect("data insert should stage blob ref row");
    let expected_identity = crate::entity_identity::EntityIdentity::single("file-readme");
    assert_eq!(blob_ref.entity_id.as_ref(), Some(&expected_identity));
    assert_eq!(blob_ref.file_id.as_deref(), Some("file-readme"));

    let data_writes = &outcome.file_data_writes;
    assert_eq!(data_writes.len(), 1);
    let write = &data_writes[0];
    assert_eq!(write.file_id, "file-readme");
    assert_eq!(write.version_id, "version-b");
    assert_eq!(write.data, b"hello");
}
3412
-
3413
/// Stages a file delete while passing a set containing `file-readme` and
/// checks that two snapshot-less rows are staged: one `lix_file_descriptor`
/// and one `lix_binary_blob_ref`, both identified by `file-readme`.
#[test]
fn file_delete_with_blob_ref_stages_descriptor_and_blob_ref_tombstones() {
    let input = file_delete_batch();
    // NOTE(review): going by the test name, this set lists the file ids that
    // currently have a blob ref -- confirm against the staged function.
    let mut file_ids = BTreeSet::new();
    file_ids.insert(String::from("file-readme"));

    let outcome =
        lix_file_delete_stage_from_batch(&input, None, &file_ids).expect("decode file delete");

    assert_eq!(outcome.count, 1);
    let rows = &outcome.state_rows;
    assert_eq!(rows.len(), 2);

    // Descriptor tombstone: no file id and no snapshot.
    let descriptor_row = rows
        .iter()
        .find(|candidate| candidate.schema_key == "lix_file_descriptor")
        .expect("file descriptor tombstone should be staged");
    let descriptor_identity = crate::entity_identity::EntityIdentity::single("file-readme");
    assert_eq!(descriptor_row.entity_id.as_ref(), Some(&descriptor_identity));
    assert_eq!(descriptor_row.file_id, None);
    assert_eq!(descriptor_row.snapshot, None);

    // Blob ref tombstone: keeps the file id but also carries no snapshot.
    let blob_ref_row = rows
        .iter()
        .find(|candidate| candidate.schema_key == "lix_binary_blob_ref")
        .expect("blob ref tombstone should be staged");
    let blob_ref_identity = crate::entity_identity::EntityIdentity::single("file-readme");
    assert_eq!(blob_ref_row.entity_id.as_ref(), Some(&blob_ref_identity));
    assert_eq!(blob_ref_row.file_id.as_deref(), Some("file-readme"));
    assert_eq!(blob_ref_row.snapshot, None);
}
3453
-
3454
/// Stages a file delete with an empty id set and checks that the only staged
/// row is a snapshot-less `lix_file_descriptor` identified by `file-readme`.
#[test]
fn file_delete_without_blob_ref_stages_only_descriptor_tombstone() {
    let input = file_delete_batch();
    let no_file_ids = BTreeSet::new();

    let outcome = lix_file_delete_stage_from_batch(&input, None, &no_file_ids)
        .expect("decode file delete");

    assert_eq!(outcome.count, 1);
    let rows = &outcome.state_rows;
    assert_eq!(rows.len(), 1);

    let only_row = &rows[0];
    assert_eq!(only_row.schema_key, "lix_file_descriptor");
    let expected_identity = crate::entity_identity::EntityIdentity::single("file-readme");
    assert_eq!(only_row.entity_id.as_ref(), Some(&expected_identity));
    assert_eq!(only_row.snapshot, None);
}
3471
-
3472
/// Seeds a directory resolver with `/docs/` and `/docs/guides/`, stages a
/// path-based file insert, and checks that the file descriptor snapshot points
/// at the pre-existing `dir-guides` directory with only two state rows staged.
#[test]
fn file_path_insert_reuses_existing_parent_directory() {
    let mut resolvers = BTreeMap::new();
    let scope_key = super::filesystem_storage_scope_key("version-b", false, false, None);
    let seeded_resolver = super::DirectoryPathResolver::from_existing([
        (String::from("/docs/"), String::from("dir-docs")),
        (String::from("/docs/guides/"), String::from("dir-guides")),
    ])
    .expect("directory resolver should seed");
    resolvers.insert(scope_key, seeded_resolver);

    let input = path_data_insert_batch();
    // The only id on offer is named "should-not-be-used"; the row-count and
    // directory-id assertions below are what show no directory was created.
    let mut id_generator = test_id_generator(&["should-not-be-used"]);
    let outcome = lix_file_insert_stage_from_batch_with_path_resolvers(
        &input,
        None,
        "lix_file",
        &mut resolvers,
        &mut id_generator,
        true,
    )
    .expect("decode file path data");

    assert_eq!(outcome.count, 1);
    let data_writes = &outcome.file_data_writes;
    assert_eq!(data_writes.len(), 1);
    assert_eq!(data_writes[0].file_id, "file-readme");

    let rows = &outcome.state_rows;
    assert_eq!(rows.len(), 2);
    let descriptor_row = rows
        .iter()
        .find(|candidate| candidate.schema_key == "lix_file_descriptor")
        .expect("file descriptor row should be staged");
    let descriptor_json: JsonValue = descriptor_row.snapshot.as_ref().unwrap().value().clone();
    assert_eq!(descriptor_json["id"], "file-readme");
    assert_eq!(descriptor_json["directory_id"], "dir-guides");
    assert_eq!(descriptor_json["name"], "readme.md");
}
3508
-
3509
/// Stages a path-based file insert with no seeded resolvers and checks that
/// two `lix_directory_descriptor` rows are staged alongside the file rows, and
/// that the file descriptor references the generated `dir-generated-guides` id.
#[test]
fn file_path_insert_stages_missing_parent_directories_once() {
    // Start with no known directories at all.
    let mut resolvers = BTreeMap::new();
    let input = path_data_insert_batch();
    let mut id_generator = test_id_generator(&["dir-generated-docs", "dir-generated-guides"]);

    let outcome = lix_file_insert_stage_from_batch_with_path_resolvers(
        &input,
        None,
        "lix_file",
        &mut resolvers,
        &mut id_generator,
        true,
    )
    .expect("decode file path data");

    assert_eq!(outcome.count, 1);
    let rows = &outcome.state_rows;
    assert_eq!(rows.len(), 4);

    let directory_row_count = rows
        .iter()
        .filter(|candidate| candidate.schema_key == "lix_directory_descriptor")
        .count();
    assert_eq!(directory_row_count, 2);

    let descriptor_row = rows
        .iter()
        .find(|candidate| candidate.schema_key == "lix_file_descriptor")
        .expect("file descriptor row should be staged");
    let descriptor_json: JsonValue = descriptor_row.snapshot.as_ref().unwrap().value().clone();
    assert_eq!(descriptor_json["directory_id"], "dir-generated-guides");
}
3540
-
3541
- #[tokio::test]
3542
- async fn file_insert_sink_stages_decoded_lix_state_rows() {
3543
- let batch = file_insert_batch(true, false);
3544
- let mut write_context = CapturingWriteContext::default();
3545
- let write_ctx = SqlWriteContext::new(&mut write_context);
3546
- let sink = LixFileInsertSink::new(
3547
- batch.schema(),
3548
- write_ctx,
3549
- test_functions(),
3550
- VersionBinding::explicit(),
3551
- false,
3552
- );
3553
-
3554
- let count = sink
3555
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
3556
- .await
3557
- .expect("file insert sink should stage");
3558
-
3559
- assert_eq!(count, 1);
3560
- let writes = &write_context.writes;
3561
- assert_eq!(writes.len(), 1);
3562
- match &writes[0] {
3563
- TransactionWrite::Rows { mode, rows } => {
3564
- assert_eq!(*mode, TransactionWriteMode::Insert);
3565
- assert_eq!(rows.len(), 1);
3566
- assert_eq!(
3567
- rows[0].entity_id.as_ref(),
3568
- Some(&crate::entity_identity::EntityIdentity::single(
3569
- "file-readme"
3570
- ))
3571
- );
3572
- assert_eq!(rows[0].schema_key, "lix_file_descriptor");
3573
- }
3574
- other => panic!("expected insert staged write, got {other:?}"),
3575
- }
3576
- }
3577
-
3578
- #[tokio::test]
3579
- async fn file_insert_sink_stages_file_data_writes() {
3580
- let batch = data_insert_batch();
3581
- let mut write_context = CapturingWriteContext::default();
3582
- let write_ctx = SqlWriteContext::new(&mut write_context);
3583
- let sink = LixFileInsertSink::new(
3584
- batch.schema(),
3585
- write_ctx,
3586
- test_functions(),
3587
- VersionBinding::explicit(),
3588
- true,
3589
- );
3590
-
3591
- let count = sink
3592
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
3593
- .await
3594
- .expect("file insert sink should stage data");
3595
-
3596
- assert_eq!(count, 1);
3597
- let writes = &write_context.writes;
3598
- assert_eq!(writes.len(), 1);
3599
- match &writes[0] {
3600
- TransactionWrite::RowsWithFileData {
3601
- mode,
3602
- rows,
3603
- file_data,
3604
- count,
3605
- ..
3606
- } => {
3607
- assert_eq!(*mode, TransactionWriteMode::Insert);
3608
- assert_eq!(*count, 1);
3609
- assert_eq!(rows.len(), 2);
3610
- assert!(rows
3611
- .iter()
3612
- .any(|row| row.schema_key == "lix_file_descriptor"));
3613
- assert!(rows
3614
- .iter()
3615
- .any(|row| row.schema_key == "lix_binary_blob_ref"));
3616
- assert_eq!(file_data.len(), 1);
3617
- assert_eq!(file_data[0].file_id, "file-readme");
3618
- assert_eq!(file_data[0].data, b"hello");
3619
- }
3620
- other => panic!("expected insert with file data staged write, got {other:?}"),
3621
- }
3622
- }
3623
-
3624
- #[tokio::test]
3625
- async fn file_insert_sink_seeds_path_resolver_from_live_state() {
3626
- let batch = path_data_insert_batch();
3627
- let mut write_context = CapturingWriteContext {
3628
- rows: vec![
3629
- live_directory_row(
3630
- "dir-docs",
3631
- "version-b",
3632
- "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\"}",
3633
- ),
3634
- live_directory_row(
3635
- "dir-guides",
3636
- "version-b",
3637
- "{\"id\":\"dir-guides\",\"parent_id\":\"dir-docs\",\"name\":\"guides\"}",
3638
- ),
3639
- ],
3640
- writes: Vec::new(),
3641
- };
3642
- let write_ctx = SqlWriteContext::new(&mut write_context);
3643
- let sink = LixFileInsertSink::new(
3644
- batch.schema(),
3645
- write_ctx,
3646
- test_functions(),
3647
- VersionBinding::explicit(),
3648
- true,
3649
- );
3650
-
3651
- let count = sink
3652
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
3653
- .await
3654
- .expect("file insert sink should stage path data");
3655
-
3656
- assert_eq!(count, 1);
3657
- let writes = &write_context.writes;
3658
- assert_eq!(writes.len(), 1);
3659
- match &writes[0] {
3660
- TransactionWrite::RowsWithFileData {
3661
- rows,
3662
- file_data,
3663
- count,
3664
- ..
3665
- } => {
3666
- assert_eq!(*count, 1);
3667
- assert_eq!(file_data.len(), 1);
3668
- assert_eq!(file_data[0].file_id, "file-readme");
3669
- let descriptor = rows
3670
- .iter()
3671
- .find(|row| row.schema_key == "lix_file_descriptor")
3672
- .expect("file descriptor row should be staged");
3673
- let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3674
- assert_eq!(snapshot["directory_id"], "dir-guides");
3675
- }
3676
- other => panic!("expected insert with file data staged write, got {other:?}"),
3677
- }
3678
- }
3679
- }