@lix-js/sdk 0.6.0-preview.0 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. package/README.md +9 -0
  2. package/SKILL.md +468 -0
  3. package/dist/engine-wasm/index.d.ts +15 -11
  4. package/dist/engine-wasm/index.js +105 -38
  5. package/dist/engine-wasm/wasm/lix_engine.d.ts +14 -2
  6. package/dist/engine-wasm/wasm/lix_engine.js +18 -17
  7. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  8. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +2 -1
  9. package/dist/generated/builtin-schemas.d.ts +31 -41
  10. package/dist/generated/builtin-schemas.js +52 -56
  11. package/dist/open-lix.d.ts +141 -24
  12. package/dist/open-lix.js +199 -35
  13. package/dist/sqlite/index.js +99 -22
  14. package/dist-engine-src/README.md +18 -0
  15. package/dist-engine-src/src/backend/kv.rs +358 -0
  16. package/dist-engine-src/src/backend/mod.rs +12 -0
  17. package/dist-engine-src/src/backend/testing.rs +658 -0
  18. package/dist-engine-src/src/backend/types.rs +96 -0
  19. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  20. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  21. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  22. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  23. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  24. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  25. package/dist-engine-src/src/cel/context.rs +86 -0
  26. package/dist-engine-src/src/cel/error.rs +19 -0
  27. package/dist-engine-src/src/cel/mod.rs +8 -0
  28. package/dist-engine-src/src/cel/provider.rs +9 -0
  29. package/dist-engine-src/src/cel/runtime.rs +167 -0
  30. package/dist-engine-src/src/cel/value.rs +50 -0
  31. package/dist-engine-src/src/changelog/codec.rs +321 -0
  32. package/dist-engine-src/src/changelog/context.rs +92 -0
  33. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  34. package/dist-engine-src/src/changelog/mod.rs +13 -0
  35. package/dist-engine-src/src/changelog/reader.rs +20 -0
  36. package/dist-engine-src/src/changelog/storage.rs +220 -0
  37. package/dist-engine-src/src/changelog/types.rs +38 -0
  38. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  39. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  40. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  41. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +135 -0
  46. package/dist-engine-src/src/common/metadata.rs +35 -0
  47. package/dist-engine-src/src/common/mod.rs +23 -0
  48. package/dist-engine-src/src/common/types.rs +105 -0
  49. package/dist-engine-src/src/common/wire.rs +222 -0
  50. package/dist-engine-src/src/engine.rs +239 -0
  51. package/dist-engine-src/src/entity_identity.rs +285 -0
  52. package/dist-engine-src/src/functions/context.rs +327 -0
  53. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  54. package/dist-engine-src/src/functions/mod.rs +18 -0
  55. package/dist-engine-src/src/functions/provider.rs +130 -0
  56. package/dist-engine-src/src/functions/state.rs +363 -0
  57. package/dist-engine-src/src/functions/types.rs +37 -0
  58. package/dist-engine-src/src/init.rs +505 -0
  59. package/dist-engine-src/src/json_store/compression.rs +77 -0
  60. package/dist-engine-src/src/json_store/context.rs +129 -0
  61. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  62. package/dist-engine-src/src/json_store/mod.rs +9 -0
  63. package/dist-engine-src/src/json_store/store.rs +236 -0
  64. package/dist-engine-src/src/json_store/types.rs +52 -0
  65. package/dist-engine-src/src/lib.rs +61 -0
  66. package/dist-engine-src/src/live_state/context.rs +2241 -0
  67. package/dist-engine-src/src/live_state/mod.rs +15 -0
  68. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  69. package/dist-engine-src/src/live_state/reader.rs +23 -0
  70. package/dist-engine-src/src/live_state/types.rs +239 -0
  71. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  72. package/dist-engine-src/src/plugin/archive.rs +441 -0
  73. package/dist-engine-src/src/plugin/component.rs +183 -0
  74. package/dist-engine-src/src/plugin/install.rs +637 -0
  75. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  76. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  77. package/dist-engine-src/src/plugin/mod.rs +33 -0
  78. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  79. package/dist-engine-src/src/plugin/storage.rs +74 -0
  80. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  81. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  82. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  83. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  84. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  85. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  86. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  92. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  93. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  94. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  100. package/dist-engine-src/src/schema/definition.json +157 -0
  101. package/dist-engine-src/src/schema/definition.rs +636 -0
  102. package/dist-engine-src/src/schema/key.rs +206 -0
  103. package/dist-engine-src/src/schema/mod.rs +20 -0
  104. package/dist-engine-src/src/schema/seed.rs +14 -0
  105. package/dist-engine-src/src/schema/tests.rs +739 -0
  106. package/dist-engine-src/src/schema_registry.rs +294 -0
  107. package/dist-engine-src/src/session/context.rs +366 -0
  108. package/dist-engine-src/src/session/create_version.rs +80 -0
  109. package/dist-engine-src/src/session/execute.rs +447 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +437 -0
  116. package/dist-engine-src/src/session/mod.rs +25 -0
  117. package/dist-engine-src/src/session/switch_version.rs +121 -0
  118. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  119. package/dist-engine-src/src/sql2/classify.rs +147 -0
  120. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  121. package/dist-engine-src/src/sql2/context.rs +307 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  127. package/dist-engine-src/src/sql2/error.rs +196 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  138. package/dist-engine-src/src/sql2/mod.rs +43 -0
  139. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  140. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  141. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  142. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  143. package/dist-engine-src/src/sql2/session.rs +135 -0
  144. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  148. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  149. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  150. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  153. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  154. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  155. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  156. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  157. package/dist-engine-src/src/storage/context.rs +356 -0
  158. package/dist-engine-src/src/storage/mod.rs +14 -0
  159. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  160. package/dist-engine-src/src/storage/types.rs +501 -0
  161. package/dist-engine-src/src/storage_bench.rs +3406 -0
  162. package/dist-engine-src/src/test_support.rs +81 -0
  163. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  164. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  165. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  166. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  167. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  168. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  169. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  170. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  171. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  172. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  173. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  174. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  175. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  176. package/dist-engine-src/src/transaction/context.rs +1307 -0
  177. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  178. package/dist-engine-src/src/transaction/mod.rs +11 -0
  179. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  180. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  181. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  182. package/dist-engine-src/src/transaction/types.rs +351 -0
  183. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  184. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  185. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  186. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  187. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  188. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  189. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  190. package/dist-engine-src/src/version/context.rs +52 -0
  191. package/dist-engine-src/src/version/mod.rs +12 -0
  192. package/dist-engine-src/src/version/refs.rs +421 -0
  193. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  194. package/dist-engine-src/src/version/types.rs +21 -0
  195. package/dist-engine-src/src/wasm/mod.rs +60 -0
  196. package/package.json +68 -63
@@ -0,0 +1,2405 @@
1
+ use std::any::Any;
2
+ use std::collections::{BTreeMap, BTreeSet};
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{
7
+ ArrayRef, BooleanArray, RecordBatchOptions, StringArray, UInt64Array,
8
+ };
9
+ use datafusion::arrow::compute::{and, filter_record_batch};
10
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
11
+ use datafusion::arrow::record_batch::RecordBatch;
12
+ use datafusion::catalog::{Session, TableProvider};
13
+ use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue};
14
+ use datafusion::datasource::TableType;
15
+ use datafusion::execution::TaskContext;
16
+ use datafusion::logical_expr::dml::InsertOp;
17
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
18
+ use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
19
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
20
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
21
+ use datafusion::physical_plan::{
22
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
23
+ };
24
+ use datafusion::prelude::SessionContext;
25
+ use futures_util::{stream, TryStreamExt};
26
+ use serde::Deserialize;
27
+
28
+ use crate::functions::FunctionProviderHandle;
29
+ use crate::live_state::LiveStateRow;
30
+ use crate::live_state::{
31
+ LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
32
+ };
33
+ use crate::sql2::dml::{InsertExec, InsertSink};
34
+ use crate::sql2::filesystem_predicates::{
35
+ canonicalize_filesystem_path_filters, FilesystemPathKind,
36
+ };
37
+ use crate::sql2::version_scope::{
38
+ explicit_version_ids_from_dml_filters, resolve_provider_version_ids,
39
+ resolve_write_version_scope, VersionBinding,
40
+ };
41
+ use crate::sql2::write_normalization::{InsertCell, SqlCell, UpdateAssignmentValues};
42
+ use crate::transaction::types::{LogicalPrimaryKey, StageRow, StageRowOrigin, StageWriteOperation};
43
+ use crate::version::VersionRefReader;
44
+ use crate::{parse_row_metadata, serialize_row_metadata, LixError, RowMetadata};
45
+
46
+ use super::filesystem_planner::{
47
+ directory_descriptor_write_row, directory_path_resolvers_from_state_rows,
48
+ filesystem_storage_scope_key, plan_recursive_directory_delete, DirectoryDescriptorWriteIntent,
49
+ DirectoryPathResolver, FilesystemDeletePlan, FilesystemRowContext,
50
+ };
51
+ use super::filesystem_visibility::VisibleFilesystem;
52
+ use super::result_metadata::json_field;
53
+ use crate::sql2::{
54
+ SqlWriteContext, WriteAccess, WriteContextLiveStateReader, WriteContextVersionRefReader,
55
+ };
56
+ use crate::transaction::types::{StageWrite, StageWriteMode};
57
+
58
/// Schema key literal `lix_directory_descriptor`. The package ships a builtin
/// schema file with the same stem (`schema/builtin/lix_directory_descriptor.json`).
const DIRECTORY_SCHEMA_KEY: &str = "lix_directory_descriptor";

/// Schema key literal `lix_file_descriptor`. The package ships a builtin
/// schema file with the same stem (`schema/builtin/lix_file_descriptor.json`).
const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";
60
+
61
+ pub(crate) async fn register_lix_directory_providers(
62
+ session: &SessionContext,
63
+ active_version_id: &str,
64
+ live_state: Arc<dyn LiveStateReader>,
65
+ version_ref: Arc<dyn VersionRefReader>,
66
+ functions: FunctionProviderHandle,
67
+ ) -> Result<(), LixError> {
68
+ session
69
+ .register_table(
70
+ "lix_directory_by_version",
71
+ Arc::new(LixDirectoryProvider::by_version(
72
+ Arc::clone(&live_state),
73
+ Arc::clone(&version_ref),
74
+ functions.clone(),
75
+ )),
76
+ )
77
+ .map_err(datafusion_error_to_lix_error)?;
78
+ session
79
+ .register_table(
80
+ "lix_directory",
81
+ Arc::new(LixDirectoryProvider::active_version(
82
+ active_version_id,
83
+ live_state,
84
+ version_ref,
85
+ functions,
86
+ )),
87
+ )
88
+ .map_err(datafusion_error_to_lix_error)?;
89
+ Ok(())
90
+ }
91
+
92
+ pub(crate) async fn register_lix_directory_write_providers(
93
+ session: &SessionContext,
94
+ write_ctx: SqlWriteContext,
95
+ ) -> Result<(), LixError> {
96
+ session
97
+ .register_table(
98
+ "lix_directory_by_version",
99
+ Arc::new(LixDirectoryProvider::by_version_with_write(
100
+ write_ctx.clone(),
101
+ )),
102
+ )
103
+ .map_err(datafusion_error_to_lix_error)?;
104
+ session
105
+ .register_table(
106
+ "lix_directory",
107
+ Arc::new(LixDirectoryProvider::active_version_with_write(write_ctx)),
108
+ )
109
+ .map_err(datafusion_error_to_lix_error)?;
110
+ Ok(())
111
+ }
112
+
113
+ pub(crate) struct LixDirectoryProvider {
114
+ schema: SchemaRef,
115
+ live_state: Arc<dyn LiveStateReader>,
116
+ version_ref: Arc<dyn VersionRefReader>,
117
+ write_access: WriteAccess,
118
+ functions: FunctionProviderHandle,
119
+ version_binding: VersionBinding,
120
+ }
121
+
122
+ impl std::fmt::Debug for LixDirectoryProvider {
123
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124
+ f.debug_struct("LixDirectoryProvider").finish()
125
+ }
126
+ }
127
+
128
+ impl LixDirectoryProvider {
129
+ fn active_version(
130
+ active_version_id: impl Into<String>,
131
+ live_state: Arc<dyn LiveStateReader>,
132
+ version_ref: Arc<dyn VersionRefReader>,
133
+ functions: FunctionProviderHandle,
134
+ ) -> Self {
135
+ Self {
136
+ schema: lix_directory_schema(),
137
+ live_state,
138
+ version_ref,
139
+ write_access: WriteAccess::read_only(),
140
+ functions,
141
+ version_binding: VersionBinding::active(active_version_id),
142
+ }
143
+ }
144
+
145
+ fn active_version_with_write(write_ctx: SqlWriteContext) -> Self {
146
+ let active_version_id = write_ctx.active_version_id();
147
+ let functions = write_ctx.functions();
148
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
149
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
150
+ Self {
151
+ schema: lix_directory_schema(),
152
+ live_state,
153
+ version_ref,
154
+ write_access: WriteAccess::write(write_ctx),
155
+ functions,
156
+ version_binding: VersionBinding::active(active_version_id),
157
+ }
158
+ }
159
+
160
+ fn by_version(
161
+ live_state: Arc<dyn LiveStateReader>,
162
+ version_ref: Arc<dyn VersionRefReader>,
163
+ functions: FunctionProviderHandle,
164
+ ) -> Self {
165
+ Self {
166
+ schema: lix_directory_by_version_schema(),
167
+ live_state,
168
+ version_ref,
169
+ write_access: WriteAccess::read_only(),
170
+ functions,
171
+ version_binding: VersionBinding::explicit(),
172
+ }
173
+ }
174
+
175
+ fn by_version_with_write(write_ctx: SqlWriteContext) -> Self {
176
+ let functions = write_ctx.functions();
177
+ let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
178
+ let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
179
+ Self {
180
+ schema: lix_directory_by_version_schema(),
181
+ live_state,
182
+ version_ref,
183
+ write_access: WriteAccess::write(write_ctx),
184
+ functions,
185
+ version_binding: VersionBinding::explicit(),
186
+ }
187
+ }
188
+ }
189
+
190
+ #[async_trait]
191
+ impl TableProvider for LixDirectoryProvider {
192
+ fn as_any(&self) -> &dyn Any {
193
+ self
194
+ }
195
+
196
+ fn schema(&self) -> SchemaRef {
197
+ Arc::clone(&self.schema)
198
+ }
199
+
200
+ fn table_type(&self) -> TableType {
201
+ TableType::Base
202
+ }
203
+
204
+ fn supports_filters_pushdown(
205
+ &self,
206
+ filters: &[&Expr],
207
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
208
+ Ok(filters
209
+ .iter()
210
+ .map(|_| TableProviderFilterPushDown::Exact)
211
+ .collect())
212
+ }
213
+
214
+ async fn scan(
215
+ &self,
216
+ _state: &dyn Session,
217
+ projection: Option<&Vec<usize>>,
218
+ filters: &[Expr],
219
+ limit: Option<usize>,
220
+ ) -> Result<Arc<dyn ExecutionPlan>> {
221
+ let projected_schema = projected_schema(&self.schema, projection)?;
222
+ let scan_limit = if filters.is_empty() { limit } else { None };
223
+ let mut request =
224
+ lix_directory_scan_request(self.version_binding.active_version_id(), scan_limit);
225
+ if self.write_access.is_write() && matches!(self.version_binding, VersionBinding::Explicit)
226
+ {
227
+ request.filter.version_ids = explicit_version_ids_from_dml_filters(filters);
228
+ if request.filter.version_ids.is_empty() {
229
+ return Err(DataFusionError::Plan(
230
+ "DELETE FROM lix_directory_by_version requires an explicit lixcol_version_id predicate"
231
+ .to_string(),
232
+ ));
233
+ }
234
+ }
235
+ request.filter.version_ids = resolve_provider_version_ids(
236
+ self.version_ref.as_ref(),
237
+ &self.version_binding,
238
+ request.filter.version_ids,
239
+ )
240
+ .await
241
+ .map_err(lix_error_to_datafusion_error)?;
242
+ let filters = canonicalize_filesystem_path_filters(filters, FilesystemPathKind::Directory)?;
243
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
244
+ let physical_filters = filters
245
+ .iter()
246
+ .map(|expr| create_physical_expr(expr, &df_schema, _state.execution_props()))
247
+ .collect::<Result<Vec<_>>>()?;
248
+ Ok(Arc::new(LixDirectoryScanExec::new(
249
+ Arc::clone(&self.live_state),
250
+ Arc::clone(&self.schema),
251
+ projected_schema,
252
+ projection.cloned(),
253
+ request,
254
+ physical_filters,
255
+ limit,
256
+ )))
257
+ }
258
+
259
+ async fn insert_into(
260
+ &self,
261
+ _state: &dyn Session,
262
+ input: Arc<dyn ExecutionPlan>,
263
+ insert_op: InsertOp,
264
+ ) -> Result<Arc<dyn ExecutionPlan>> {
265
+ if insert_op != InsertOp::Append {
266
+ return not_impl_err!("{insert_op} not implemented for lix_directory yet");
267
+ }
268
+
269
+ let write_ctx = self
270
+ .write_access
271
+ .require_write("INSERT into lix_directory")?;
272
+
273
+ let sink = LixDirectoryInsertSink::new(
274
+ input.schema(),
275
+ write_ctx.clone(),
276
+ self.functions.clone(),
277
+ self.version_binding.clone(),
278
+ );
279
+ Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
280
+ }
281
+
282
+ async fn delete_from(
283
+ &self,
284
+ state: &dyn Session,
285
+ filters: Vec<Expr>,
286
+ ) -> Result<Arc<dyn ExecutionPlan>> {
287
+ let write_ctx = self
288
+ .write_access
289
+ .require_write("DELETE FROM lix_directory")?;
290
+
291
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
292
+ let filters =
293
+ canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::Directory)?;
294
+ let physical_filters = filters
295
+ .iter()
296
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
297
+ .collect::<Result<Vec<_>>>()?;
298
+ let mut request =
299
+ lix_directory_scan_request(self.version_binding.active_version_id(), None);
300
+ if matches!(self.version_binding, VersionBinding::Explicit) {
301
+ request.filter.version_ids = explicit_version_ids_from_dml_filters(&filters);
302
+ if request.filter.version_ids.is_empty() {
303
+ return Err(DataFusionError::Plan(
304
+ "DELETE FROM lix_directory_by_version requires an explicit lixcol_version_id predicate"
305
+ .to_string(),
306
+ ));
307
+ }
308
+ }
309
+
310
+ Ok(Arc::new(LixDirectoryDeleteExec::new(
311
+ write_ctx.clone(),
312
+ Arc::clone(&self.schema),
313
+ self.version_binding.clone(),
314
+ request,
315
+ physical_filters,
316
+ )))
317
+ }
318
+
319
+ async fn update(
320
+ &self,
321
+ state: &dyn Session,
322
+ assignments: Vec<(String, Expr)>,
323
+ filters: Vec<Expr>,
324
+ ) -> Result<Arc<dyn ExecutionPlan>> {
325
+ let write_ctx = self.write_access.require_write("UPDATE lix_directory")?;
326
+
327
+ validate_lix_directory_update_assignments(&self.schema, &assignments)?;
328
+
329
+ let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
330
+ let physical_assignments = assignments
331
+ .iter()
332
+ .map(|(column_name, expr)| {
333
+ Ok((
334
+ column_name.clone(),
335
+ create_physical_expr(expr, &df_schema, state.execution_props())?,
336
+ ))
337
+ })
338
+ .collect::<Result<Vec<_>>>()?;
339
+ let filters =
340
+ canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::Directory)?;
341
+ let physical_filters = filters
342
+ .iter()
343
+ .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
344
+ .collect::<Result<Vec<_>>>()?;
345
+ let request = lix_directory_scan_request(self.version_binding.active_version_id(), None);
346
+
347
+ Ok(Arc::new(LixDirectoryUpdateExec::new(
348
+ write_ctx.clone(),
349
+ Arc::clone(&self.schema),
350
+ self.version_binding.clone(),
351
+ request,
352
+ physical_assignments,
353
+ physical_filters,
354
+ )))
355
+ }
356
+ }
357
+
358
+ struct LixDirectoryInsertSink {
359
+ write_ctx: SqlWriteContext,
360
+ functions: FunctionProviderHandle,
361
+ version_binding: VersionBinding,
362
+ surface_name: &'static str,
363
+ }
364
+
365
+ impl std::fmt::Debug for LixDirectoryInsertSink {
366
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367
+ f.debug_struct("LixDirectoryInsertSink").finish()
368
+ }
369
+ }
370
+
371
+ impl LixDirectoryInsertSink {
372
+ fn new(
373
+ _schema: SchemaRef,
374
+ write_ctx: SqlWriteContext,
375
+ functions: FunctionProviderHandle,
376
+ version_binding: VersionBinding,
377
+ ) -> Self {
378
+ let surface_name = lix_directory_surface_name(&version_binding);
379
+ Self {
380
+ write_ctx,
381
+ functions,
382
+ version_binding,
383
+ surface_name,
384
+ }
385
+ }
386
+ }
387
+
388
+ impl DisplayAs for LixDirectoryInsertSink {
389
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
390
+ match t {
391
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
392
+ write!(f, "LixDirectoryInsertSink")
393
+ }
394
+ DisplayFormatType::TreeRender => write!(f, "LixDirectoryInsertSink"),
395
+ }
396
+ }
397
+ }
398
+
399
+ #[async_trait]
400
+ impl InsertSink for LixDirectoryInsertSink {
401
+ async fn write_batches(
402
+ &self,
403
+ batches: Vec<RecordBatch>,
404
+ _context: &Arc<TaskContext>,
405
+ ) -> Result<u64> {
406
+ let mut path_resolvers = None;
407
+ let mut rows = Vec::new();
408
+ let mut count = 0_u64;
409
+ for batch in batches {
410
+ if path_resolvers.is_none() {
411
+ path_resolvers = Some(
412
+ directory_path_resolvers_from_live_state(
413
+ Arc::new(WriteContextLiveStateReader::new(self.write_ctx.clone())),
414
+ self.version_binding.active_version_id(),
415
+ )
416
+ .await
417
+ .map_err(lix_error_to_datafusion_error)?,
418
+ );
419
+ }
420
+ count = count
421
+ .checked_add(u64::try_from(batch.num_rows()).map_err(|_| {
422
+ DataFusionError::Execution("lix_directory INSERT row count overflow".into())
423
+ })?)
424
+ .ok_or_else(|| {
425
+ DataFusionError::Execution("lix_directory INSERT row count overflow".into())
426
+ })?;
427
+ if record_batch_has_non_null_column(&batch, "path")? {
428
+ rows.extend(lix_directory_write_rows_from_batch_with_path_resolvers(
429
+ &batch,
430
+ self.version_binding.active_version_id(),
431
+ self.surface_name,
432
+ path_resolvers
433
+ .as_mut()
434
+ .expect("path resolver should be initialized"),
435
+ &mut || self.functions.call_uuid_v7(),
436
+ )?);
437
+ } else {
438
+ rows.extend(
439
+ lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
440
+ &batch,
441
+ self.version_binding.active_version_id(),
442
+ self.surface_name,
443
+ true,
444
+ path_resolvers.as_mut(),
445
+ None,
446
+ )?,
447
+ );
448
+ }
449
+ }
450
+
451
+ self.write_ctx
452
+ .stage_write(StageWrite::Rows {
453
+ mode: StageWriteMode::Insert,
454
+ rows,
455
+ })
456
+ .await
457
+ .map_err(lix_error_to_datafusion_error)?;
458
+
459
+ Ok(count)
460
+ }
461
+ }
462
+
463
+ fn lix_directory_surface_name(version_binding: &VersionBinding) -> &'static str {
464
+ match version_binding {
465
+ VersionBinding::Active { .. } => "lix_directory",
466
+ VersionBinding::Explicit => "lix_directory_by_version",
467
+ }
468
+ }
469
+
470
+ #[allow(dead_code)]
471
+ struct LixDirectoryDeleteExec {
472
+ write_ctx: SqlWriteContext,
473
+ table_schema: SchemaRef,
474
+ version_binding: VersionBinding,
475
+ request: LiveStateScanRequest,
476
+ filters: Vec<Arc<dyn PhysicalExpr>>,
477
+ result_schema: SchemaRef,
478
+ properties: Arc<PlanProperties>,
479
+ }
480
+
481
+ impl std::fmt::Debug for LixDirectoryDeleteExec {
482
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
483
+ f.debug_struct("LixDirectoryDeleteExec").finish()
484
+ }
485
+ }
486
+
487
+ impl LixDirectoryDeleteExec {
488
+ fn new(
489
+ write_ctx: SqlWriteContext,
490
+ table_schema: SchemaRef,
491
+ version_binding: VersionBinding,
492
+ request: LiveStateScanRequest,
493
+ filters: Vec<Arc<dyn PhysicalExpr>>,
494
+ ) -> Self {
495
+ let result_schema = dml_count_schema();
496
+ let properties = PlanProperties::new(
497
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
498
+ Partitioning::UnknownPartitioning(1),
499
+ EmissionType::Final,
500
+ Boundedness::Bounded,
501
+ );
502
+ Self {
503
+ write_ctx,
504
+ table_schema,
505
+ version_binding,
506
+ request,
507
+ filters,
508
+ result_schema,
509
+ properties: Arc::new(properties),
510
+ }
511
+ }
512
+ }
513
+
514
+ impl DisplayAs for LixDirectoryDeleteExec {
515
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
516
+ match t {
517
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
518
+ write!(f, "LixDirectoryDeleteExec(filters={})", self.filters.len())
519
+ }
520
+ DisplayFormatType::TreeRender => write!(f, "LixDirectoryDeleteExec"),
521
+ }
522
+ }
523
+ }
524
+
525
+ impl ExecutionPlan for LixDirectoryDeleteExec {
526
+ fn name(&self) -> &str {
527
+ "LixDirectoryDeleteExec"
528
+ }
529
+
530
+ fn as_any(&self) -> &dyn Any {
531
+ self
532
+ }
533
+
534
+ fn properties(&self) -> &Arc<PlanProperties> {
535
+ &self.properties
536
+ }
537
+
538
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
539
+ Vec::new()
540
+ }
541
+
542
+ fn with_new_children(
543
+ self: Arc<Self>,
544
+ children: Vec<Arc<dyn ExecutionPlan>>,
545
+ ) -> Result<Arc<dyn ExecutionPlan>> {
546
+ if !children.is_empty() {
547
+ return Err(DataFusionError::Execution(
548
+ "LixDirectoryDeleteExec does not accept children".to_string(),
549
+ ));
550
+ }
551
+ Ok(self)
552
+ }
553
+
554
+ fn execute(
555
+ &self,
556
+ partition: usize,
557
+ _context: Arc<TaskContext>,
558
+ ) -> Result<SendableRecordBatchStream> {
559
+ if partition != 0 {
560
+ return Err(DataFusionError::Execution(format!(
561
+ "LixDirectoryDeleteExec only exposes one partition, got {partition}"
562
+ )));
563
+ }
564
+ let write_ctx = self.write_ctx.clone();
565
+ let table_schema = Arc::clone(&self.table_schema);
566
+ let version_binding = self.version_binding.clone();
567
+ let request = self.request.clone();
568
+ let filters = self.filters.clone();
569
+ let result_schema = Arc::clone(&self.result_schema);
570
+ let stream_schema = Arc::clone(&result_schema);
571
+
572
+ let stream = stream::once(async move {
573
+ let rows = write_ctx
574
+ .scan_live_state(&request)
575
+ .await
576
+ .map_err(lix_error_to_datafusion_error)?;
577
+ let source_batch = lix_directory_record_batch(&table_schema, rows)
578
+ .map_err(lix_error_to_datafusion_error)?;
579
+ let matched_batch = filter_lix_directory_batch(source_batch, &filters)?;
580
+ let version_ids = directory_version_ids_from_batch(
581
+ &matched_batch,
582
+ version_binding.active_version_id(),
583
+ )?;
584
+ let mut visible_filesystems = BTreeMap::new();
585
+ for version_id in version_ids {
586
+ visible_filesystems.insert(
587
+ version_id.clone(),
588
+ VisibleFilesystem::load(
589
+ Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
590
+ &version_id,
591
+ )
592
+ .await
593
+ .map_err(lix_error_to_datafusion_error)?,
594
+ );
595
+ }
596
+ let (write_rows, count) = lix_directory_recursive_delete_rows_from_batch(
597
+ &matched_batch,
598
+ version_binding.active_version_id(),
599
+ &visible_filesystems,
600
+ )?;
601
+
602
+ if count > 0 {
603
+ write_ctx
604
+ .stage_write(StageWrite::Rows {
605
+ mode: StageWriteMode::Replace,
606
+ rows: write_rows,
607
+ })
608
+ .await
609
+ .map_err(lix_error_to_datafusion_error)?;
610
+ }
611
+
612
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
613
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
614
+ )]))
615
+ })
616
+ .try_flatten();
617
+
618
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
619
+ result_schema,
620
+ stream,
621
+ )))
622
+ }
623
+ }
624
+
625
+ #[allow(dead_code)]
626
+ struct LixDirectoryUpdateExec {
627
+ write_ctx: SqlWriteContext,
628
+ table_schema: SchemaRef,
629
+ version_binding: VersionBinding,
630
+ request: LiveStateScanRequest,
631
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
632
+ filters: Vec<Arc<dyn PhysicalExpr>>,
633
+ result_schema: SchemaRef,
634
+ properties: Arc<PlanProperties>,
635
+ }
636
+
637
+ impl std::fmt::Debug for LixDirectoryUpdateExec {
638
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
639
+ f.debug_struct("LixDirectoryUpdateExec").finish()
640
+ }
641
+ }
642
+
643
+ impl LixDirectoryUpdateExec {
644
+ fn new(
645
+ write_ctx: SqlWriteContext,
646
+ table_schema: SchemaRef,
647
+ version_binding: VersionBinding,
648
+ request: LiveStateScanRequest,
649
+ assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
650
+ filters: Vec<Arc<dyn PhysicalExpr>>,
651
+ ) -> Self {
652
+ let result_schema = dml_count_schema();
653
+ let properties = PlanProperties::new(
654
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
655
+ Partitioning::UnknownPartitioning(1),
656
+ EmissionType::Final,
657
+ Boundedness::Bounded,
658
+ );
659
+ Self {
660
+ write_ctx,
661
+ table_schema,
662
+ version_binding,
663
+ request,
664
+ assignments,
665
+ filters,
666
+ result_schema,
667
+ properties: Arc::new(properties),
668
+ }
669
+ }
670
+ }
671
+
672
+ impl DisplayAs for LixDirectoryUpdateExec {
673
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
674
+ match t {
675
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
676
+ write!(
677
+ f,
678
+ "LixDirectoryUpdateExec(assignments={}, filters={})",
679
+ self.assignments.len(),
680
+ self.filters.len()
681
+ )
682
+ }
683
+ DisplayFormatType::TreeRender => write!(f, "LixDirectoryUpdateExec"),
684
+ }
685
+ }
686
+ }
687
+
688
+ impl ExecutionPlan for LixDirectoryUpdateExec {
689
+ fn name(&self) -> &str {
690
+ "LixDirectoryUpdateExec"
691
+ }
692
+
693
+ fn as_any(&self) -> &dyn Any {
694
+ self
695
+ }
696
+
697
+ fn properties(&self) -> &Arc<PlanProperties> {
698
+ &self.properties
699
+ }
700
+
701
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
702
+ Vec::new()
703
+ }
704
+
705
+ fn with_new_children(
706
+ self: Arc<Self>,
707
+ children: Vec<Arc<dyn ExecutionPlan>>,
708
+ ) -> Result<Arc<dyn ExecutionPlan>> {
709
+ if !children.is_empty() {
710
+ return Err(DataFusionError::Execution(
711
+ "LixDirectoryUpdateExec does not accept children".to_string(),
712
+ ));
713
+ }
714
+ Ok(self)
715
+ }
716
+
717
+ fn execute(
718
+ &self,
719
+ partition: usize,
720
+ _context: Arc<TaskContext>,
721
+ ) -> Result<SendableRecordBatchStream> {
722
+ if partition != 0 {
723
+ return Err(DataFusionError::Execution(format!(
724
+ "LixDirectoryUpdateExec only exposes one partition, got {partition}"
725
+ )));
726
+ }
727
+ let write_ctx = self.write_ctx.clone();
728
+ let table_schema = Arc::clone(&self.table_schema);
729
+ let version_binding = self.version_binding.clone();
730
+ let request = self.request.clone();
731
+ let assignments = self.assignments.clone();
732
+ let filters = self.filters.clone();
733
+ let result_schema = Arc::clone(&self.result_schema);
734
+ let stream_schema = Arc::clone(&result_schema);
735
+
736
+ let stream = stream::once(async move {
737
+ let rows = write_ctx
738
+ .scan_live_state(&request)
739
+ .await
740
+ .map_err(lix_error_to_datafusion_error)?;
741
+ let source_batch = lix_directory_record_batch(&table_schema, rows)
742
+ .map_err(lix_error_to_datafusion_error)?;
743
+ let matched_batch = filter_lix_directory_batch(source_batch, &filters)?;
744
+ let mut path_resolvers = directory_path_resolvers_from_live_state(
745
+ Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
746
+ version_binding.active_version_id(),
747
+ )
748
+ .await
749
+ .map_err(lix_error_to_datafusion_error)?;
750
+ let write_rows = lix_directory_update_write_rows_from_batch(
751
+ &matched_batch,
752
+ &assignments,
753
+ version_binding.active_version_id(),
754
+ &mut path_resolvers,
755
+ )?;
756
+ let count = u64::try_from(write_rows.len()).map_err(|_| {
757
+ DataFusionError::Execution("lix_directory UPDATE row count overflow".into())
758
+ })?;
759
+
760
+ if count > 0 {
761
+ write_ctx
762
+ .stage_write(StageWrite::Rows {
763
+ mode: StageWriteMode::Replace,
764
+ rows: write_rows,
765
+ })
766
+ .await
767
+ .map_err(lix_error_to_datafusion_error)?;
768
+ }
769
+
770
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
771
+ dml_count_batch(Arc::clone(&stream_schema), count)?,
772
+ )]))
773
+ })
774
+ .try_flatten();
775
+
776
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
777
+ result_schema,
778
+ stream,
779
+ )))
780
+ }
781
+ }
782
+
783
+ struct LixDirectoryScanExec {
784
+ live_state: Arc<dyn LiveStateReader>,
785
+ batch_schema: SchemaRef,
786
+ output_schema: SchemaRef,
787
+ projection: Option<Vec<usize>>,
788
+ request: LiveStateScanRequest,
789
+ filters: Vec<Arc<dyn PhysicalExpr>>,
790
+ limit: Option<usize>,
791
+ properties: Arc<PlanProperties>,
792
+ }
793
+
794
+ impl std::fmt::Debug for LixDirectoryScanExec {
795
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
796
+ f.debug_struct("LixDirectoryScanExec").finish()
797
+ }
798
+ }
799
+
800
+ impl LixDirectoryScanExec {
801
+ fn new(
802
+ live_state: Arc<dyn LiveStateReader>,
803
+ batch_schema: SchemaRef,
804
+ output_schema: SchemaRef,
805
+ projection: Option<Vec<usize>>,
806
+ request: LiveStateScanRequest,
807
+ filters: Vec<Arc<dyn PhysicalExpr>>,
808
+ limit: Option<usize>,
809
+ ) -> Self {
810
+ let properties = PlanProperties::new(
811
+ EquivalenceProperties::new(Arc::clone(&output_schema)),
812
+ Partitioning::UnknownPartitioning(1),
813
+ EmissionType::Incremental,
814
+ Boundedness::Bounded,
815
+ );
816
+ Self {
817
+ live_state,
818
+ batch_schema,
819
+ output_schema,
820
+ projection,
821
+ request,
822
+ filters,
823
+ limit,
824
+ properties: Arc::new(properties),
825
+ }
826
+ }
827
+ }
828
+
829
+ impl DisplayAs for LixDirectoryScanExec {
830
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
831
+ match t {
832
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
833
+ write!(f, "LixDirectoryScanExec(limit={:?})", self.limit)
834
+ }
835
+ DisplayFormatType::TreeRender => write!(f, "LixDirectoryScanExec"),
836
+ }
837
+ }
838
+ }
839
+
840
+ impl ExecutionPlan for LixDirectoryScanExec {
841
+ fn name(&self) -> &str {
842
+ "LixDirectoryScanExec"
843
+ }
844
+
845
+ fn as_any(&self) -> &dyn Any {
846
+ self
847
+ }
848
+
849
+ fn properties(&self) -> &Arc<PlanProperties> {
850
+ &self.properties
851
+ }
852
+
853
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
854
+ Vec::new()
855
+ }
856
+
857
+ fn with_new_children(
858
+ self: Arc<Self>,
859
+ children: Vec<Arc<dyn ExecutionPlan>>,
860
+ ) -> Result<Arc<dyn ExecutionPlan>> {
861
+ if !children.is_empty() {
862
+ return Err(DataFusionError::Execution(
863
+ "LixDirectoryScanExec does not accept children".to_string(),
864
+ ));
865
+ }
866
+ Ok(self)
867
+ }
868
+
869
+ fn execute(
870
+ &self,
871
+ partition: usize,
872
+ _context: Arc<TaskContext>,
873
+ ) -> Result<SendableRecordBatchStream> {
874
+ if partition != 0 {
875
+ return Err(DataFusionError::Execution(format!(
876
+ "LixDirectoryScanExec only supports partition 0, got {partition}"
877
+ )));
878
+ }
879
+
880
+ let live_state = Arc::clone(&self.live_state);
881
+ let request = self.request.clone();
882
+ let filters = self.filters.clone();
883
+ let limit = self.limit;
884
+ let output_schema = Arc::clone(&self.output_schema);
885
+ let batch_schema = Arc::clone(&self.batch_schema);
886
+ let projection = self.projection.clone();
887
+ let fut = async move {
888
+ let rows = live_state.scan_rows(&request).await.map_err(|error| {
889
+ DataFusionError::Execution(format!("sql2 lix_directory scan failed: {error}"))
890
+ })?;
891
+ let batch = lix_directory_record_batch(&batch_schema, rows).map_err(|error| {
892
+ DataFusionError::Execution(format!(
893
+ "sql2 lix_directory batch build failed: {error}"
894
+ ))
895
+ })?;
896
+ let filtered = filter_lix_directory_batch(batch, &filters)?;
897
+ let projected = match projection {
898
+ Some(indices) => filtered.project(&indices).map_err(DataFusionError::from),
899
+ None => Ok(filtered),
900
+ }?;
901
+ match limit {
902
+ Some(limit) => Ok(projected.slice(0, limit.min(projected.num_rows()))),
903
+ None => Ok(projected),
904
+ }
905
+ };
906
+
907
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
908
+ output_schema,
909
+ stream::once(fut).map_ok(|batch| batch),
910
+ )))
911
+ }
912
+ }
913
+
914
+ #[derive(Debug, Clone)]
915
+ struct DirectoryDescriptorRecord {
916
+ id: String,
917
+ parent_id: Option<String>,
918
+ name: String,
919
+ hidden: bool,
920
+ live: LiveStateRow,
921
+ }
922
+
923
+ #[derive(Debug, Deserialize)]
924
+ struct DirectoryDescriptorSnapshot {
925
+ id: String,
926
+ parent_id: Option<String>,
927
+ name: String,
928
+ hidden: Option<bool>,
929
+ }
930
+
931
/// Test-only shortcut: converts `batch` into staged rows for the
/// `lix_directory` surface with read-only column rejection enabled.
#[cfg(test)]
fn lix_directory_write_rows_from_batch(
    batch: &RecordBatch,
    version_binding: Option<&str>,
) -> Result<Vec<StageRow>> {
    let surface_name = "lix_directory";
    let reject_read_only_fields = true;
    lix_directory_write_rows_from_batch_with_options(
        batch,
        version_binding,
        surface_name,
        reject_read_only_fields,
    )
}
938
+
939
+ fn lix_directory_write_rows_from_batch_with_path_resolvers(
940
+ batch: &RecordBatch,
941
+ version_binding: Option<&str>,
942
+ surface_name: &str,
943
+ path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
944
+ generate_directory_id: &mut dyn FnMut() -> String,
945
+ ) -> Result<Vec<StageRow>> {
946
+ lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
947
+ batch,
948
+ version_binding,
949
+ surface_name,
950
+ true,
951
+ Some(path_resolvers),
952
+ Some(generate_directory_id),
953
+ )
954
+ }
955
+
956
+ fn lix_directory_update_write_rows_from_batch(
957
+ batch: &RecordBatch,
958
+ assignments: &[(String, Arc<dyn PhysicalExpr>)],
959
+ version_binding: Option<&str>,
960
+ path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
961
+ ) -> Result<Vec<StageRow>> {
962
+ let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
963
+ let mut rows = Vec::new();
964
+ for row_index in 0..batch.num_rows() {
965
+ let id = optional_string_value(batch, row_index, "id")?;
966
+ let context = directory_row_context_from_update(
967
+ batch,
968
+ &assignment_values,
969
+ row_index,
970
+ version_binding,
971
+ )?;
972
+ let parent_id =
973
+ update_optional_string_value(batch, &assignment_values, row_index, "parent_id")?;
974
+ let name = update_required_string_value(batch, &assignment_values, row_index, "name")?;
975
+ if let Some(directory_id) = id.as_ref() {
976
+ let resolver = path_resolvers
977
+ .entry(directory_path_resolver_key(&context))
978
+ .or_insert_with(DirectoryPathResolver::default);
979
+ resolver
980
+ .reserve_directory(parent_id.clone(), name.clone(), directory_id.clone())
981
+ .map_err(lix_error_to_datafusion_error)?;
982
+ }
983
+ rows.push(directory_descriptor_write_row(
984
+ DirectoryDescriptorWriteIntent {
985
+ id,
986
+ parent_id,
987
+ name,
988
+ hidden: update_optional_bool_value(batch, &assignment_values, row_index, "hidden")?,
989
+ context,
990
+ },
991
+ ));
992
+ }
993
+ Ok(rows)
994
+ }
995
+
996
+ fn directory_version_ids_from_batch(
997
+ batch: &RecordBatch,
998
+ version_binding: Option<&str>,
999
+ ) -> Result<BTreeSet<String>> {
1000
+ let mut version_ids = BTreeSet::new();
1001
+ for row_index in 0..batch.num_rows() {
1002
+ version_ids.insert(
1003
+ directory_row_context_from_batch(batch, row_index, version_binding)?.version_id,
1004
+ );
1005
+ }
1006
+ Ok(version_ids)
1007
+ }
1008
+
1009
+ fn lix_directory_recursive_delete_rows_from_batch(
1010
+ batch: &RecordBatch,
1011
+ version_binding: Option<&str>,
1012
+ visible_filesystems: &BTreeMap<String, VisibleFilesystem>,
1013
+ ) -> Result<(Vec<StageRow>, u64)> {
1014
+ let mut rows = Vec::new();
1015
+ let mut seen = BTreeSet::new();
1016
+ let mut count = 0u64;
1017
+ for row_index in 0..batch.num_rows() {
1018
+ let directory_id = required_string_value(batch, row_index, "id")?;
1019
+ let context = directory_row_context_from_batch(batch, row_index, version_binding)?;
1020
+ let visible_filesystem = visible_filesystems
1021
+ .get(&context.version_id)
1022
+ .ok_or_else(|| {
1023
+ DataFusionError::Execution(format!(
1024
+ "DELETE FROM lix_directory missing visible filesystem for version '{}'",
1025
+ context.version_id
1026
+ ))
1027
+ })?;
1028
+ append_deduped_delete_plan(
1029
+ &mut rows,
1030
+ &mut seen,
1031
+ plan_recursive_directory_delete(&directory_id, visible_filesystem, context),
1032
+ &mut count,
1033
+ );
1034
+ }
1035
+ Ok((rows, count))
1036
+ }
1037
+
1038
+ fn append_deduped_delete_plan(
1039
+ rows: &mut Vec<StageRow>,
1040
+ seen: &mut BTreeSet<StateRowDedupeKey>,
1041
+ plan: FilesystemDeletePlan,
1042
+ count: &mut u64,
1043
+ ) {
1044
+ for row in plan.rows {
1045
+ if seen.insert(StateRowDedupeKey::from(&row)) {
1046
+ if is_user_visible_filesystem_delete_row(&row) {
1047
+ *count += 1;
1048
+ }
1049
+ rows.push(row);
1050
+ }
1051
+ }
1052
+ }
1053
+
1054
+ fn is_user_visible_filesystem_delete_row(row: &StageRow) -> bool {
1055
+ matches!(
1056
+ row.schema_key.as_str(),
1057
+ "lix_directory_descriptor" | "lix_file_descriptor"
1058
+ )
1059
+ }
1060
+
1061
/// Identity of a staged row, used to drop duplicates when several delete
/// plans produce the same row.
///
/// Field order is significant: the derived `Ord` compares fields in
/// declaration order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct StateRowDedupeKey {
    /// Entity id in its string form.
    entity_id: String,
    /// Schema key of the row.
    schema_key: String,
    /// File id of the row, if any.
    file_id: Option<String>,
    /// Version the row belongs to.
    version_id: String,
    /// Copy of the row's `global` flag.
    global: bool,
    /// Copy of the row's `untracked` flag.
    untracked: bool,
}
1070
+
1071
+ impl From<&StageRow> for StateRowDedupeKey {
1072
+ fn from(row: &StageRow) -> Self {
1073
+ Self {
1074
+ entity_id: row
1075
+ .entity_id
1076
+ .as_ref()
1077
+ .expect("directory provider staged row should carry entity_id")
1078
+ .as_string()
1079
+ .expect("directory provider staged row entity identity should project"),
1080
+ schema_key: row.schema_key.clone(),
1081
+ file_id: row.file_id.clone(),
1082
+ version_id: row.version_id.clone(),
1083
+ global: row.global,
1084
+ untracked: row.untracked,
1085
+ }
1086
+ }
1087
+ }
1088
+
1089
/// Test-only variant of the conversion routine that runs without path
/// resolvers and without a directory-id generator.
#[cfg(test)]
fn lix_directory_write_rows_from_batch_with_options(
    batch: &RecordBatch,
    version_binding: Option<&str>,
    surface_name: &str,
    reject_read_only_fields: bool,
) -> Result<Vec<StageRow>> {
    let no_path_resolvers = None;
    let no_id_generator = None;
    lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
        batch,
        version_binding,
        surface_name,
        reject_read_only_fields,
        no_path_resolvers,
        no_id_generator,
    )
}
1105
+
1106
+ fn lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
1107
+ batch: &RecordBatch,
1108
+ version_binding: Option<&str>,
1109
+ surface_name: &str,
1110
+ reject_read_only_fields: bool,
1111
+ mut path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1112
+ mut generate_directory_id: Option<&mut dyn FnMut() -> String>,
1113
+ ) -> Result<Vec<StageRow>> {
1114
+ let mut rows = Vec::new();
1115
+ for row_index in 0..batch.num_rows() {
1116
+ if reject_read_only_fields {
1117
+ reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_entity_id")?;
1118
+ reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_schema_key")?;
1119
+ reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_change_id")?;
1120
+ reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_created_at")?;
1121
+ reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_updated_at")?;
1122
+ reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_commit_id")?;
1123
+ }
1124
+
1125
+ let path = optional_string_value(batch, row_index, "path")?;
1126
+ let id = optional_string_value(batch, row_index, "id")?;
1127
+ let hidden = optional_bool_value(batch, row_index, "hidden")?;
1128
+ let context = directory_row_context_from_batch(batch, row_index, version_binding)?;
1129
+
1130
+ if let Some(path) = path.filter(|_| reject_read_only_fields) {
1131
+ reject_read_only_lix_directory_insert_field(batch, row_index, "parent_id")?;
1132
+ reject_read_only_lix_directory_insert_field(batch, row_index, "name")?;
1133
+
1134
+ let Some(path_resolvers) = path_resolvers.as_deref_mut() else {
1135
+ return Err(DataFusionError::Execution(
1136
+ "INSERT into lix_directory with path requires directory path resolver"
1137
+ .to_string(),
1138
+ ));
1139
+ };
1140
+ let resolver = path_resolvers
1141
+ .entry(directory_path_resolver_key(&context))
1142
+ .or_insert_with(DirectoryPathResolver::default);
1143
+ let Some(generate_directory_id) = generate_directory_id.as_deref_mut() else {
1144
+ return Err(DataFusionError::Execution(
1145
+ "INSERT into lix_directory with path requires directory id generator"
1146
+ .to_string(),
1147
+ ));
1148
+ };
1149
+ let directory_id = id.unwrap_or_else(|| generate_directory_id());
1150
+ let mut planned_rows = resolver
1151
+ .create_directory_path_with_leaf_id(
1152
+ &path,
1153
+ Some(directory_id.clone()),
1154
+ context,
1155
+ hidden.unwrap_or(false),
1156
+ generate_directory_id,
1157
+ )
1158
+ .map_err(lix_error_to_datafusion_error)?;
1159
+ attach_lix_directory_insert_origin(&mut planned_rows, surface_name, &directory_id);
1160
+ rows.extend(planned_rows);
1161
+ continue;
1162
+ }
1163
+
1164
+ let parent_id = optional_string_value(batch, row_index, "parent_id")?;
1165
+ let name = required_string_value(batch, row_index, "name")?;
1166
+ if let Some(path_resolvers) = path_resolvers.as_deref_mut() {
1167
+ if let Some(directory_id) = id.as_ref() {
1168
+ let resolver = path_resolvers
1169
+ .entry(directory_path_resolver_key(&context))
1170
+ .or_insert_with(DirectoryPathResolver::default);
1171
+ resolver
1172
+ .reserve_directory(parent_id.clone(), name.clone(), directory_id.clone())
1173
+ .map_err(lix_error_to_datafusion_error)?;
1174
+ }
1175
+ }
1176
+ let mut row = directory_descriptor_write_row(DirectoryDescriptorWriteIntent {
1177
+ id: id.clone(),
1178
+ parent_id,
1179
+ name,
1180
+ hidden,
1181
+ context,
1182
+ });
1183
+ if let Some(directory_id) = id.as_ref() {
1184
+ row.origin = Some(lix_directory_insert_origin(surface_name, directory_id));
1185
+ }
1186
+ rows.push(row);
1187
+ }
1188
+ Ok(rows)
1189
+ }
1190
+
1191
+ fn attach_lix_directory_insert_origin(
1192
+ rows: &mut [StageRow],
1193
+ surface_name: &str,
1194
+ directory_id: &str,
1195
+ ) {
1196
+ let origin = lix_directory_insert_origin(surface_name, directory_id);
1197
+ for row in rows {
1198
+ if row.schema_key != DIRECTORY_SCHEMA_KEY {
1199
+ continue;
1200
+ }
1201
+ let Some(entity_id) = row
1202
+ .entity_id
1203
+ .as_ref()
1204
+ .and_then(|entity_id| entity_id.as_string().ok())
1205
+ else {
1206
+ continue;
1207
+ };
1208
+ if entity_id == directory_id {
1209
+ row.origin = Some(origin.clone());
1210
+ }
1211
+ }
1212
+ }
1213
+
1214
+ fn lix_directory_insert_origin(surface_name: &str, directory_id: &str) -> StageRowOrigin {
1215
+ StageRowOrigin {
1216
+ surface: surface_name.to_string(),
1217
+ operation: StageWriteOperation::Insert,
1218
+ primary_key: Some(LogicalPrimaryKey {
1219
+ columns: vec!["id".to_string()],
1220
+ values: vec![directory_id.to_string()],
1221
+ }),
1222
+ }
1223
+ }
1224
+
1225
+ fn directory_row_context_from_batch(
1226
+ batch: &RecordBatch,
1227
+ row_index: usize,
1228
+ version_binding: Option<&str>,
1229
+ ) -> Result<FilesystemRowContext> {
1230
+ let scope = resolve_write_version_scope(
1231
+ optional_bool_value(batch, row_index, "lixcol_global")?,
1232
+ optional_string_value(batch, row_index, "lixcol_version_id")?,
1233
+ version_binding,
1234
+ "INSERT into lix_directory_by_version",
1235
+ "lix_directory",
1236
+ )?;
1237
+
1238
+ Ok(FilesystemRowContext {
1239
+ version_id: scope.version_id,
1240
+ global: scope.global,
1241
+ untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1242
+ file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1243
+ metadata: optional_metadata_value(batch, row_index, "lixcol_metadata", "lix_directory")?,
1244
+ })
1245
+ }
1246
+
1247
+ fn directory_row_context_from_update(
1248
+ batch: &RecordBatch,
1249
+ assignment_values: &UpdateAssignmentValues,
1250
+ row_index: usize,
1251
+ version_binding: Option<&str>,
1252
+ ) -> Result<FilesystemRowContext> {
1253
+ let scope = resolve_write_version_scope(
1254
+ optional_bool_value(batch, row_index, "lixcol_global")?,
1255
+ optional_string_value(batch, row_index, "lixcol_version_id")?,
1256
+ version_binding,
1257
+ "UPDATE into lix_directory_by_version",
1258
+ "lix_directory",
1259
+ )?;
1260
+
1261
+ Ok(FilesystemRowContext {
1262
+ version_id: scope.version_id,
1263
+ global: scope.global,
1264
+ untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1265
+ file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1266
+ metadata: update_optional_metadata_value(
1267
+ batch,
1268
+ assignment_values,
1269
+ row_index,
1270
+ "lixcol_metadata",
1271
+ "lix_directory",
1272
+ )?,
1273
+ })
1274
+ }
1275
+
1276
+ fn directory_path_resolver_key(context: &FilesystemRowContext) -> String {
1277
+ filesystem_storage_scope_key(
1278
+ &context.version_id,
1279
+ context.global,
1280
+ context.untracked,
1281
+ context.file_id.as_deref(),
1282
+ )
1283
+ }
1284
+
1285
+ async fn directory_path_resolvers_from_live_state(
1286
+ live_state: Arc<dyn LiveStateReader>,
1287
+ version_binding: Option<&str>,
1288
+ ) -> std::result::Result<BTreeMap<String, DirectoryPathResolver>, LixError> {
1289
+ let rows = live_state
1290
+ .scan_rows(&LiveStateScanRequest {
1291
+ filter: LiveStateFilter {
1292
+ schema_keys: vec![
1293
+ DIRECTORY_SCHEMA_KEY.to_string(),
1294
+ FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
1295
+ ],
1296
+ version_ids: version_binding
1297
+ .map(|version_id| vec![version_id.to_string()])
1298
+ .unwrap_or_default(),
1299
+ ..Default::default()
1300
+ },
1301
+ ..Default::default()
1302
+ })
1303
+ .await?;
1304
+ let mut resolvers = directory_path_resolvers_from_state_rows(rows)?;
1305
+ if let Some(version_id) = version_binding {
1306
+ let key = filesystem_storage_scope_key(version_id, false, false, None);
1307
+ resolvers
1308
+ .entry(key)
1309
+ .or_insert_with(DirectoryPathResolver::default);
1310
+ }
1311
+ Ok(resolvers)
1312
+ }
1313
+
1314
+ fn lix_directory_record_batch(
1315
+ schema: &SchemaRef,
1316
+ rows: Vec<LiveStateRow>,
1317
+ ) -> Result<RecordBatch, LixError> {
1318
+ let mut directory_rows = Vec::<DirectoryDescriptorRecord>::new();
1319
+
1320
+ for row in rows {
1321
+ if row.schema_key != DIRECTORY_SCHEMA_KEY {
1322
+ continue;
1323
+ }
1324
+ let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1325
+ continue;
1326
+ };
1327
+ let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(snapshot_content)
1328
+ .map_err(|error| {
1329
+ LixError::new(
1330
+ "LIX_ERROR_UNKNOWN",
1331
+ format!("invalid lix_directory_descriptor snapshot JSON: {error}"),
1332
+ )
1333
+ })?;
1334
+ directory_rows.push(DirectoryDescriptorRecord {
1335
+ id: snapshot.id,
1336
+ parent_id: snapshot.parent_id,
1337
+ name: snapshot.name,
1338
+ hidden: snapshot.hidden.unwrap_or(false),
1339
+ live: row,
1340
+ });
1341
+ }
1342
+
1343
+ let directory_paths = derive_directory_paths(&directory_rows)?;
1344
+ let mut ids = Vec::new();
1345
+ let mut paths = Vec::new();
1346
+ let mut parent_ids = Vec::new();
1347
+ let mut names = Vec::new();
1348
+ let mut hiddens = Vec::new();
1349
+ let mut entity_ids = Vec::new();
1350
+ let mut schema_keys = Vec::new();
1351
+ let mut file_ids = Vec::new();
1352
+ let mut schema_versions = Vec::new();
1353
+ let mut globals = Vec::new();
1354
+ let mut change_ids = Vec::new();
1355
+ let mut created_ats = Vec::new();
1356
+ let mut updated_ats = Vec::new();
1357
+ let mut commit_ids = Vec::new();
1358
+ let mut untracked_values = Vec::new();
1359
+ let mut metadata_values = Vec::new();
1360
+ let mut version_ids = Vec::new();
1361
+
1362
+ for directory in directory_rows {
1363
+ ids.push(Some(directory.id.clone()));
1364
+ paths.push(
1365
+ directory_paths
1366
+ .get(&(directory.live.version_id.clone(), directory.id.clone()))
1367
+ .cloned(),
1368
+ );
1369
+ parent_ids.push(directory.parent_id);
1370
+ names.push(Some(directory.name));
1371
+ hiddens.push(Some(directory.hidden));
1372
+ entity_ids.push(Some(directory.live.entity_id.as_string()?));
1373
+ schema_keys.push(Some(directory.live.schema_key));
1374
+ file_ids.push(directory.live.file_id);
1375
+ schema_versions.push(directory.live.schema_version);
1376
+ globals.push(Some(directory.live.global));
1377
+ change_ids.push(directory.live.change_id);
1378
+ created_ats.push(directory.live.created_at);
1379
+ updated_ats.push(directory.live.updated_at);
1380
+ commit_ids.push(directory.live.commit_id);
1381
+ untracked_values.push(Some(directory.live.untracked));
1382
+ metadata_values.push(directory.live.metadata.as_ref().map(serialize_row_metadata));
1383
+ version_ids.push(Some(directory.live.version_id));
1384
+ }
1385
+
1386
+ let mut columns = Vec::<ArrayRef>::with_capacity(schema.fields().len());
1387
+ for field in schema.fields() {
1388
+ let array: ArrayRef = match field.name().as_str() {
1389
+ "id" => Arc::new(StringArray::from(ids.clone())),
1390
+ "path" => Arc::new(StringArray::from(paths.clone())),
1391
+ "parent_id" => Arc::new(StringArray::from(parent_ids.clone())),
1392
+ "name" => Arc::new(StringArray::from(names.clone())),
1393
+ "hidden" => Arc::new(BooleanArray::from(hiddens.clone())),
1394
+ "lixcol_entity_id" => Arc::new(StringArray::from(entity_ids.clone())),
1395
+ "lixcol_schema_key" => Arc::new(StringArray::from(schema_keys.clone())),
1396
+ "lixcol_file_id" => Arc::new(StringArray::from(file_ids.clone())),
1397
+ "lixcol_schema_version" => Arc::new(StringArray::from(schema_versions.clone())),
1398
+ "lixcol_global" => Arc::new(BooleanArray::from(globals.clone())),
1399
+ "lixcol_change_id" => Arc::new(StringArray::from(change_ids.clone())),
1400
+ "lixcol_created_at" => Arc::new(StringArray::from(created_ats.clone())),
1401
+ "lixcol_updated_at" => Arc::new(StringArray::from(updated_ats.clone())),
1402
+ "lixcol_commit_id" => Arc::new(StringArray::from(commit_ids.clone())),
1403
+ "lixcol_untracked" => Arc::new(BooleanArray::from(untracked_values.clone())),
1404
+ "lixcol_metadata" => Arc::new(StringArray::from(metadata_values.clone())),
1405
+ "lixcol_version_id" => Arc::new(StringArray::from(version_ids.clone())),
1406
+ other => {
1407
+ return Err(LixError::new(
1408
+ "LIX_ERROR_UNKNOWN",
1409
+ format!(
1410
+ "sql2 lix_directory provider does not support projected column '{other}'"
1411
+ ),
1412
+ ))
1413
+ }
1414
+ };
1415
+ columns.push(array);
1416
+ }
1417
+
1418
+ let options = RecordBatchOptions::new().with_row_count(Some(ids.len()));
1419
+ RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
1420
+ LixError::new(
1421
+ "LIX_ERROR_UNKNOWN",
1422
+ format!("sql2 failed to build lix_directory record batch: {error}"),
1423
+ )
1424
+ })
1425
+ }
1426
+
1427
+ fn derive_directory_paths(
1428
+ rows: &[DirectoryDescriptorRecord],
1429
+ ) -> std::result::Result<BTreeMap<(String, String), String>, LixError> {
1430
+ let mut by_version = BTreeMap::<String, BTreeMap<String, &DirectoryDescriptorRecord>>::new();
1431
+ for row in rows {
1432
+ by_version
1433
+ .entry(row.live.version_id.clone())
1434
+ .or_default()
1435
+ .insert(row.id.clone(), row);
1436
+ }
1437
+
1438
+ let mut paths = BTreeMap::<(String, String), String>::new();
1439
+ for (version_id, records) in by_version {
1440
+ for directory_id in records.keys() {
1441
+ derive_directory_path_for(
1442
+ &version_id,
1443
+ directory_id,
1444
+ &records,
1445
+ &mut paths,
1446
+ &mut BTreeSet::new(),
1447
+ )?;
1448
+ }
1449
+ }
1450
+ Ok(paths)
1451
+ }
1452
+
1453
+ fn derive_directory_path_for(
1454
+ version_id: &str,
1455
+ directory_id: &str,
1456
+ records: &BTreeMap<String, &DirectoryDescriptorRecord>,
1457
+ paths: &mut BTreeMap<(String, String), String>,
1458
+ visiting: &mut BTreeSet<String>,
1459
+ ) -> std::result::Result<Option<String>, LixError> {
1460
+ if let Some(path) = paths.get(&(version_id.to_string(), directory_id.to_string())) {
1461
+ return Ok(Some(path.clone()));
1462
+ }
1463
+ if !visiting.insert(directory_id.to_string()) {
1464
+ return Err(directory_parent_cycle_error(version_id, directory_id));
1465
+ }
1466
+ let Some(row) = records.get(directory_id) else {
1467
+ visiting.remove(directory_id);
1468
+ return Ok(None);
1469
+ };
1470
+ let path = match row.parent_id.as_deref() {
1471
+ Some(parent_id) => {
1472
+ let Some(parent_path) =
1473
+ derive_directory_path_for(version_id, parent_id, records, paths, visiting)?
1474
+ else {
1475
+ visiting.remove(directory_id);
1476
+ return Ok(None);
1477
+ };
1478
+ format!("{parent_path}{}/", row.name)
1479
+ }
1480
+ None => format!("/{}/", row.name),
1481
+ };
1482
+ visiting.remove(directory_id);
1483
+ paths.insert(
1484
+ (version_id.to_string(), directory_id.to_string()),
1485
+ path.clone(),
1486
+ );
1487
+ Ok(Some(path))
1488
+ }
1489
+
1490
+ fn directory_parent_cycle_error(version_id: &str, directory_id: &str) -> LixError {
1491
+ LixError::new(
1492
+ LixError::CODE_CONSTRAINT_VIOLATION,
1493
+ format!(
1494
+ "lix_directory_descriptor parent_id cycle in version '{version_id}' while resolving directory '{directory_id}'"
1495
+ ),
1496
+ )
1497
+ }
1498
+
1499
+ fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1500
+ let fields = match projection {
1501
+ Some(indices) => indices
1502
+ .iter()
1503
+ .map(|index| base_schema.field(*index).as_ref().clone())
1504
+ .collect::<Vec<_>>(),
1505
+ None => base_schema
1506
+ .fields()
1507
+ .iter()
1508
+ .map(|field| field.as_ref().clone())
1509
+ .collect::<Vec<_>>(),
1510
+ };
1511
+ Ok(Arc::new(Schema::new(fields)))
1512
+ }
1513
+
1514
+ fn lix_directory_scan_request(
1515
+ version_binding: Option<&str>,
1516
+ limit: Option<usize>,
1517
+ ) -> LiveStateScanRequest {
1518
+ LiveStateScanRequest {
1519
+ filter: LiveStateFilter {
1520
+ schema_keys: vec![DIRECTORY_SCHEMA_KEY.to_string()],
1521
+ version_ids: version_binding
1522
+ .map(|version_id| vec![version_id.to_string()])
1523
+ .unwrap_or_default(),
1524
+ ..LiveStateFilter::default()
1525
+ },
1526
+ projection: LiveStateProjection::default(),
1527
+ limit,
1528
+ }
1529
+ }
1530
+
1531
+ fn validate_lix_directory_update_assignments(
1532
+ schema: &SchemaRef,
1533
+ assignments: &[(String, Expr)],
1534
+ ) -> Result<()> {
1535
+ for (column_name, _) in assignments {
1536
+ schema.field_with_name(column_name).map_err(|_| {
1537
+ DataFusionError::Plan(format!(
1538
+ "UPDATE lix_directory failed: column '{column_name}' does not exist"
1539
+ ))
1540
+ })?;
1541
+ if !matches!(
1542
+ column_name.as_str(),
1543
+ "parent_id" | "name" | "hidden" | "lixcol_metadata"
1544
+ ) {
1545
+ return Err(DataFusionError::Execution(format!(
1546
+ "UPDATE lix_directory cannot stage read-only column '{column_name}'"
1547
+ )));
1548
+ }
1549
+ }
1550
+ Ok(())
1551
+ }
1552
+
1553
+ fn filter_lix_directory_batch(
1554
+ batch: RecordBatch,
1555
+ filters: &[Arc<dyn PhysicalExpr>],
1556
+ ) -> Result<RecordBatch> {
1557
+ let Some(mask) = evaluate_lix_directory_filters(&batch, filters)? else {
1558
+ return Ok(batch);
1559
+ };
1560
+ Ok(filter_record_batch(&batch, &mask)?)
1561
+ }
1562
+
1563
+ fn evaluate_lix_directory_filters(
1564
+ batch: &RecordBatch,
1565
+ filters: &[Arc<dyn PhysicalExpr>],
1566
+ ) -> Result<Option<BooleanArray>> {
1567
+ if filters.is_empty() {
1568
+ return Ok(None);
1569
+ }
1570
+
1571
+ let mut combined_mask: Option<BooleanArray> = None;
1572
+ for filter in filters {
1573
+ let result = filter.evaluate(batch)?;
1574
+ let array = result.into_array(batch.num_rows())?;
1575
+ let bool_array = array
1576
+ .as_any()
1577
+ .downcast_ref::<BooleanArray>()
1578
+ .ok_or_else(|| {
1579
+ DataFusionError::Execution("lix_directory filter was not boolean".to_string())
1580
+ })?;
1581
+ let normalized = bool_array
1582
+ .iter()
1583
+ .map(|value| Some(value == Some(true)))
1584
+ .collect::<BooleanArray>();
1585
+ combined_mask = Some(match combined_mask {
1586
+ Some(existing) => and(&existing, &normalized)?,
1587
+ None => normalized,
1588
+ });
1589
+ }
1590
+ Ok(combined_mask)
1591
+ }
1592
+
1593
+ fn dml_count_schema() -> SchemaRef {
1594
+ Arc::new(Schema::new(vec![Field::new(
1595
+ "count",
1596
+ DataType::UInt64,
1597
+ false,
1598
+ )]))
1599
+ }
1600
+
1601
+ fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
1602
+ RecordBatch::try_new(
1603
+ schema,
1604
+ vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
1605
+ )
1606
+ .map_err(DataFusionError::from)
1607
+ }
1608
+
1609
+ fn record_batch_has_non_null_column(batch: &RecordBatch, column_name: &str) -> Result<bool> {
1610
+ for row_index in 0..batch.num_rows() {
1611
+ if optional_scalar_value(batch, row_index, column_name)?
1612
+ .is_some_and(|value| !value.is_null())
1613
+ {
1614
+ return Ok(true);
1615
+ }
1616
+ }
1617
+ Ok(false)
1618
+ }
1619
+
1620
+ fn reject_read_only_lix_directory_insert_field(
1621
+ batch: &RecordBatch,
1622
+ row_index: usize,
1623
+ column_name: &str,
1624
+ ) -> Result<()> {
1625
+ if optional_scalar_value(batch, row_index, column_name)?.is_some_and(|value| !value.is_null()) {
1626
+ return Err(DataFusionError::Execution(format!(
1627
+ "INSERT into lix_directory cannot stage read-only column '{column_name}'"
1628
+ )));
1629
+ }
1630
+ Ok(())
1631
+ }
1632
+
1633
+ fn required_string_value(
1634
+ batch: &RecordBatch,
1635
+ row_index: usize,
1636
+ column_name: &str,
1637
+ ) -> Result<String> {
1638
+ optional_string_value(batch, row_index, column_name)?.ok_or_else(|| {
1639
+ DataFusionError::Execution(format!(
1640
+ "INSERT into lix_directory requires non-null text column '{column_name}'"
1641
+ ))
1642
+ })
1643
+ }
1644
+
1645
+ fn update_required_string_value(
1646
+ batch: &RecordBatch,
1647
+ assignment_values: &UpdateAssignmentValues,
1648
+ row_index: usize,
1649
+ column_name: &str,
1650
+ ) -> Result<String> {
1651
+ update_optional_string_value(batch, assignment_values, row_index, column_name)?.ok_or_else(
1652
+ || {
1653
+ DataFusionError::Execution(format!(
1654
+ "UPDATE lix_directory requires non-null text column '{column_name}'"
1655
+ ))
1656
+ },
1657
+ )
1658
+ }
1659
+
1660
+ fn update_optional_string_value(
1661
+ batch: &RecordBatch,
1662
+ assignment_values: &UpdateAssignmentValues,
1663
+ row_index: usize,
1664
+ column_name: &str,
1665
+ ) -> Result<Option<String>> {
1666
+ match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
1667
+ InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
1668
+ InsertCell::Provided(SqlCell::Value(
1669
+ ScalarValue::Utf8(Some(value))
1670
+ | ScalarValue::Utf8View(Some(value))
1671
+ | ScalarValue::LargeUtf8(Some(value)),
1672
+ )) => Ok(Some(value)),
1673
+ InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
1674
+ "UPDATE lix_directory expected text-compatible column '{column_name}', got {other:?}"
1675
+ ))),
1676
+ }
1677
+ }
1678
+
1679
+ fn update_optional_metadata_value(
1680
+ batch: &RecordBatch,
1681
+ assignment_values: &UpdateAssignmentValues,
1682
+ row_index: usize,
1683
+ column_name: &str,
1684
+ context: &str,
1685
+ ) -> Result<Option<RowMetadata>> {
1686
+ update_optional_string_value(batch, assignment_values, row_index, column_name)?
1687
+ .map(|value| {
1688
+ parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
1689
+ })
1690
+ .transpose()
1691
+ }
1692
+
1693
+ fn update_optional_bool_value(
1694
+ batch: &RecordBatch,
1695
+ assignment_values: &UpdateAssignmentValues,
1696
+ row_index: usize,
1697
+ column_name: &str,
1698
+ ) -> Result<Option<bool>> {
1699
+ match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
1700
+ InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
1701
+ InsertCell::Provided(SqlCell::Value(ScalarValue::Boolean(Some(value)))) => Ok(Some(value)),
1702
+ InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
1703
+ "UPDATE lix_directory expected boolean column '{column_name}', got {other:?}"
1704
+ ))),
1705
+ }
1706
+ }
1707
+
1708
+ fn optional_string_value(
1709
+ batch: &RecordBatch,
1710
+ row_index: usize,
1711
+ column_name: &str,
1712
+ ) -> Result<Option<String>> {
1713
+ match optional_scalar_value(batch, row_index, column_name)? {
1714
+ None
1715
+ | Some(ScalarValue::Null)
1716
+ | Some(ScalarValue::Utf8(None))
1717
+ | Some(ScalarValue::Utf8View(None))
1718
+ | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
1719
+ Some(ScalarValue::Utf8(Some(value)))
1720
+ | Some(ScalarValue::Utf8View(Some(value)))
1721
+ | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
1722
+ Some(other) => Err(DataFusionError::Execution(format!(
1723
+ "INSERT into lix_directory expected text-compatible column '{column_name}', got {other:?}"
1724
+ ))),
1725
+ }
1726
+ }
1727
+
1728
+ fn optional_metadata_value(
1729
+ batch: &RecordBatch,
1730
+ row_index: usize,
1731
+ column_name: &str,
1732
+ context: &str,
1733
+ ) -> Result<Option<RowMetadata>> {
1734
+ optional_string_value(batch, row_index, column_name)?
1735
+ .map(|value| {
1736
+ parse_row_metadata(&value, context).map_err(super::error::lix_error_to_datafusion_error)
1737
+ })
1738
+ .transpose()
1739
+ }
1740
+
1741
+ fn optional_bool_value(
1742
+ batch: &RecordBatch,
1743
+ row_index: usize,
1744
+ column_name: &str,
1745
+ ) -> Result<Option<bool>> {
1746
+ match optional_scalar_value(batch, row_index, column_name)? {
1747
+ None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
1748
+ Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
1749
+ Some(other) => Err(DataFusionError::Execution(format!(
1750
+ "INSERT into lix_directory expected boolean column '{column_name}', got {other:?}"
1751
+ ))),
1752
+ }
1753
+ }
1754
+
1755
+ fn optional_scalar_value(
1756
+ batch: &RecordBatch,
1757
+ row_index: usize,
1758
+ column_name: &str,
1759
+ ) -> Result<Option<ScalarValue>> {
1760
+ let schema = batch.schema();
1761
+ let column_index = match schema.index_of(column_name) {
1762
+ Ok(column_index) => column_index,
1763
+ Err(_) => return Ok(None),
1764
+ };
1765
+ if row_index >= batch.num_rows() {
1766
+ return Err(DataFusionError::Execution(format!(
1767
+ "row index {row_index} out of bounds for lix_directory batch with {} rows",
1768
+ batch.num_rows()
1769
+ )));
1770
+ }
1771
+ ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
1772
+ .map(Some)
1773
+ .map_err(|error| {
1774
+ DataFusionError::Execution(format!(
1775
+ "failed to decode lix_directory column '{column_name}' at row {row_index}: {error}"
1776
+ ))
1777
+ })
1778
+ }
1779
+
1780
+ fn lix_directory_schema() -> SchemaRef {
1781
+ Arc::new(Schema::new(vec![
1782
+ Field::new("id", DataType::Utf8, true),
1783
+ Field::new("path", DataType::Utf8, true),
1784
+ Field::new("parent_id", DataType::Utf8, true),
1785
+ Field::new("name", DataType::Utf8, false),
1786
+ Field::new("hidden", DataType::Boolean, true),
1787
+ Field::new("lixcol_entity_id", DataType::Utf8, false),
1788
+ Field::new("lixcol_schema_key", DataType::Utf8, false),
1789
+ Field::new("lixcol_file_id", DataType::Utf8, true),
1790
+ Field::new("lixcol_schema_version", DataType::Utf8, false),
1791
+ Field::new("lixcol_global", DataType::Boolean, true),
1792
+ Field::new("lixcol_change_id", DataType::Utf8, true),
1793
+ Field::new("lixcol_created_at", DataType::Utf8, true),
1794
+ Field::new("lixcol_updated_at", DataType::Utf8, true),
1795
+ Field::new("lixcol_commit_id", DataType::Utf8, true),
1796
+ Field::new("lixcol_untracked", DataType::Boolean, true),
1797
+ json_field("lixcol_metadata", true),
1798
+ ]))
1799
+ }
1800
+
1801
+ fn lix_directory_by_version_schema() -> SchemaRef {
1802
+ let mut fields = lix_directory_schema()
1803
+ .fields()
1804
+ .iter()
1805
+ .map(|field| field.as_ref().clone())
1806
+ .collect::<Vec<_>>();
1807
+ fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
1808
+ Arc::new(Schema::new(fields))
1809
+ }
1810
+
1811
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
1812
+ super::error::datafusion_error_to_lix_error(error)
1813
+ }
1814
+
1815
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
1816
+ super::error::lix_error_to_datafusion_error(error)
1817
+ }
1818
+
1819
#[cfg(test)]
mod tests {
    use std::collections::{BTreeMap, BTreeSet};
    use std::sync::Arc;

    use async_trait::async_trait;
    use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray};
    use datafusion::arrow::datatypes::{DataType, Field, Schema};
    use datafusion::arrow::record_batch::RecordBatch;
    use datafusion::execution::TaskContext;
    use serde_json::json;

    use crate::binary_cas::BlobDataReader;
    use crate::functions::{
        FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
    };
    use crate::live_state::{
        LiveStateReader, LiveStateRow, LiveStateRowRequest, LiveStateScanRequest,
    };
    use crate::sql2::dml::InsertSink;
    use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
    use crate::transaction::types::{StageRow, StageWrite, StageWriteMode, StageWriteOutcome};
    use crate::LixError;

    use super::{
        derive_directory_path_for, directory_path_resolvers_from_state_rows,
        lix_directory_by_version_schema, lix_directory_insert_origin, lix_directory_record_batch,
        lix_directory_recursive_delete_rows_from_batch, lix_directory_write_rows_from_batch,
        lix_directory_write_rows_from_batch_with_path_resolvers, DirectoryDescriptorRecord,
        LixDirectoryInsertSink, VersionBinding,
    };
    use crate::sql2::filesystem_visibility::VisibleFilesystem;

    /// Snapshot of the root `docs` directory descriptor shared by several tests.
    const DOCS_SNAPSHOT: &str =
        r#"{"id":"dir-docs","parent_id":null,"name":"docs","hidden":false}"#;

    /// Snapshot of the `guides` directory descriptor nested under `docs`.
    const GUIDES_SNAPSHOT: &str =
        r#"{"id":"dir-guides","parent_id":"dir-docs","name":"guides","hidden":false}"#;

    /// Returns a generator that hands out `ids` in order and panics once they
    /// are exhausted.
    fn test_id_generator(ids: &'static [&'static str]) -> impl FnMut() -> String {
        let mut remaining = ids.iter();
        move || {
            let next = remaining.next().expect("test id should exist");
            next.to_string()
        }
    }

    /// Function provider handle backed by `SystemFunctionProvider`.
    fn test_functions() -> FunctionProviderHandle {
        let provider = Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>;
        SharedFunctionProvider::new(provider)
    }

    /// Write context double: serves `rows` for every live-state scan and
    /// records each staged write in `writes`.
    #[derive(Default)]
    struct CapturingWriteContext {
        rows: Vec<LiveStateRow>,
        writes: Vec<StageWrite>,
    }

    #[async_trait]
    impl BlobDataReader for CapturingWriteContext {
        /// Reports every requested blob as missing.
        async fn load_bytes_many(
            &self,
            hashes: &[crate::binary_cas::BlobHash],
        ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
            Ok(crate::binary_cas::BlobBytesBatch::missing(hashes.len()))
        }
    }

    #[async_trait]
    impl SqlWriteExecutionContext for CapturingWriteContext {
        fn active_version_id(&self) -> &str {
            "version-a"
        }

        fn functions(&self) -> FunctionProviderHandle {
            test_functions()
        }

        fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
            Ok(Vec::new())
        }

        /// Delegates to the `BlobDataReader` implementation above.
        async fn load_bytes_many(
            &mut self,
            hashes: &[crate::binary_cas::BlobHash],
        ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
            BlobDataReader::load_bytes_many(self, hashes).await
        }

        /// Ignores the request and returns a copy of the seeded rows.
        async fn scan_live_state(
            &mut self,
            _request: &LiveStateScanRequest,
        ) -> Result<Vec<LiveStateRow>, LixError> {
            Ok(self.rows.clone())
        }

        /// Every version has a head commit except `ghost-version`.
        async fn load_version_head(
            &mut self,
            version_id: &str,
        ) -> Result<Option<String>, LixError> {
            match version_id {
                "ghost-version" => Ok(None),
                known => Ok(Some(format!("commit-{known}"))),
            }
        }

        /// Records the write; the reported count is always zero.
        async fn stage_write(&mut self, write: StageWrite) -> Result<StageWriteOutcome, LixError> {
            self.writes.push(write);
            Ok(StageWriteOutcome { count: 0 })
        }
    }

    /// Live-state reader double that returns its seeded rows for every scan.
    /// Not referenced by the tests in this module, hence `allow(dead_code)`.
    #[derive(Default)]
    #[allow(dead_code)]
    struct RowsLiveStateReader {
        rows: Vec<LiveStateRow>,
    }

    #[async_trait]
    impl LiveStateReader for RowsLiveStateReader {
        async fn scan_rows(
            &self,
            _request: &LiveStateScanRequest,
        ) -> Result<Vec<LiveStateRow>, LixError> {
            Ok(self.rows.clone())
        }

        async fn load_row(
            &self,
            _request: &LiveStateRowRequest,
        ) -> Result<Option<LiveStateRow>, LixError> {
            Ok(None)
        }
    }

    /// Directory-descriptor live row without a file id.
    fn live_row(entity_id: &str, version_id: &str, snapshot_content: &str) -> LiveStateRow {
        live_filesystem_row(
            entity_id,
            super::DIRECTORY_SCHEMA_KEY,
            None,
            version_id,
            snapshot_content,
        )
    }

    /// Live row fixture for an arbitrary filesystem schema key with fixed
    /// metadata, timestamps and derived change/commit ids.
    fn live_filesystem_row(
        entity_id: &str,
        schema_key: &str,
        file_id: Option<&str>,
        version_id: &str,
        snapshot_content: &str,
    ) -> LiveStateRow {
        let identity = crate::entity_identity::EntityIdentity::from_string(entity_id)
            .expect("entity id should decode");
        LiveStateRow {
            entity_id: identity,
            schema_key: schema_key.to_string(),
            file_id: file_id.map(ToOwned::to_owned),
            snapshot_content: Some(snapshot_content.to_string()),
            metadata: Some(json!({"source": "test"})),
            schema_version: "1".to_string(),
            version_id: version_id.to_string(),
            change_id: Some(format!("change-{entity_id}")),
            commit_id: Some(format!("commit-{entity_id}")),
            global: false,
            untracked: false,
            created_at: "2026-04-23T00:00:00Z".to_string(),
            updated_at: "2026-04-23T01:00:00Z".to_string(),
        }
    }

    /// Live row for the root `docs` directory in `version-a`.
    fn docs_live_row() -> LiveStateRow {
        live_row("dir-docs", "version-a", DOCS_SNAPSHOT)
    }

    /// Fixture tree in `version-a`: `/docs/` (with `index.md`) containing
    /// `/docs/guides/` (with `readme.md` and its blob ref).
    fn filesystem_rows() -> Vec<LiveStateRow> {
        vec![
            live_filesystem_row(
                "dir-docs",
                "lix_directory_descriptor",
                None,
                "version-a",
                DOCS_SNAPSHOT,
            ),
            live_filesystem_row(
                "dir-guides",
                "lix_directory_descriptor",
                None,
                "version-a",
                GUIDES_SNAPSHOT,
            ),
            live_filesystem_row(
                "file-index",
                "lix_file_descriptor",
                None,
                "version-a",
                r#"{"id":"file-index","directory_id":"dir-docs","name":"index.md","hidden":false}"#,
            ),
            live_filesystem_row(
                "file-readme",
                "lix_file_descriptor",
                None,
                "version-a",
                r#"{"id":"file-readme","directory_id":"dir-guides","name":"readme.md","hidden":false}"#,
            ),
            live_filesystem_row(
                "file-readme",
                "lix_binary_blob_ref",
                Some("file-readme"),
                "version-a",
                r#"{"id":"file-readme","blob_hash":"abc123","size_bytes":5}"#,
            ),
        ]
    }

    /// Visible filesystem map holding the fixture tree under `version-a`.
    fn version_a_filesystems() -> BTreeMap<String, VisibleFilesystem> {
        let filesystem = VisibleFilesystem::from_live_rows(filesystem_rows())
            .expect("visible filesystem should build");
        BTreeMap::from([("version-a".to_string(), filesystem)])
    }

    fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
        Arc::new(StringArray::from(values)) as ArrayRef
    }

    /// One-row INSERT batch for the `docs` directory; `lixcol_version_id` is
    /// only present when `include_version` is set.
    fn directory_insert_batch(include_version: bool, global: bool) -> RecordBatch {
        let mut columns: Vec<(Field, ArrayRef)> = vec![
            (
                Field::new("id", DataType::Utf8, false),
                string_column(vec![Some("dir-docs")]),
            ),
            (
                Field::new("parent_id", DataType::Utf8, true),
                string_column(vec![None]),
            ),
            (
                Field::new("name", DataType::Utf8, false),
                string_column(vec![Some("docs")]),
            ),
            (
                Field::new("hidden", DataType::Boolean, false),
                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
            ),
            (
                Field::new("lixcol_global", DataType::Boolean, false),
                Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
            ),
            (
                Field::new("lixcol_metadata", DataType::Utf8, true),
                string_column(vec![Some(r#"{"source":"directory"}"#)]),
            ),
        ];
        if include_version {
            columns.push((
                Field::new("lixcol_version_id", DataType::Utf8, false),
                string_column(vec![Some("version-a")]),
            ));
        }
        let (fields, arrays): (Vec<Field>, Vec<ArrayRef>) = columns.into_iter().unzip();
        RecordBatch::try_new(Arc::new(Schema::new(fields)), arrays)
            .expect("directory insert batch should build")
    }

    /// One-row INSERT batch that addresses the new directory by `path`.
    fn directory_path_insert_batch(path: &str) -> RecordBatch {
        let schema = Schema::new(vec![
            Field::new("id", DataType::Utf8, false),
            Field::new("path", DataType::Utf8, true),
            Field::new("hidden", DataType::Boolean, false),
            Field::new("lixcol_version_id", DataType::Utf8, false),
        ]);
        let columns = vec![
            string_column(vec![Some("dir-nested")]),
            string_column(vec![Some(path)]),
            Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
            string_column(vec![Some("version-a")]),
        ];
        RecordBatch::try_new(Arc::new(schema), columns)
            .expect("directory path insert batch should build")
    }

    /// DELETE batch with one row per id, all in `version-a`.
    fn directory_delete_batch(ids: &[&str]) -> RecordBatch {
        let schema = Schema::new(vec![
            Field::new("id", DataType::Utf8, false),
            Field::new("lixcol_version_id", DataType::Utf8, false),
        ]);
        let id_column = string_column(ids.iter().copied().map(Some).collect::<Vec<_>>());
        let version_column = string_column(vec![Some("version-a"); ids.len()]);
        RecordBatch::try_new(Arc::new(schema), vec![id_column, version_column])
            .expect("directory delete batch should build")
    }

    /// Stage row expected for the `docs` insert batch, with the origin tagged
    /// by `surface`.
    fn expected_docs_stage_row(surface: &'static str) -> StageRow {
        StageRow {
            entity_id: Some(crate::entity_identity::EntityIdentity::single("dir-docs")),
            schema_key: super::DIRECTORY_SCHEMA_KEY.to_string(),
            file_id: None,
            snapshot_content: Some(
                r#"{"hidden":false,"id":"dir-docs","name":"docs","parent_id":null}"#.to_string(),
            ),
            metadata: Some(json!({"source": "directory"})),
            origin: Some(lix_directory_insert_origin(surface, "dir-docs")),
            schema_version: "1".to_string(),
            created_at: None,
            updated_at: None,
            global: false,
            change_id: None,
            commit_id: None,
            untracked: false,
            version_id: "version-a".to_string(),
        }
    }

    /// Asserts that `row` stages `dir-nested` as a child of `dir-docs`.
    fn assert_nested_under_docs(row: &StageRow) {
        let snapshot: serde_json::Value =
            serde_json::from_str(row.snapshot_content.as_deref().unwrap()).unwrap();
        assert_eq!(snapshot["id"], "dir-nested");
        assert_eq!(snapshot["parent_id"], "dir-docs");
        assert_eq!(snapshot["name"], "nested");
    }

    #[test]
    fn derives_nested_directory_paths() {
        let root = DirectoryDescriptorRecord {
            id: "dir-docs".to_string(),
            parent_id: None,
            name: "docs".to_string(),
            hidden: false,
            live: docs_live_row(),
        };
        let child = DirectoryDescriptorRecord {
            id: "dir-guides".to_string(),
            parent_id: Some("dir-docs".to_string()),
            name: "guides".to_string(),
            hidden: false,
            live: live_row("dir-guides", "version-a", GUIDES_SNAPSHOT),
        };
        let records = BTreeMap::from([(root.id.clone(), &root), (child.id.clone(), &child)]);
        let mut paths = BTreeMap::new();

        let derived = derive_directory_path_for(
            "version-a",
            "dir-guides",
            &records,
            &mut paths,
            &mut BTreeSet::new(),
        )
        .expect("path derivation should succeed");

        assert_eq!(derived, Some("/docs/guides/".to_string()));
    }

    #[test]
    fn record_batch_projects_directory_columns() {
        let rows = vec![
            docs_live_row(),
            live_row(
                "dir-guides",
                "version-a",
                r#"{"id":"dir-guides","parent_id":"dir-docs","name":"guides","hidden":true}"#,
            ),
        ];

        let batch = lix_directory_record_batch(&lix_directory_by_version_schema(), rows)
            .expect("directory batch should build");
        assert_eq!(batch.num_rows(), 2);

        let paths = batch
            .column_by_name("path")
            .expect("path column")
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("path is string");
        assert_eq!(paths.value(1), "/docs/guides/");

        let versions = batch
            .column_by_name("lixcol_version_id")
            .expect("version column")
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("version is string");
        assert_eq!(versions.value(1), "version-a");
    }

    #[test]
    fn decodes_directory_insert_into_lix_state_write_row() {
        let rows = lix_directory_write_rows_from_batch(&directory_insert_batch(true, false), None)
            .expect("directory batch should decode");

        assert_eq!(rows, vec![expected_docs_stage_row("lix_directory")]);
    }

    #[test]
    fn active_directory_insert_defaults_version_id() {
        let batch = directory_insert_batch(false, false);
        let rows = lix_directory_write_rows_from_batch(&batch, Some("version-active"))
            .expect("active directory batch should decode");

        assert_eq!(rows[0].version_id, "version-active");
    }

    #[test]
    fn by_version_directory_insert_requires_version_id_for_non_global_rows() {
        let batch = directory_insert_batch(false, false);
        let error = lix_directory_write_rows_from_batch(&batch, None)
            .expect_err("by-version insert should require version id");

        assert!(
            error.to_string().contains("requires lixcol_version_id"),
            "unexpected error: {error}"
        );
    }

    #[test]
    fn directory_insert_rejects_global_with_non_global_version_id() {
        let batch = directory_insert_batch(true, true);
        let error = lix_directory_write_rows_from_batch(&batch, None)
            .expect_err("global directory write should reject conflicting version id");

        assert!(
            error
                .to_string()
                .contains("cannot set lixcol_global=true with non-global lixcol_version_id"),
            "unexpected error: {error}"
        );
    }

    #[test]
    fn directory_path_insert_reuses_existing_parent_descriptor() {
        let mut resolvers = directory_path_resolvers_from_state_rows(vec![docs_live_row()])
            .expect("existing directory rows should seed paths");

        let rows = lix_directory_write_rows_from_batch_with_path_resolvers(
            &directory_path_insert_batch("/docs/nested/"),
            None,
            "lix_directory",
            &mut resolvers,
            &mut test_id_generator(&["should-not-be-used"]),
        )
        .expect("directory path batch should decode");

        assert_eq!(rows.len(), 1);
        assert_nested_under_docs(&rows[0]);
    }

    #[test]
    fn recursive_directory_delete_deletes_nested_dirs_files_and_blob_refs() {
        let visible_filesystems = version_a_filesystems();

        let (rows, count) = lix_directory_recursive_delete_rows_from_batch(
            &directory_delete_batch(&["dir-docs"]),
            None,
            &visible_filesystems,
        )
        .expect("recursive directory delete should plan");

        assert_eq!(count, 4);

        let planned = rows
            .iter()
            .map(|row| {
                let entity = row
                    .entity_id
                    .as_ref()
                    .expect("planned delete row should carry entity_id")
                    .as_string()
                    .expect("planned delete row should project entity_id");
                (row.schema_key.as_str(), entity)
            })
            .collect::<Vec<_>>();
        assert_eq!(
            planned,
            vec![
                ("lix_file_descriptor", "file-readme".to_string()),
                ("lix_binary_blob_ref", "file-readme".to_string()),
                ("lix_directory_descriptor", "dir-guides".to_string()),
                ("lix_file_descriptor", "file-index".to_string()),
                ("lix_directory_descriptor", "dir-docs".to_string()),
            ]
        );
        assert!(rows.iter().all(|row| row.snapshot_content.is_none()));
    }

    #[test]
    fn recursive_directory_delete_dedupes_overlapping_parent_and_child() {
        let visible_filesystems = version_a_filesystems();

        let (rows, count) = lix_directory_recursive_delete_rows_from_batch(
            &directory_delete_batch(&["dir-docs", "dir-guides"]),
            None,
            &visible_filesystems,
        )
        .expect("recursive directory delete should plan");

        assert_eq!(count, 4);

        // Every planned row must be unique by (schema, entity, file, version).
        let identities: BTreeSet<_> = rows
            .iter()
            .map(|row| {
                (
                    row.schema_key.clone(),
                    row.entity_id.clone(),
                    row.file_id.clone(),
                    row.version_id.clone(),
                )
            })
            .collect();
        assert_eq!(identities.len(), rows.len());
        assert_eq!(rows.len(), 5);
    }

    #[tokio::test]
    async fn directory_insert_sink_stages_decoded_lix_state_rows() {
        let mut write_context = CapturingWriteContext::default();
        let write_ctx = SqlWriteContext::new(&mut write_context);
        let batch = directory_insert_batch(true, false);
        let sink = LixDirectoryInsertSink::new(
            batch.schema(),
            write_ctx,
            test_functions(),
            VersionBinding::explicit(),
        );

        let count = sink
            .write_batches(vec![batch], &Arc::new(TaskContext::default()))
            .await
            .expect("directory sink should stage write");

        assert_eq!(count, 1);
        assert_eq!(
            write_context.writes.as_slice(),
            &[StageWrite::Rows {
                mode: StageWriteMode::Insert,
                rows: vec![expected_docs_stage_row("lix_directory_by_version")],
            }]
        );
    }

    #[tokio::test]
    async fn directory_insert_sink_seeds_path_resolver_from_live_state() {
        let mut write_context = CapturingWriteContext {
            rows: vec![docs_live_row()],
            writes: Vec::new(),
        };
        let write_ctx = SqlWriteContext::new(&mut write_context);
        let batch = directory_path_insert_batch("/docs/nested/");
        let sink = LixDirectoryInsertSink::new(
            batch.schema(),
            write_ctx,
            test_functions(),
            VersionBinding::explicit(),
        );

        let count = sink
            .write_batches(vec![batch], &Arc::new(TaskContext::default()))
            .await
            .expect("directory sink should stage path write");

        assert_eq!(count, 1);
        let [StageWrite::Rows { rows, .. }] = write_context.writes.as_slice() else {
            panic!("expected one directory staged write");
        };
        assert_eq!(rows.len(), 1);
        assert_nested_under_docs(&rows[0]);
    }
}