@lix-js/sdk 0.6.0-preview.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/README.md +76 -4
  2. package/dist/errors.d.ts +7 -0
  3. package/dist/errors.js +19 -0
  4. package/dist/index.d.ts +4 -5
  5. package/dist/index.js +3 -3
  6. package/dist/native.d.ts +1 -0
  7. package/dist/native.js +47 -0
  8. package/dist/open-lix.d.ts +39 -201
  9. package/dist/open-lix.js +59 -284
  10. package/dist/result.d.ts +18 -0
  11. package/dist/result.js +48 -0
  12. package/dist/types.d.ts +114 -1
  13. package/dist/value.d.ts +28 -0
  14. package/dist/value.js +245 -0
  15. package/package.json +20 -50
  16. package/SKILL.md +0 -506
  17. package/dist/builtin-schemas.d.ts +0 -1
  18. package/dist/builtin-schemas.js +0 -1
  19. package/dist/engine-wasm/index.d.ts +0 -87
  20. package/dist/engine-wasm/index.js +0 -339
  21. package/dist/engine-wasm/wasm/lix_engine.d.ts +0 -79
  22. package/dist/engine-wasm/wasm/lix_engine.js +0 -821
  23. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  24. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +0 -26
  25. package/dist/generated/builtin-schemas.d.ts +0 -427
  26. package/dist/generated/builtin-schemas.js +0 -643
  27. package/dist/sqlite/index.d.ts +0 -12
  28. package/dist/sqlite/index.js +0 -303
  29. package/dist-engine-src/README.md +0 -18
  30. package/dist-engine-src/src/backend/kv.rs +0 -358
  31. package/dist-engine-src/src/backend/mod.rs +0 -12
  32. package/dist-engine-src/src/backend/testing.rs +0 -658
  33. package/dist-engine-src/src/backend/types.rs +0 -96
  34. package/dist-engine-src/src/binary_cas/chunking.rs +0 -31
  35. package/dist-engine-src/src/binary_cas/codec.rs +0 -346
  36. package/dist-engine-src/src/binary_cas/context.rs +0 -139
  37. package/dist-engine-src/src/binary_cas/kv.rs +0 -1063
  38. package/dist-engine-src/src/binary_cas/mod.rs +0 -11
  39. package/dist-engine-src/src/binary_cas/types.rs +0 -121
  40. package/dist-engine-src/src/catalog/context.rs +0 -412
  41. package/dist-engine-src/src/catalog/mod.rs +0 -10
  42. package/dist-engine-src/src/catalog/schema.rs +0 -4
  43. package/dist-engine-src/src/catalog/snapshot.rs +0 -1114
  44. package/dist-engine-src/src/cel/context.rs +0 -86
  45. package/dist-engine-src/src/cel/error.rs +0 -19
  46. package/dist-engine-src/src/cel/mod.rs +0 -8
  47. package/dist-engine-src/src/cel/provider.rs +0 -9
  48. package/dist-engine-src/src/cel/runtime.rs +0 -167
  49. package/dist-engine-src/src/cel/value.rs +0 -50
  50. package/dist-engine-src/src/commit_graph/context.rs +0 -901
  51. package/dist-engine-src/src/commit_graph/mod.rs +0 -11
  52. package/dist-engine-src/src/commit_graph/types.rs +0 -109
  53. package/dist-engine-src/src/commit_graph/walker.rs +0 -756
  54. package/dist-engine-src/src/commit_store/codec.rs +0 -887
  55. package/dist-engine-src/src/commit_store/context.rs +0 -944
  56. package/dist-engine-src/src/commit_store/materialization.rs +0 -84
  57. package/dist-engine-src/src/commit_store/mod.rs +0 -16
  58. package/dist-engine-src/src/commit_store/storage.rs +0 -600
  59. package/dist-engine-src/src/commit_store/types.rs +0 -215
  60. package/dist-engine-src/src/common/error.rs +0 -313
  61. package/dist-engine-src/src/common/fingerprint.rs +0 -3
  62. package/dist-engine-src/src/common/fs_path.rs +0 -1336
  63. package/dist-engine-src/src/common/identity.rs +0 -145
  64. package/dist-engine-src/src/common/json_pointer.rs +0 -67
  65. package/dist-engine-src/src/common/metadata.rs +0 -40
  66. package/dist-engine-src/src/common/mod.rs +0 -23
  67. package/dist-engine-src/src/common/types.rs +0 -105
  68. package/dist-engine-src/src/common/wire.rs +0 -222
  69. package/dist-engine-src/src/domain.rs +0 -324
  70. package/dist-engine-src/src/engine.rs +0 -225
  71. package/dist-engine-src/src/entity_identity.rs +0 -405
  72. package/dist-engine-src/src/functions/context.rs +0 -292
  73. package/dist-engine-src/src/functions/deterministic.rs +0 -113
  74. package/dist-engine-src/src/functions/mod.rs +0 -18
  75. package/dist-engine-src/src/functions/provider.rs +0 -130
  76. package/dist-engine-src/src/functions/state.rs +0 -336
  77. package/dist-engine-src/src/functions/types.rs +0 -37
  78. package/dist-engine-src/src/init.rs +0 -558
  79. package/dist-engine-src/src/json_store/compression.rs +0 -77
  80. package/dist-engine-src/src/json_store/context.rs +0 -423
  81. package/dist-engine-src/src/json_store/encoded.rs +0 -15
  82. package/dist-engine-src/src/json_store/mod.rs +0 -12
  83. package/dist-engine-src/src/json_store/store.rs +0 -1109
  84. package/dist-engine-src/src/json_store/types.rs +0 -217
  85. package/dist-engine-src/src/lib.rs +0 -62
  86. package/dist-engine-src/src/live_state/context.rs +0 -2019
  87. package/dist-engine-src/src/live_state/mod.rs +0 -15
  88. package/dist-engine-src/src/live_state/overlay.rs +0 -75
  89. package/dist-engine-src/src/live_state/reader.rs +0 -23
  90. package/dist-engine-src/src/live_state/types.rs +0 -222
  91. package/dist-engine-src/src/live_state/visibility.rs +0 -223
  92. package/dist-engine-src/src/plugin/archive.rs +0 -438
  93. package/dist-engine-src/src/plugin/component.rs +0 -183
  94. package/dist-engine-src/src/plugin/install.rs +0 -619
  95. package/dist-engine-src/src/plugin/manifest.rs +0 -516
  96. package/dist-engine-src/src/plugin/materializer.rs +0 -477
  97. package/dist-engine-src/src/plugin/mod.rs +0 -33
  98. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -118
  99. package/dist-engine-src/src/plugin/storage.rs +0 -74
  100. package/dist-engine-src/src/schema/annotations/defaults.rs +0 -275
  101. package/dist-engine-src/src/schema/annotations/mod.rs +0 -1
  102. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -21
  103. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -29
  104. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -29
  105. package/dist-engine-src/src/schema/builtin/lix_change.json +0 -63
  106. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -45
  107. package/dist-engine-src/src/schema/builtin/lix_commit.json +0 -24
  108. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +0 -53
  109. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -52
  110. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -52
  111. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -40
  112. package/dist-engine-src/src/schema/builtin/lix_label.json +0 -29
  113. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +0 -74
  114. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +0 -25
  115. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -34
  116. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -48
  117. package/dist-engine-src/src/schema/builtin/mod.rs +0 -222
  118. package/dist-engine-src/src/schema/compatibility.rs +0 -787
  119. package/dist-engine-src/src/schema/definition.json +0 -187
  120. package/dist-engine-src/src/schema/definition.rs +0 -742
  121. package/dist-engine-src/src/schema/key.rs +0 -138
  122. package/dist-engine-src/src/schema/mod.rs +0 -20
  123. package/dist-engine-src/src/schema/seed.rs +0 -14
  124. package/dist-engine-src/src/schema/tests.rs +0 -780
  125. package/dist-engine-src/src/session/context.rs +0 -404
  126. package/dist-engine-src/src/session/create_version.rs +0 -88
  127. package/dist-engine-src/src/session/execute.rs +0 -541
  128. package/dist-engine-src/src/session/merge/analysis.rs +0 -102
  129. package/dist-engine-src/src/session/merge/apply.rs +0 -23
  130. package/dist-engine-src/src/session/merge/conflicts.rs +0 -63
  131. package/dist-engine-src/src/session/merge/mod.rs +0 -11
  132. package/dist-engine-src/src/session/merge/stats.rs +0 -65
  133. package/dist-engine-src/src/session/merge/version.rs +0 -427
  134. package/dist-engine-src/src/session/mod.rs +0 -27
  135. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +0 -100
  136. package/dist-engine-src/src/session/switch_version.rs +0 -110
  137. package/dist-engine-src/src/session/transaction.rs +0 -76
  138. package/dist-engine-src/src/sql2/change_provider.rs +0 -331
  139. package/dist-engine-src/src/sql2/classify.rs +0 -174
  140. package/dist-engine-src/src/sql2/context.rs +0 -311
  141. package/dist-engine-src/src/sql2/directory_history_provider.rs +0 -631
  142. package/dist-engine-src/src/sql2/directory_provider.rs +0 -2453
  143. package/dist-engine-src/src/sql2/dml.rs +0 -148
  144. package/dist-engine-src/src/sql2/entity_history_provider.rs +0 -440
  145. package/dist-engine-src/src/sql2/entity_provider.rs +0 -3211
  146. package/dist-engine-src/src/sql2/error.rs +0 -215
  147. package/dist-engine-src/src/sql2/execute.rs +0 -3533
  148. package/dist-engine-src/src/sql2/file_history_provider.rs +0 -910
  149. package/dist-engine-src/src/sql2/file_provider.rs +0 -3679
  150. package/dist-engine-src/src/sql2/filesystem_planner.rs +0 -1490
  151. package/dist-engine-src/src/sql2/filesystem_predicates.rs +0 -159
  152. package/dist-engine-src/src/sql2/filesystem_visibility.rs +0 -383
  153. package/dist-engine-src/src/sql2/history_projection.rs +0 -56
  154. package/dist-engine-src/src/sql2/history_provider.rs +0 -412
  155. package/dist-engine-src/src/sql2/history_route.rs +0 -657
  156. package/dist-engine-src/src/sql2/lix_state_provider.rs +0 -2512
  157. package/dist-engine-src/src/sql2/mod.rs +0 -47
  158. package/dist-engine-src/src/sql2/predicate_typecheck.rs +0 -246
  159. package/dist-engine-src/src/sql2/public_bind/assignment.rs +0 -46
  160. package/dist-engine-src/src/sql2/public_bind/capability.rs +0 -41
  161. package/dist-engine-src/src/sql2/public_bind/dml.rs +0 -172
  162. package/dist-engine-src/src/sql2/public_bind/mod.rs +0 -26
  163. package/dist-engine-src/src/sql2/public_bind/table.rs +0 -168
  164. package/dist-engine-src/src/sql2/read_only.rs +0 -63
  165. package/dist-engine-src/src/sql2/record_batch.rs +0 -17
  166. package/dist-engine-src/src/sql2/result_metadata.rs +0 -29
  167. package/dist-engine-src/src/sql2/runtime.rs +0 -60
  168. package/dist-engine-src/src/sql2/session.rs +0 -132
  169. package/dist-engine-src/src/sql2/udfs/common.rs +0 -295
  170. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +0 -53
  171. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +0 -47
  172. package/dist-engine-src/src/sql2/udfs/lix_json.rs +0 -100
  173. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +0 -99
  174. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +0 -99
  175. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +0 -82
  176. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +0 -85
  177. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +0 -76
  178. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +0 -76
  179. package/dist-engine-src/src/sql2/udfs/mod.rs +0 -89
  180. package/dist-engine-src/src/sql2/udfs/public_call.rs +0 -238
  181. package/dist-engine-src/src/sql2/version_provider.rs +0 -1202
  182. package/dist-engine-src/src/sql2/version_scope.rs +0 -394
  183. package/dist-engine-src/src/sql2/write_normalization.rs +0 -345
  184. package/dist-engine-src/src/storage/context.rs +0 -356
  185. package/dist-engine-src/src/storage/mod.rs +0 -14
  186. package/dist-engine-src/src/storage/read_scope.rs +0 -88
  187. package/dist-engine-src/src/storage/types.rs +0 -501
  188. package/dist-engine-src/src/storage_bench.rs +0 -4863
  189. package/dist-engine-src/src/test_support.rs +0 -228
  190. package/dist-engine-src/src/tracked_state/by_file_index.rs +0 -98
  191. package/dist-engine-src/src/tracked_state/codec.rs +0 -2085
  192. package/dist-engine-src/src/tracked_state/context.rs +0 -1867
  193. package/dist-engine-src/src/tracked_state/diff.rs +0 -686
  194. package/dist-engine-src/src/tracked_state/materialization.rs +0 -403
  195. package/dist-engine-src/src/tracked_state/materializer.rs +0 -488
  196. package/dist-engine-src/src/tracked_state/merge.rs +0 -492
  197. package/dist-engine-src/src/tracked_state/mod.rs +0 -32
  198. package/dist-engine-src/src/tracked_state/storage.rs +0 -375
  199. package/dist-engine-src/src/tracked_state/tree.rs +0 -3187
  200. package/dist-engine-src/src/tracked_state/types.rs +0 -231
  201. package/dist-engine-src/src/transaction/commit.rs +0 -1484
  202. package/dist-engine-src/src/transaction/context.rs +0 -1548
  203. package/dist-engine-src/src/transaction/live_state_overlay.rs +0 -35
  204. package/dist-engine-src/src/transaction/mod.rs +0 -13
  205. package/dist-engine-src/src/transaction/normalization.rs +0 -890
  206. package/dist-engine-src/src/transaction/prep.rs +0 -37
  207. package/dist-engine-src/src/transaction/schema_resolver.rs +0 -149
  208. package/dist-engine-src/src/transaction/staging.rs +0 -1731
  209. package/dist-engine-src/src/transaction/types.rs +0 -460
  210. package/dist-engine-src/src/transaction/validation.rs +0 -5830
  211. package/dist-engine-src/src/untracked_state/codec.rs +0 -307
  212. package/dist-engine-src/src/untracked_state/context.rs +0 -98
  213. package/dist-engine-src/src/untracked_state/materialization.rs +0 -63
  214. package/dist-engine-src/src/untracked_state/mod.rs +0 -15
  215. package/dist-engine-src/src/untracked_state/storage.rs +0 -396
  216. package/dist-engine-src/src/untracked_state/types.rs +0 -146
  217. package/dist-engine-src/src/version/context.rs +0 -40
  218. package/dist-engine-src/src/version/lifecycle.rs +0 -221
  219. package/dist-engine-src/src/version/mod.rs +0 -13
  220. package/dist-engine-src/src/version/refs.rs +0 -330
  221. package/dist-engine-src/src/version/stage_rows.rs +0 -67
  222. package/dist-engine-src/src/version/types.rs +0 -21
  223. package/dist-engine-src/src/wasm/mod.rs +0 -60
@@ -1,2453 +0,0 @@
1
- use std::any::Any;
2
- use std::collections::{BTreeMap, BTreeSet};
3
- use std::sync::Arc;
4
-
5
- use async_trait::async_trait;
6
- use datafusion::arrow::array::{
7
- ArrayRef, BooleanArray, RecordBatchOptions, StringArray, UInt64Array,
8
- };
9
- use datafusion::arrow::compute::{and, filter_record_batch};
10
- use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
11
- use datafusion::arrow::record_batch::RecordBatch;
12
- use datafusion::catalog::{Session, TableProvider};
13
- use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue};
14
- use datafusion::datasource::TableType;
15
- use datafusion::execution::TaskContext;
16
- use datafusion::logical_expr::dml::InsertOp;
17
- use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
18
- use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
19
- use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
20
- use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
21
- use datafusion::physical_plan::{
22
- DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
23
- };
24
- use datafusion::prelude::SessionContext;
25
- use futures_util::{stream, TryStreamExt};
26
- use serde::Deserialize;
27
-
28
- use crate::functions::FunctionProviderHandle;
29
- use crate::live_state::MaterializedLiveStateRow;
30
- use crate::live_state::{
31
- LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
32
- };
33
- use crate::sql2::dml::{InsertExec, InsertSink};
34
- use crate::sql2::filesystem_predicates::{
35
- canonicalize_filesystem_path_filters, FilesystemPathKind,
36
- };
37
- use crate::sql2::predicate_typecheck::validate_json_predicate_filters;
38
- use crate::sql2::version_scope::{
39
- explicit_version_ids_from_dml_filters, resolve_provider_version_ids,
40
- resolve_write_version_scope, VersionBinding,
41
- };
42
- use crate::sql2::write_normalization::{InsertCell, SqlCell, UpdateAssignmentValues};
43
- use crate::transaction::types::{
44
- LogicalPrimaryKey, TransactionJson, TransactionWriteOperation, TransactionWriteOrigin,
45
- TransactionWriteRow,
46
- };
47
- use crate::version::VersionRefReader;
48
- use crate::{parse_row_metadata_value, serialize_row_metadata, LixError};
49
-
50
- use super::filesystem_planner::{
51
- directory_descriptor_write_row, directory_path_resolvers_from_state_rows,
52
- filesystem_storage_scope_key, plan_recursive_directory_delete, DirectoryDescriptorWriteIntent,
53
- DirectoryPathResolver, FilesystemDeletePlan, FilesystemRowContext,
54
- };
55
- use super::filesystem_visibility::VisibleFilesystem;
56
- use super::result_metadata::json_field;
57
- use crate::sql2::{
58
- SqlWriteContext, WriteAccess, WriteContextLiveStateReader, WriteContextVersionRefReader,
59
- };
60
- use crate::transaction::types::{TransactionWrite, TransactionWriteMode};
61
-
62
- const DIRECTORY_SCHEMA_KEY: &str = "lix_directory_descriptor";
63
- const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";
64
-
65
- pub(crate) async fn register_lix_directory_providers(
66
- session: &SessionContext,
67
- active_version_id: &str,
68
- live_state: Arc<dyn LiveStateReader>,
69
- version_ref: Arc<dyn VersionRefReader>,
70
- functions: FunctionProviderHandle,
71
- ) -> Result<(), LixError> {
72
- session
73
- .register_table(
74
- "lix_directory_by_version",
75
- Arc::new(LixDirectoryProvider::by_version(
76
- Arc::clone(&live_state),
77
- Arc::clone(&version_ref),
78
- functions.clone(),
79
- )),
80
- )
81
- .map_err(datafusion_error_to_lix_error)?;
82
- session
83
- .register_table(
84
- "lix_directory",
85
- Arc::new(LixDirectoryProvider::active_version(
86
- active_version_id,
87
- live_state,
88
- version_ref,
89
- functions,
90
- )),
91
- )
92
- .map_err(datafusion_error_to_lix_error)?;
93
- Ok(())
94
- }
95
-
96
- pub(crate) async fn register_lix_directory_write_providers(
97
- session: &SessionContext,
98
- write_ctx: SqlWriteContext,
99
- ) -> Result<(), LixError> {
100
- session
101
- .register_table(
102
- "lix_directory_by_version",
103
- Arc::new(LixDirectoryProvider::by_version_with_write(
104
- write_ctx.clone(),
105
- )),
106
- )
107
- .map_err(datafusion_error_to_lix_error)?;
108
- session
109
- .register_table(
110
- "lix_directory",
111
- Arc::new(LixDirectoryProvider::active_version_with_write(write_ctx)),
112
- )
113
- .map_err(datafusion_error_to_lix_error)?;
114
- Ok(())
115
- }
116
-
117
- pub(crate) struct LixDirectoryProvider {
118
- schema: SchemaRef,
119
- live_state: Arc<dyn LiveStateReader>,
120
- version_ref: Arc<dyn VersionRefReader>,
121
- write_access: WriteAccess,
122
- functions: FunctionProviderHandle,
123
- version_binding: VersionBinding,
124
- }
125
-
126
- impl std::fmt::Debug for LixDirectoryProvider {
127
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
128
- f.debug_struct("LixDirectoryProvider").finish()
129
- }
130
- }
131
-
132
- impl LixDirectoryProvider {
133
- fn active_version(
134
- active_version_id: impl Into<String>,
135
- live_state: Arc<dyn LiveStateReader>,
136
- version_ref: Arc<dyn VersionRefReader>,
137
- functions: FunctionProviderHandle,
138
- ) -> Self {
139
- Self {
140
- schema: lix_directory_schema(),
141
- live_state,
142
- version_ref,
143
- write_access: WriteAccess::read_only(),
144
- functions,
145
- version_binding: VersionBinding::active(active_version_id),
146
- }
147
- }
148
-
149
- fn active_version_with_write(write_ctx: SqlWriteContext) -> Self {
150
- let active_version_id = write_ctx.active_version_id();
151
- let functions = write_ctx.functions();
152
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
153
- let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
154
- Self {
155
- schema: lix_directory_schema(),
156
- live_state,
157
- version_ref,
158
- write_access: WriteAccess::write(write_ctx),
159
- functions,
160
- version_binding: VersionBinding::active(active_version_id),
161
- }
162
- }
163
-
164
- fn by_version(
165
- live_state: Arc<dyn LiveStateReader>,
166
- version_ref: Arc<dyn VersionRefReader>,
167
- functions: FunctionProviderHandle,
168
- ) -> Self {
169
- Self {
170
- schema: lix_directory_by_version_schema(),
171
- live_state,
172
- version_ref,
173
- write_access: WriteAccess::read_only(),
174
- functions,
175
- version_binding: VersionBinding::explicit(),
176
- }
177
- }
178
-
179
- fn by_version_with_write(write_ctx: SqlWriteContext) -> Self {
180
- let functions = write_ctx.functions();
181
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
182
- let version_ref = Arc::new(WriteContextVersionRefReader::new(write_ctx.clone()));
183
- Self {
184
- schema: lix_directory_by_version_schema(),
185
- live_state,
186
- version_ref,
187
- write_access: WriteAccess::write(write_ctx),
188
- functions,
189
- version_binding: VersionBinding::explicit(),
190
- }
191
- }
192
- }
193
-
194
- #[async_trait]
195
- impl TableProvider for LixDirectoryProvider {
196
- fn as_any(&self) -> &dyn Any {
197
- self
198
- }
199
-
200
- fn schema(&self) -> SchemaRef {
201
- Arc::clone(&self.schema)
202
- }
203
-
204
- fn table_type(&self) -> TableType {
205
- TableType::Base
206
- }
207
-
208
- fn supports_filters_pushdown(
209
- &self,
210
- filters: &[&Expr],
211
- ) -> Result<Vec<TableProviderFilterPushDown>> {
212
- Ok(filters
213
- .iter()
214
- .map(|_| TableProviderFilterPushDown::Exact)
215
- .collect())
216
- }
217
-
218
- async fn scan(
219
- &self,
220
- _state: &dyn Session,
221
- projection: Option<&Vec<usize>>,
222
- filters: &[Expr],
223
- limit: Option<usize>,
224
- ) -> Result<Arc<dyn ExecutionPlan>> {
225
- let projected_schema = projected_schema(&self.schema, projection)?;
226
- let scan_limit = if filters.is_empty() { limit } else { None };
227
- let mut request = lix_directory_scan_request(
228
- self.version_binding.active_version_id(),
229
- Some(projected_schema.as_ref()),
230
- scan_limit,
231
- );
232
- if self.write_access.is_write() && matches!(self.version_binding, VersionBinding::Explicit)
233
- {
234
- request.filter.version_ids = explicit_version_ids_from_dml_filters(filters);
235
- if request.filter.version_ids.is_empty() {
236
- return Err(DataFusionError::Plan(
237
- "DELETE FROM lix_directory_by_version requires an explicit lixcol_version_id predicate"
238
- .to_string(),
239
- ));
240
- }
241
- }
242
- request.filter.version_ids = resolve_provider_version_ids(
243
- self.version_ref.as_ref(),
244
- &self.version_binding,
245
- request.filter.version_ids,
246
- )
247
- .await
248
- .map_err(lix_error_to_datafusion_error)?;
249
- let filters = canonicalize_filesystem_path_filters(filters, FilesystemPathKind::Directory)?;
250
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
251
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
252
- let physical_filters = filters
253
- .iter()
254
- .map(|expr| create_physical_expr(expr, &df_schema, _state.execution_props()))
255
- .collect::<Result<Vec<_>>>()?;
256
- Ok(Arc::new(LixDirectoryScanExec::new(
257
- Arc::clone(&self.live_state),
258
- Arc::clone(&self.schema),
259
- projected_schema,
260
- projection.cloned(),
261
- request,
262
- physical_filters,
263
- limit,
264
- )))
265
- }
266
-
267
- async fn insert_into(
268
- &self,
269
- _state: &dyn Session,
270
- input: Arc<dyn ExecutionPlan>,
271
- insert_op: InsertOp,
272
- ) -> Result<Arc<dyn ExecutionPlan>> {
273
- if insert_op != InsertOp::Append {
274
- return not_impl_err!("{insert_op} not implemented for lix_directory yet");
275
- }
276
-
277
- let write_ctx = self
278
- .write_access
279
- .require_write("INSERT into lix_directory")?;
280
-
281
- let sink = LixDirectoryInsertSink::new(
282
- input.schema(),
283
- write_ctx.clone(),
284
- self.functions.clone(),
285
- self.version_binding.clone(),
286
- );
287
- Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
288
- }
289
-
290
- async fn delete_from(
291
- &self,
292
- state: &dyn Session,
293
- filters: Vec<Expr>,
294
- ) -> Result<Arc<dyn ExecutionPlan>> {
295
- let write_ctx = self
296
- .write_access
297
- .require_write("DELETE FROM lix_directory")?;
298
-
299
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
300
- let filters =
301
- canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::Directory)?;
302
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
303
- let physical_filters = filters
304
- .iter()
305
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
306
- .collect::<Result<Vec<_>>>()?;
307
- let mut request =
308
- lix_directory_scan_request(self.version_binding.active_version_id(), None, None);
309
- if matches!(self.version_binding, VersionBinding::Explicit) {
310
- request.filter.version_ids = explicit_version_ids_from_dml_filters(&filters);
311
- if request.filter.version_ids.is_empty() {
312
- return Err(DataFusionError::Plan(
313
- "DELETE FROM lix_directory_by_version requires an explicit lixcol_version_id predicate"
314
- .to_string(),
315
- ));
316
- }
317
- }
318
-
319
- Ok(Arc::new(LixDirectoryDeleteExec::new(
320
- write_ctx.clone(),
321
- Arc::clone(&self.schema),
322
- self.version_binding.clone(),
323
- request,
324
- physical_filters,
325
- )))
326
- }
327
-
328
- async fn update(
329
- &self,
330
- state: &dyn Session,
331
- assignments: Vec<(String, Expr)>,
332
- filters: Vec<Expr>,
333
- ) -> Result<Arc<dyn ExecutionPlan>> {
334
- let write_ctx = self.write_access.require_write("UPDATE lix_directory")?;
335
-
336
- validate_lix_directory_update_assignments(&self.schema, &assignments)?;
337
-
338
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
339
- let physical_assignments = assignments
340
- .iter()
341
- .map(|(column_name, expr)| {
342
- Ok((
343
- column_name.clone(),
344
- create_physical_expr(expr, &df_schema, state.execution_props())?,
345
- ))
346
- })
347
- .collect::<Result<Vec<_>>>()?;
348
- let filters =
349
- canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::Directory)?;
350
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
351
- let physical_filters = filters
352
- .iter()
353
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
354
- .collect::<Result<Vec<_>>>()?;
355
- let request =
356
- lix_directory_scan_request(self.version_binding.active_version_id(), None, None);
357
-
358
- Ok(Arc::new(LixDirectoryUpdateExec::new(
359
- write_ctx.clone(),
360
- Arc::clone(&self.schema),
361
- self.version_binding.clone(),
362
- request,
363
- physical_assignments,
364
- physical_filters,
365
- )))
366
- }
367
- }
368
-
369
- struct LixDirectoryInsertSink {
370
- write_ctx: SqlWriteContext,
371
- functions: FunctionProviderHandle,
372
- version_binding: VersionBinding,
373
- surface_name: &'static str,
374
- }
375
-
376
- impl std::fmt::Debug for LixDirectoryInsertSink {
377
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
378
- f.debug_struct("LixDirectoryInsertSink").finish()
379
- }
380
- }
381
-
382
- impl LixDirectoryInsertSink {
383
- fn new(
384
- _schema: SchemaRef,
385
- write_ctx: SqlWriteContext,
386
- functions: FunctionProviderHandle,
387
- version_binding: VersionBinding,
388
- ) -> Self {
389
- let surface_name = lix_directory_surface_name(&version_binding);
390
- Self {
391
- write_ctx,
392
- functions,
393
- version_binding,
394
- surface_name,
395
- }
396
- }
397
- }
398
-
399
- impl DisplayAs for LixDirectoryInsertSink {
400
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
401
- match t {
402
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
403
- write!(f, "LixDirectoryInsertSink")
404
- }
405
- DisplayFormatType::TreeRender => write!(f, "LixDirectoryInsertSink"),
406
- }
407
- }
408
- }
409
-
410
- #[async_trait]
411
- impl InsertSink for LixDirectoryInsertSink {
412
- async fn write_batches(
413
- &self,
414
- batches: Vec<RecordBatch>,
415
- _context: &Arc<TaskContext>,
416
- ) -> Result<u64> {
417
- let mut path_resolvers = None;
418
- let mut rows = Vec::new();
419
- let mut count = 0_u64;
420
- for batch in batches {
421
- if path_resolvers.is_none() {
422
- path_resolvers = Some(
423
- directory_path_resolvers_from_live_state(
424
- Arc::new(WriteContextLiveStateReader::new(self.write_ctx.clone())),
425
- self.version_binding.active_version_id(),
426
- )
427
- .await
428
- .map_err(lix_error_to_datafusion_error)?,
429
- );
430
- }
431
- count = count
432
- .checked_add(u64::try_from(batch.num_rows()).map_err(|_| {
433
- DataFusionError::Execution("lix_directory INSERT row count overflow".into())
434
- })?)
435
- .ok_or_else(|| {
436
- DataFusionError::Execution("lix_directory INSERT row count overflow".into())
437
- })?;
438
- if record_batch_has_non_null_column(&batch, "path")? {
439
- rows.extend(lix_directory_write_rows_from_batch_with_path_resolvers(
440
- &batch,
441
- self.version_binding.active_version_id(),
442
- self.surface_name,
443
- path_resolvers
444
- .as_mut()
445
- .expect("path resolver should be initialized"),
446
- &mut || self.functions.call_uuid_v7(),
447
- )?);
448
- } else {
449
- rows.extend(
450
- lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
451
- &batch,
452
- self.version_binding.active_version_id(),
453
- self.surface_name,
454
- true,
455
- path_resolvers.as_mut(),
456
- None,
457
- )?,
458
- );
459
- }
460
- }
461
-
462
- self.write_ctx
463
- .stage_write(TransactionWrite::Rows {
464
- mode: TransactionWriteMode::Insert,
465
- rows,
466
- })
467
- .await
468
- .map_err(lix_error_to_datafusion_error)?;
469
-
470
- Ok(count)
471
- }
472
- }
473
-
474
- fn lix_directory_surface_name(version_binding: &VersionBinding) -> &'static str {
475
- match version_binding {
476
- VersionBinding::Active { .. } => "lix_directory",
477
- VersionBinding::Explicit => "lix_directory_by_version",
478
- }
479
- }
480
-
481
- #[allow(dead_code)]
482
- struct LixDirectoryDeleteExec {
483
- write_ctx: SqlWriteContext,
484
- table_schema: SchemaRef,
485
- version_binding: VersionBinding,
486
- request: LiveStateScanRequest,
487
- filters: Vec<Arc<dyn PhysicalExpr>>,
488
- result_schema: SchemaRef,
489
- properties: Arc<PlanProperties>,
490
- }
491
-
492
- impl std::fmt::Debug for LixDirectoryDeleteExec {
493
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
494
- f.debug_struct("LixDirectoryDeleteExec").finish()
495
- }
496
- }
497
-
498
- impl LixDirectoryDeleteExec {
499
- fn new(
500
- write_ctx: SqlWriteContext,
501
- table_schema: SchemaRef,
502
- version_binding: VersionBinding,
503
- request: LiveStateScanRequest,
504
- filters: Vec<Arc<dyn PhysicalExpr>>,
505
- ) -> Self {
506
- let result_schema = dml_count_schema();
507
- let properties = PlanProperties::new(
508
- EquivalenceProperties::new(Arc::clone(&result_schema)),
509
- Partitioning::UnknownPartitioning(1),
510
- EmissionType::Final,
511
- Boundedness::Bounded,
512
- );
513
- Self {
514
- write_ctx,
515
- table_schema,
516
- version_binding,
517
- request,
518
- filters,
519
- result_schema,
520
- properties: Arc::new(properties),
521
- }
522
- }
523
- }
524
-
525
- impl DisplayAs for LixDirectoryDeleteExec {
526
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
527
- match t {
528
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
529
- write!(f, "LixDirectoryDeleteExec(filters={})", self.filters.len())
530
- }
531
- DisplayFormatType::TreeRender => write!(f, "LixDirectoryDeleteExec"),
532
- }
533
- }
534
- }
535
-
536
- impl ExecutionPlan for LixDirectoryDeleteExec {
537
- fn name(&self) -> &str {
538
- "LixDirectoryDeleteExec"
539
- }
540
-
541
- fn as_any(&self) -> &dyn Any {
542
- self
543
- }
544
-
545
- fn properties(&self) -> &Arc<PlanProperties> {
546
- &self.properties
547
- }
548
-
549
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
550
- Vec::new()
551
- }
552
-
553
- fn with_new_children(
554
- self: Arc<Self>,
555
- children: Vec<Arc<dyn ExecutionPlan>>,
556
- ) -> Result<Arc<dyn ExecutionPlan>> {
557
- if !children.is_empty() {
558
- return Err(DataFusionError::Execution(
559
- "LixDirectoryDeleteExec does not accept children".to_string(),
560
- ));
561
- }
562
- Ok(self)
563
- }
564
-
565
- fn execute(
566
- &self,
567
- partition: usize,
568
- _context: Arc<TaskContext>,
569
- ) -> Result<SendableRecordBatchStream> {
570
- if partition != 0 {
571
- return Err(DataFusionError::Execution(format!(
572
- "LixDirectoryDeleteExec only exposes one partition, got {partition}"
573
- )));
574
- }
575
- let write_ctx = self.write_ctx.clone();
576
- let table_schema = Arc::clone(&self.table_schema);
577
- let version_binding = self.version_binding.clone();
578
- let request = self.request.clone();
579
- let filters = self.filters.clone();
580
- let result_schema = Arc::clone(&self.result_schema);
581
- let stream_schema = Arc::clone(&result_schema);
582
-
583
- let stream = stream::once(async move {
584
- let rows = write_ctx
585
- .scan_live_state(&request)
586
- .await
587
- .map_err(lix_error_to_datafusion_error)?;
588
- let source_batch = lix_directory_record_batch(&table_schema, rows)
589
- .map_err(lix_error_to_datafusion_error)?;
590
- let matched_batch = filter_lix_directory_batch(source_batch, &filters)?;
591
- let version_ids = directory_version_ids_from_batch(
592
- &matched_batch,
593
- version_binding.active_version_id(),
594
- )?;
595
- let mut visible_filesystems = BTreeMap::new();
596
- for version_id in version_ids {
597
- visible_filesystems.insert(
598
- version_id.clone(),
599
- VisibleFilesystem::load(
600
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
601
- &version_id,
602
- )
603
- .await
604
- .map_err(lix_error_to_datafusion_error)?,
605
- );
606
- }
607
- let (write_rows, count) = lix_directory_recursive_delete_rows_from_batch(
608
- &matched_batch,
609
- version_binding.active_version_id(),
610
- &visible_filesystems,
611
- )?;
612
-
613
- if count > 0 {
614
- write_ctx
615
- .stage_write(TransactionWrite::Rows {
616
- mode: TransactionWriteMode::Replace,
617
- rows: write_rows,
618
- })
619
- .await
620
- .map_err(lix_error_to_datafusion_error)?;
621
- }
622
-
623
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
624
- dml_count_batch(Arc::clone(&stream_schema), count)?,
625
- )]))
626
- })
627
- .try_flatten();
628
-
629
- Ok(Box::pin(RecordBatchStreamAdapter::new(
630
- result_schema,
631
- stream,
632
- )))
633
- }
634
- }
635
-
636
- #[allow(dead_code)]
637
- struct LixDirectoryUpdateExec {
638
- write_ctx: SqlWriteContext,
639
- table_schema: SchemaRef,
640
- version_binding: VersionBinding,
641
- request: LiveStateScanRequest,
642
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
643
- filters: Vec<Arc<dyn PhysicalExpr>>,
644
- result_schema: SchemaRef,
645
- properties: Arc<PlanProperties>,
646
- }
647
-
648
- impl std::fmt::Debug for LixDirectoryUpdateExec {
649
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
650
- f.debug_struct("LixDirectoryUpdateExec").finish()
651
- }
652
- }
653
-
654
- impl LixDirectoryUpdateExec {
655
- fn new(
656
- write_ctx: SqlWriteContext,
657
- table_schema: SchemaRef,
658
- version_binding: VersionBinding,
659
- request: LiveStateScanRequest,
660
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
661
- filters: Vec<Arc<dyn PhysicalExpr>>,
662
- ) -> Self {
663
- let result_schema = dml_count_schema();
664
- let properties = PlanProperties::new(
665
- EquivalenceProperties::new(Arc::clone(&result_schema)),
666
- Partitioning::UnknownPartitioning(1),
667
- EmissionType::Final,
668
- Boundedness::Bounded,
669
- );
670
- Self {
671
- write_ctx,
672
- table_schema,
673
- version_binding,
674
- request,
675
- assignments,
676
- filters,
677
- result_schema,
678
- properties: Arc::new(properties),
679
- }
680
- }
681
- }
682
-
683
- impl DisplayAs for LixDirectoryUpdateExec {
684
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
685
- match t {
686
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
687
- write!(
688
- f,
689
- "LixDirectoryUpdateExec(assignments={}, filters={})",
690
- self.assignments.len(),
691
- self.filters.len()
692
- )
693
- }
694
- DisplayFormatType::TreeRender => write!(f, "LixDirectoryUpdateExec"),
695
- }
696
- }
697
- }
698
-
699
- impl ExecutionPlan for LixDirectoryUpdateExec {
700
- fn name(&self) -> &str {
701
- "LixDirectoryUpdateExec"
702
- }
703
-
704
- fn as_any(&self) -> &dyn Any {
705
- self
706
- }
707
-
708
- fn properties(&self) -> &Arc<PlanProperties> {
709
- &self.properties
710
- }
711
-
712
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
713
- Vec::new()
714
- }
715
-
716
- fn with_new_children(
717
- self: Arc<Self>,
718
- children: Vec<Arc<dyn ExecutionPlan>>,
719
- ) -> Result<Arc<dyn ExecutionPlan>> {
720
- if !children.is_empty() {
721
- return Err(DataFusionError::Execution(
722
- "LixDirectoryUpdateExec does not accept children".to_string(),
723
- ));
724
- }
725
- Ok(self)
726
- }
727
-
728
- fn execute(
729
- &self,
730
- partition: usize,
731
- _context: Arc<TaskContext>,
732
- ) -> Result<SendableRecordBatchStream> {
733
- if partition != 0 {
734
- return Err(DataFusionError::Execution(format!(
735
- "LixDirectoryUpdateExec only exposes one partition, got {partition}"
736
- )));
737
- }
738
- let write_ctx = self.write_ctx.clone();
739
- let table_schema = Arc::clone(&self.table_schema);
740
- let version_binding = self.version_binding.clone();
741
- let request = self.request.clone();
742
- let assignments = self.assignments.clone();
743
- let filters = self.filters.clone();
744
- let result_schema = Arc::clone(&self.result_schema);
745
- let stream_schema = Arc::clone(&result_schema);
746
-
747
- let stream = stream::once(async move {
748
- let rows = write_ctx
749
- .scan_live_state(&request)
750
- .await
751
- .map_err(lix_error_to_datafusion_error)?;
752
- let source_batch = lix_directory_record_batch(&table_schema, rows)
753
- .map_err(lix_error_to_datafusion_error)?;
754
- let matched_batch = filter_lix_directory_batch(source_batch, &filters)?;
755
- let mut path_resolvers = directory_path_resolvers_from_live_state(
756
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
757
- version_binding.active_version_id(),
758
- )
759
- .await
760
- .map_err(lix_error_to_datafusion_error)?;
761
- let write_rows = lix_directory_update_write_rows_from_batch(
762
- &matched_batch,
763
- &assignments,
764
- version_binding.active_version_id(),
765
- &mut path_resolvers,
766
- )?;
767
- let count = u64::try_from(write_rows.len()).map_err(|_| {
768
- DataFusionError::Execution("lix_directory UPDATE row count overflow".into())
769
- })?;
770
-
771
- if count > 0 {
772
- write_ctx
773
- .stage_write(TransactionWrite::Rows {
774
- mode: TransactionWriteMode::Replace,
775
- rows: write_rows,
776
- })
777
- .await
778
- .map_err(lix_error_to_datafusion_error)?;
779
- }
780
-
781
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
782
- dml_count_batch(Arc::clone(&stream_schema), count)?,
783
- )]))
784
- })
785
- .try_flatten();
786
-
787
- Ok(Box::pin(RecordBatchStreamAdapter::new(
788
- result_schema,
789
- stream,
790
- )))
791
- }
792
- }
793
-
794
- struct LixDirectoryScanExec {
795
- live_state: Arc<dyn LiveStateReader>,
796
- batch_schema: SchemaRef,
797
- output_schema: SchemaRef,
798
- projection: Option<Vec<usize>>,
799
- request: LiveStateScanRequest,
800
- filters: Vec<Arc<dyn PhysicalExpr>>,
801
- limit: Option<usize>,
802
- properties: Arc<PlanProperties>,
803
- }
804
-
805
- impl std::fmt::Debug for LixDirectoryScanExec {
806
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
807
- f.debug_struct("LixDirectoryScanExec").finish()
808
- }
809
- }
810
-
811
- impl LixDirectoryScanExec {
812
- fn new(
813
- live_state: Arc<dyn LiveStateReader>,
814
- batch_schema: SchemaRef,
815
- output_schema: SchemaRef,
816
- projection: Option<Vec<usize>>,
817
- request: LiveStateScanRequest,
818
- filters: Vec<Arc<dyn PhysicalExpr>>,
819
- limit: Option<usize>,
820
- ) -> Self {
821
- let properties = PlanProperties::new(
822
- EquivalenceProperties::new(Arc::clone(&output_schema)),
823
- Partitioning::UnknownPartitioning(1),
824
- EmissionType::Incremental,
825
- Boundedness::Bounded,
826
- );
827
- Self {
828
- live_state,
829
- batch_schema,
830
- output_schema,
831
- projection,
832
- request,
833
- filters,
834
- limit,
835
- properties: Arc::new(properties),
836
- }
837
- }
838
- }
839
-
840
- impl DisplayAs for LixDirectoryScanExec {
841
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
842
- match t {
843
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
844
- write!(f, "LixDirectoryScanExec(limit={:?})", self.limit)
845
- }
846
- DisplayFormatType::TreeRender => write!(f, "LixDirectoryScanExec"),
847
- }
848
- }
849
- }
850
-
851
- impl ExecutionPlan for LixDirectoryScanExec {
852
- fn name(&self) -> &str {
853
- "LixDirectoryScanExec"
854
- }
855
-
856
- fn as_any(&self) -> &dyn Any {
857
- self
858
- }
859
-
860
- fn properties(&self) -> &Arc<PlanProperties> {
861
- &self.properties
862
- }
863
-
864
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
865
- Vec::new()
866
- }
867
-
868
- fn with_new_children(
869
- self: Arc<Self>,
870
- children: Vec<Arc<dyn ExecutionPlan>>,
871
- ) -> Result<Arc<dyn ExecutionPlan>> {
872
- if !children.is_empty() {
873
- return Err(DataFusionError::Execution(
874
- "LixDirectoryScanExec does not accept children".to_string(),
875
- ));
876
- }
877
- Ok(self)
878
- }
879
-
880
- fn execute(
881
- &self,
882
- partition: usize,
883
- _context: Arc<TaskContext>,
884
- ) -> Result<SendableRecordBatchStream> {
885
- if partition != 0 {
886
- return Err(DataFusionError::Execution(format!(
887
- "LixDirectoryScanExec only supports partition 0, got {partition}"
888
- )));
889
- }
890
-
891
- let live_state = Arc::clone(&self.live_state);
892
- let request = self.request.clone();
893
- let filters = self.filters.clone();
894
- let limit = self.limit;
895
- let output_schema = Arc::clone(&self.output_schema);
896
- let batch_schema = Arc::clone(&self.batch_schema);
897
- let projection = self.projection.clone();
898
- let fut = async move {
899
- let rows = live_state.scan_rows(&request).await.map_err(|error| {
900
- DataFusionError::Execution(format!("sql2 lix_directory scan failed: {error}"))
901
- })?;
902
- let batch = lix_directory_record_batch(&batch_schema, rows).map_err(|error| {
903
- DataFusionError::Execution(format!(
904
- "sql2 lix_directory batch build failed: {error}"
905
- ))
906
- })?;
907
- let filtered = filter_lix_directory_batch(batch, &filters)?;
908
- let projected = match projection {
909
- Some(indices) => filtered.project(&indices).map_err(DataFusionError::from),
910
- None => Ok(filtered),
911
- }?;
912
- match limit {
913
- Some(limit) => Ok(projected.slice(0, limit.min(projected.num_rows()))),
914
- None => Ok(projected),
915
- }
916
- };
917
-
918
- Ok(Box::pin(RecordBatchStreamAdapter::new(
919
- output_schema,
920
- stream::once(fut).map_ok(|batch| batch),
921
- )))
922
- }
923
- }
924
-
925
- #[derive(Debug, Clone)]
926
- struct DirectoryDescriptorRecord {
927
- id: String,
928
- parent_id: Option<String>,
929
- name: String,
930
- hidden: bool,
931
- live: MaterializedLiveStateRow,
932
- }
933
-
934
- #[derive(Debug, Deserialize)]
935
- struct DirectoryDescriptorSnapshot {
936
- id: String,
937
- parent_id: Option<String>,
938
- name: String,
939
- hidden: Option<bool>,
940
- }
941
-
942
- #[cfg(test)]
943
- fn lix_directory_write_rows_from_batch(
944
- batch: &RecordBatch,
945
- version_binding: Option<&str>,
946
- ) -> Result<Vec<TransactionWriteRow>> {
947
- lix_directory_write_rows_from_batch_with_options(batch, version_binding, "lix_directory", true)
948
- }
949
-
950
- fn lix_directory_write_rows_from_batch_with_path_resolvers(
951
- batch: &RecordBatch,
952
- version_binding: Option<&str>,
953
- surface_name: &str,
954
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
955
- generate_directory_id: &mut dyn FnMut() -> String,
956
- ) -> Result<Vec<TransactionWriteRow>> {
957
- lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
958
- batch,
959
- version_binding,
960
- surface_name,
961
- true,
962
- Some(path_resolvers),
963
- Some(generate_directory_id),
964
- )
965
- }
966
-
967
- fn lix_directory_update_write_rows_from_batch(
968
- batch: &RecordBatch,
969
- assignments: &[(String, Arc<dyn PhysicalExpr>)],
970
- version_binding: Option<&str>,
971
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
972
- ) -> Result<Vec<TransactionWriteRow>> {
973
- let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
974
- let mut rows = Vec::new();
975
- for row_index in 0..batch.num_rows() {
976
- let id = optional_string_value(batch, row_index, "id")?;
977
- let context = directory_row_context_from_update(
978
- batch,
979
- &assignment_values,
980
- row_index,
981
- version_binding,
982
- )?;
983
- let parent_id =
984
- update_optional_string_value(batch, &assignment_values, row_index, "parent_id")?;
985
- let name = update_required_string_value(batch, &assignment_values, row_index, "name")?;
986
- if let Some(directory_id) = id.as_ref() {
987
- let resolver = path_resolvers
988
- .entry(directory_path_resolver_key(&context))
989
- .or_insert_with(DirectoryPathResolver::default);
990
- resolver
991
- .reserve_directory(parent_id.clone(), name.clone(), directory_id.clone())
992
- .map_err(lix_error_to_datafusion_error)?;
993
- }
994
- rows.push(directory_descriptor_write_row(
995
- DirectoryDescriptorWriteIntent {
996
- id,
997
- parent_id,
998
- name,
999
- hidden: update_optional_bool_value(batch, &assignment_values, row_index, "hidden")?,
1000
- context,
1001
- },
1002
- ));
1003
- }
1004
- Ok(rows)
1005
- }
1006
-
1007
- fn directory_version_ids_from_batch(
1008
- batch: &RecordBatch,
1009
- version_binding: Option<&str>,
1010
- ) -> Result<BTreeSet<String>> {
1011
- let mut version_ids = BTreeSet::new();
1012
- for row_index in 0..batch.num_rows() {
1013
- version_ids.insert(
1014
- directory_row_context_from_batch(batch, row_index, version_binding)?.version_id,
1015
- );
1016
- }
1017
- Ok(version_ids)
1018
- }
1019
-
1020
- fn lix_directory_recursive_delete_rows_from_batch(
1021
- batch: &RecordBatch,
1022
- version_binding: Option<&str>,
1023
- visible_filesystems: &BTreeMap<String, VisibleFilesystem>,
1024
- ) -> Result<(Vec<TransactionWriteRow>, u64)> {
1025
- let mut rows = Vec::new();
1026
- let mut seen = BTreeSet::new();
1027
- let mut count = 0u64;
1028
- for row_index in 0..batch.num_rows() {
1029
- let directory_id = required_string_value(batch, row_index, "id")?;
1030
- let context = directory_row_context_from_batch(batch, row_index, version_binding)?;
1031
- let visible_filesystem = visible_filesystems
1032
- .get(&context.version_id)
1033
- .ok_or_else(|| {
1034
- DataFusionError::Execution(format!(
1035
- "DELETE FROM lix_directory missing visible filesystem for version '{}'",
1036
- context.version_id
1037
- ))
1038
- })?;
1039
- append_deduped_delete_plan(
1040
- &mut rows,
1041
- &mut seen,
1042
- plan_recursive_directory_delete(&directory_id, visible_filesystem, context),
1043
- &mut count,
1044
- );
1045
- }
1046
- Ok((rows, count))
1047
- }
1048
-
1049
- fn append_deduped_delete_plan(
1050
- rows: &mut Vec<TransactionWriteRow>,
1051
- seen: &mut BTreeSet<StateRowDedupeKey>,
1052
- plan: FilesystemDeletePlan,
1053
- count: &mut u64,
1054
- ) {
1055
- for row in plan.rows {
1056
- if seen.insert(StateRowDedupeKey::from(&row)) {
1057
- if is_user_visible_filesystem_delete_row(&row) {
1058
- *count += 1;
1059
- }
1060
- rows.push(row);
1061
- }
1062
- }
1063
- }
1064
-
1065
- fn is_user_visible_filesystem_delete_row(row: &TransactionWriteRow) -> bool {
1066
- matches!(
1067
- row.schema_key.as_str(),
1068
- "lix_directory_descriptor" | "lix_file_descriptor"
1069
- )
1070
- }
1071
-
1072
- #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
1073
- struct StateRowDedupeKey {
1074
- entity_id: String,
1075
- schema_key: String,
1076
- file_id: Option<String>,
1077
- version_id: String,
1078
- global: bool,
1079
- untracked: bool,
1080
- }
1081
-
1082
- impl From<&TransactionWriteRow> for StateRowDedupeKey {
1083
- fn from(row: &TransactionWriteRow) -> Self {
1084
- Self {
1085
- entity_id: row
1086
- .entity_id
1087
- .as_ref()
1088
- .expect("directory provider staged row should carry entity_id")
1089
- .as_single_string_owned()
1090
- .expect("directory provider staged row entity identity should project"),
1091
- schema_key: row.schema_key.clone(),
1092
- file_id: row.file_id.clone(),
1093
- version_id: row.version_id.clone(),
1094
- global: row.global,
1095
- untracked: row.untracked,
1096
- }
1097
- }
1098
- }
1099
-
1100
- #[cfg(test)]
1101
- fn lix_directory_write_rows_from_batch_with_options(
1102
- batch: &RecordBatch,
1103
- version_binding: Option<&str>,
1104
- surface_name: &str,
1105
- reject_read_only_fields: bool,
1106
- ) -> Result<Vec<TransactionWriteRow>> {
1107
- lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
1108
- batch,
1109
- version_binding,
1110
- surface_name,
1111
- reject_read_only_fields,
1112
- None,
1113
- None,
1114
- )
1115
- }
1116
-
1117
- fn lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
1118
- batch: &RecordBatch,
1119
- version_binding: Option<&str>,
1120
- surface_name: &str,
1121
- reject_read_only_fields: bool,
1122
- mut path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1123
- mut generate_directory_id: Option<&mut dyn FnMut() -> String>,
1124
- ) -> Result<Vec<TransactionWriteRow>> {
1125
- let mut rows = Vec::new();
1126
- for row_index in 0..batch.num_rows() {
1127
- if reject_read_only_fields {
1128
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_entity_id")?;
1129
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_schema_key")?;
1130
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_change_id")?;
1131
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_created_at")?;
1132
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_updated_at")?;
1133
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_commit_id")?;
1134
- }
1135
-
1136
- let path = optional_string_value(batch, row_index, "path")?;
1137
- let id = optional_string_value(batch, row_index, "id")?;
1138
- let hidden = optional_bool_value(batch, row_index, "hidden")?;
1139
- let context = directory_row_context_from_batch(batch, row_index, version_binding)?;
1140
-
1141
- if let Some(path) = path.filter(|_| reject_read_only_fields) {
1142
- reject_read_only_lix_directory_insert_field(batch, row_index, "parent_id")?;
1143
- reject_read_only_lix_directory_insert_field(batch, row_index, "name")?;
1144
-
1145
- let Some(path_resolvers) = path_resolvers.as_deref_mut() else {
1146
- return Err(DataFusionError::Execution(
1147
- "INSERT into lix_directory with path requires directory path resolver"
1148
- .to_string(),
1149
- ));
1150
- };
1151
- let resolver = path_resolvers
1152
- .entry(directory_path_resolver_key(&context))
1153
- .or_insert_with(DirectoryPathResolver::default);
1154
- let Some(generate_directory_id) = generate_directory_id.as_deref_mut() else {
1155
- return Err(DataFusionError::Execution(
1156
- "INSERT into lix_directory with path requires directory id generator"
1157
- .to_string(),
1158
- ));
1159
- };
1160
- let directory_id = id.unwrap_or_else(|| generate_directory_id());
1161
- let mut planned_rows = resolver
1162
- .create_directory_path_with_leaf_id(
1163
- &path,
1164
- Some(directory_id.clone()),
1165
- context,
1166
- hidden.unwrap_or(false),
1167
- generate_directory_id,
1168
- )
1169
- .map_err(lix_error_to_datafusion_error)?;
1170
- attach_lix_directory_insert_origin(&mut planned_rows, surface_name, &directory_id);
1171
- rows.extend(planned_rows);
1172
- continue;
1173
- }
1174
-
1175
- let parent_id = optional_string_value(batch, row_index, "parent_id")?;
1176
- let name = required_string_value(batch, row_index, "name")?;
1177
- if let Some(path_resolvers) = path_resolvers.as_deref_mut() {
1178
- if let Some(directory_id) = id.as_ref() {
1179
- let resolver = path_resolvers
1180
- .entry(directory_path_resolver_key(&context))
1181
- .or_insert_with(DirectoryPathResolver::default);
1182
- resolver
1183
- .reserve_directory(parent_id.clone(), name.clone(), directory_id.clone())
1184
- .map_err(lix_error_to_datafusion_error)?;
1185
- }
1186
- }
1187
- let mut row = directory_descriptor_write_row(DirectoryDescriptorWriteIntent {
1188
- id: id.clone(),
1189
- parent_id,
1190
- name,
1191
- hidden,
1192
- context,
1193
- });
1194
- if let Some(directory_id) = id.as_ref() {
1195
- row.origin = Some(lix_directory_insert_origin(surface_name, directory_id));
1196
- }
1197
- rows.push(row);
1198
- }
1199
- Ok(rows)
1200
- }
1201
-
1202
- fn attach_lix_directory_insert_origin(
1203
- rows: &mut [TransactionWriteRow],
1204
- surface_name: &str,
1205
- directory_id: &str,
1206
- ) {
1207
- let origin = lix_directory_insert_origin(surface_name, directory_id);
1208
- for row in rows {
1209
- if row.schema_key != DIRECTORY_SCHEMA_KEY {
1210
- continue;
1211
- }
1212
- let Some(entity_id) = row
1213
- .entity_id
1214
- .as_ref()
1215
- .and_then(|entity_id| entity_id.as_single_string_owned().ok())
1216
- else {
1217
- continue;
1218
- };
1219
- if entity_id == directory_id {
1220
- row.origin = Some(origin.clone());
1221
- }
1222
- }
1223
- }
1224
-
1225
- fn lix_directory_insert_origin(surface_name: &str, directory_id: &str) -> TransactionWriteOrigin {
1226
- TransactionWriteOrigin {
1227
- surface: surface_name.to_string(),
1228
- operation: TransactionWriteOperation::Insert,
1229
- primary_key: Some(LogicalPrimaryKey {
1230
- columns: vec!["id".to_string()],
1231
- values: vec![directory_id.to_string()],
1232
- }),
1233
- }
1234
- }
1235
-
1236
- fn directory_row_context_from_batch(
1237
- batch: &RecordBatch,
1238
- row_index: usize,
1239
- version_binding: Option<&str>,
1240
- ) -> Result<FilesystemRowContext> {
1241
- let scope = resolve_write_version_scope(
1242
- optional_bool_value(batch, row_index, "lixcol_global")?,
1243
- optional_string_value(batch, row_index, "lixcol_version_id")?,
1244
- version_binding,
1245
- "INSERT into lix_directory_by_version",
1246
- "lix_directory",
1247
- )?;
1248
-
1249
- Ok(FilesystemRowContext {
1250
- version_id: scope.version_id,
1251
- global: scope.global,
1252
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1253
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1254
- metadata: optional_metadata_value(batch, row_index, "lixcol_metadata", "lix_directory")?,
1255
- })
1256
- }
1257
-
1258
- fn directory_row_context_from_update(
1259
- batch: &RecordBatch,
1260
- assignment_values: &UpdateAssignmentValues,
1261
- row_index: usize,
1262
- version_binding: Option<&str>,
1263
- ) -> Result<FilesystemRowContext> {
1264
- let scope = resolve_write_version_scope(
1265
- optional_bool_value(batch, row_index, "lixcol_global")?,
1266
- optional_string_value(batch, row_index, "lixcol_version_id")?,
1267
- version_binding,
1268
- "UPDATE into lix_directory_by_version",
1269
- "lix_directory",
1270
- )?;
1271
-
1272
- Ok(FilesystemRowContext {
1273
- version_id: scope.version_id,
1274
- global: scope.global,
1275
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1276
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1277
- metadata: update_optional_metadata_value(
1278
- batch,
1279
- assignment_values,
1280
- row_index,
1281
- "lixcol_metadata",
1282
- "lix_directory",
1283
- )?,
1284
- })
1285
- }
1286
-
1287
- fn directory_path_resolver_key(context: &FilesystemRowContext) -> String {
1288
- filesystem_storage_scope_key(
1289
- &context.version_id,
1290
- context.global,
1291
- context.untracked,
1292
- context.file_id.as_deref(),
1293
- )
1294
- }
1295
-
1296
- async fn directory_path_resolvers_from_live_state(
1297
- live_state: Arc<dyn LiveStateReader>,
1298
- version_binding: Option<&str>,
1299
- ) -> std::result::Result<BTreeMap<String, DirectoryPathResolver>, LixError> {
1300
- let rows = live_state
1301
- .scan_rows(&LiveStateScanRequest {
1302
- filter: LiveStateFilter {
1303
- schema_keys: vec![
1304
- DIRECTORY_SCHEMA_KEY.to_string(),
1305
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
1306
- ],
1307
- version_ids: version_binding
1308
- .map(|version_id| vec![version_id.to_string()])
1309
- .unwrap_or_default(),
1310
- ..Default::default()
1311
- },
1312
- ..Default::default()
1313
- })
1314
- .await?;
1315
- let mut resolvers = directory_path_resolvers_from_state_rows(rows)?;
1316
- if let Some(version_id) = version_binding {
1317
- let key = filesystem_storage_scope_key(version_id, false, false, None);
1318
- resolvers
1319
- .entry(key)
1320
- .or_insert_with(DirectoryPathResolver::default);
1321
- }
1322
- Ok(resolvers)
1323
- }
1324
-
1325
- fn lix_directory_record_batch(
1326
- schema: &SchemaRef,
1327
- rows: Vec<MaterializedLiveStateRow>,
1328
- ) -> Result<RecordBatch, LixError> {
1329
- let mut directory_rows = Vec::<DirectoryDescriptorRecord>::new();
1330
-
1331
- for row in rows {
1332
- if row.schema_key != DIRECTORY_SCHEMA_KEY {
1333
- continue;
1334
- }
1335
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1336
- continue;
1337
- };
1338
- let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(snapshot_content)
1339
- .map_err(|error| {
1340
- LixError::new(
1341
- "LIX_ERROR_UNKNOWN",
1342
- format!("invalid lix_directory_descriptor snapshot JSON: {error}"),
1343
- )
1344
- })?;
1345
- directory_rows.push(DirectoryDescriptorRecord {
1346
- id: snapshot.id,
1347
- parent_id: snapshot.parent_id,
1348
- name: snapshot.name,
1349
- hidden: snapshot.hidden.unwrap_or(false),
1350
- live: row,
1351
- });
1352
- }
1353
-
1354
- let directory_paths = derive_directory_paths(&directory_rows)?;
1355
- let mut ids = Vec::new();
1356
- let mut paths = Vec::new();
1357
- let mut parent_ids = Vec::new();
1358
- let mut names = Vec::new();
1359
- let mut hiddens = Vec::new();
1360
- let mut entity_ids = Vec::new();
1361
- let mut schema_keys = Vec::new();
1362
- let mut file_ids = Vec::new();
1363
- let mut globals = Vec::new();
1364
- let mut change_ids = Vec::new();
1365
- let mut created_ats = Vec::new();
1366
- let mut updated_ats = Vec::new();
1367
- let mut commit_ids = Vec::new();
1368
- let mut untracked_values = Vec::new();
1369
- let mut metadata_values = Vec::new();
1370
- let mut version_ids = Vec::new();
1371
-
1372
- for directory in directory_rows {
1373
- ids.push(Some(directory.id.clone()));
1374
- paths.push(
1375
- directory_paths
1376
- .get(&(directory.live.version_id.clone(), directory.id.clone()))
1377
- .cloned(),
1378
- );
1379
- parent_ids.push(directory.parent_id);
1380
- names.push(Some(directory.name));
1381
- hiddens.push(Some(directory.hidden));
1382
- entity_ids.push(Some(directory.live.entity_id.as_json_array_text()?));
1383
- schema_keys.push(Some(directory.live.schema_key));
1384
- file_ids.push(directory.live.file_id);
1385
- globals.push(Some(directory.live.global));
1386
- change_ids.push(directory.live.change_id);
1387
- created_ats.push(directory.live.created_at);
1388
- updated_ats.push(directory.live.updated_at);
1389
- commit_ids.push(directory.live.commit_id);
1390
- untracked_values.push(Some(directory.live.untracked));
1391
- metadata_values.push(directory.live.metadata.as_ref().map(serialize_row_metadata));
1392
- version_ids.push(Some(directory.live.version_id));
1393
- }
1394
-
1395
- let mut columns = Vec::<ArrayRef>::with_capacity(schema.fields().len());
1396
- for field in schema.fields() {
1397
- let array: ArrayRef = match field.name().as_str() {
1398
- "id" => Arc::new(StringArray::from(ids.clone())),
1399
- "path" => Arc::new(StringArray::from(paths.clone())),
1400
- "parent_id" => Arc::new(StringArray::from(parent_ids.clone())),
1401
- "name" => Arc::new(StringArray::from(names.clone())),
1402
- "hidden" => Arc::new(BooleanArray::from(hiddens.clone())),
1403
- "lixcol_entity_id" => Arc::new(StringArray::from(entity_ids.clone())),
1404
- "lixcol_schema_key" => Arc::new(StringArray::from(schema_keys.clone())),
1405
- "lixcol_file_id" => Arc::new(StringArray::from(file_ids.clone())),
1406
- "lixcol_global" => Arc::new(BooleanArray::from(globals.clone())),
1407
- "lixcol_change_id" => Arc::new(StringArray::from(change_ids.clone())),
1408
- "lixcol_created_at" => Arc::new(StringArray::from(created_ats.clone())),
1409
- "lixcol_updated_at" => Arc::new(StringArray::from(updated_ats.clone())),
1410
- "lixcol_commit_id" => Arc::new(StringArray::from(commit_ids.clone())),
1411
- "lixcol_untracked" => Arc::new(BooleanArray::from(untracked_values.clone())),
1412
- "lixcol_metadata" => Arc::new(StringArray::from(metadata_values.clone())),
1413
- "lixcol_version_id" => Arc::new(StringArray::from(version_ids.clone())),
1414
- other => {
1415
- return Err(LixError::new(
1416
- "LIX_ERROR_UNKNOWN",
1417
- format!(
1418
- "sql2 lix_directory provider does not support projected column '{other}'"
1419
- ),
1420
- ))
1421
- }
1422
- };
1423
- columns.push(array);
1424
- }
1425
-
1426
- let options = RecordBatchOptions::new().with_row_count(Some(ids.len()));
1427
- RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
1428
- LixError::new(
1429
- "LIX_ERROR_UNKNOWN",
1430
- format!("sql2 failed to build lix_directory record batch: {error}"),
1431
- )
1432
- })
1433
- }
1434
-
1435
- fn derive_directory_paths(
1436
- rows: &[DirectoryDescriptorRecord],
1437
- ) -> std::result::Result<BTreeMap<(String, String), String>, LixError> {
1438
- let mut by_version = BTreeMap::<String, BTreeMap<String, &DirectoryDescriptorRecord>>::new();
1439
- for row in rows {
1440
- by_version
1441
- .entry(row.live.version_id.clone())
1442
- .or_default()
1443
- .insert(row.id.clone(), row);
1444
- }
1445
-
1446
- let mut paths = BTreeMap::<(String, String), String>::new();
1447
- for (version_id, records) in by_version {
1448
- for directory_id in records.keys() {
1449
- derive_directory_path_for(
1450
- &version_id,
1451
- directory_id,
1452
- &records,
1453
- &mut paths,
1454
- &mut BTreeSet::new(),
1455
- )?;
1456
- }
1457
- }
1458
- Ok(paths)
1459
- }
1460
-
1461
- fn derive_directory_path_for(
1462
- version_id: &str,
1463
- directory_id: &str,
1464
- records: &BTreeMap<String, &DirectoryDescriptorRecord>,
1465
- paths: &mut BTreeMap<(String, String), String>,
1466
- visiting: &mut BTreeSet<String>,
1467
- ) -> std::result::Result<Option<String>, LixError> {
1468
- if let Some(path) = paths.get(&(version_id.to_string(), directory_id.to_string())) {
1469
- return Ok(Some(path.clone()));
1470
- }
1471
- if !visiting.insert(directory_id.to_string()) {
1472
- return Err(directory_parent_cycle_error(version_id, directory_id));
1473
- }
1474
- let Some(row) = records.get(directory_id) else {
1475
- visiting.remove(directory_id);
1476
- return Ok(None);
1477
- };
1478
- let path = match row.parent_id.as_deref() {
1479
- Some(parent_id) => {
1480
- let Some(parent_path) =
1481
- derive_directory_path_for(version_id, parent_id, records, paths, visiting)?
1482
- else {
1483
- visiting.remove(directory_id);
1484
- return Ok(None);
1485
- };
1486
- format!("{parent_path}{}/", row.name)
1487
- }
1488
- None => format!("/{}/", row.name),
1489
- };
1490
- visiting.remove(directory_id);
1491
- paths.insert(
1492
- (version_id.to_string(), directory_id.to_string()),
1493
- path.clone(),
1494
- );
1495
- Ok(Some(path))
1496
- }
1497
-
1498
- fn directory_parent_cycle_error(version_id: &str, directory_id: &str) -> LixError {
1499
- LixError::new(
1500
- LixError::CODE_CONSTRAINT_VIOLATION,
1501
- format!(
1502
- "lix_directory_descriptor parent_id cycle in version '{version_id}' while resolving directory '{directory_id}'"
1503
- ),
1504
- )
1505
- }
1506
-
1507
- fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1508
- let fields = match projection {
1509
- Some(indices) => indices
1510
- .iter()
1511
- .map(|index| base_schema.field(*index).as_ref().clone())
1512
- .collect::<Vec<_>>(),
1513
- None => base_schema
1514
- .fields()
1515
- .iter()
1516
- .map(|field| field.as_ref().clone())
1517
- .collect::<Vec<_>>(),
1518
- };
1519
- Ok(Arc::new(Schema::new(fields)))
1520
- }
1521
-
1522
- fn lix_directory_scan_request(
1523
- version_binding: Option<&str>,
1524
- projected_schema: Option<&Schema>,
1525
- limit: Option<usize>,
1526
- ) -> LiveStateScanRequest {
1527
- LiveStateScanRequest {
1528
- filter: LiveStateFilter {
1529
- schema_keys: vec![DIRECTORY_SCHEMA_KEY.to_string()],
1530
- version_ids: version_binding
1531
- .map(|version_id| vec![version_id.to_string()])
1532
- .unwrap_or_default(),
1533
- ..LiveStateFilter::default()
1534
- },
1535
- projection: lix_directory_live_state_projection(projected_schema),
1536
- limit,
1537
- }
1538
- }
1539
-
1540
- fn lix_directory_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
1541
- let Some(schema) = projected_schema else {
1542
- return LiveStateProjection::default();
1543
- };
1544
- let mut columns = Vec::new();
1545
- let needs_snapshot = schema
1546
- .fields()
1547
- .iter()
1548
- .any(|field| matches!(field.name().as_str(), "parent_id" | "name" | "hidden"));
1549
- if needs_snapshot {
1550
- columns.push("snapshot_content".to_string());
1551
- }
1552
- if schema
1553
- .fields()
1554
- .iter()
1555
- .any(|field| field.name() == "lixcol_metadata")
1556
- {
1557
- columns.push("metadata".to_string());
1558
- }
1559
- LiveStateProjection { columns }
1560
- }
1561
-
1562
- fn validate_lix_directory_update_assignments(
1563
- schema: &SchemaRef,
1564
- assignments: &[(String, Expr)],
1565
- ) -> Result<()> {
1566
- for (column_name, _) in assignments {
1567
- schema.field_with_name(column_name).map_err(|_| {
1568
- DataFusionError::Plan(format!(
1569
- "UPDATE lix_directory failed: column '{column_name}' does not exist"
1570
- ))
1571
- })?;
1572
- if !matches!(
1573
- column_name.as_str(),
1574
- "parent_id" | "name" | "hidden" | "lixcol_metadata"
1575
- ) {
1576
- return Err(DataFusionError::Execution(format!(
1577
- "UPDATE lix_directory cannot stage read-only column '{column_name}'"
1578
- )));
1579
- }
1580
- }
1581
- Ok(())
1582
- }
1583
-
1584
- fn filter_lix_directory_batch(
1585
- batch: RecordBatch,
1586
- filters: &[Arc<dyn PhysicalExpr>],
1587
- ) -> Result<RecordBatch> {
1588
- let Some(mask) = evaluate_lix_directory_filters(&batch, filters)? else {
1589
- return Ok(batch);
1590
- };
1591
- Ok(filter_record_batch(&batch, &mask)?)
1592
- }
1593
-
1594
- fn evaluate_lix_directory_filters(
1595
- batch: &RecordBatch,
1596
- filters: &[Arc<dyn PhysicalExpr>],
1597
- ) -> Result<Option<BooleanArray>> {
1598
- if filters.is_empty() {
1599
- return Ok(None);
1600
- }
1601
-
1602
- let mut combined_mask: Option<BooleanArray> = None;
1603
- for filter in filters {
1604
- let result = filter.evaluate(batch)?;
1605
- let array = result.into_array(batch.num_rows())?;
1606
- let bool_array = array
1607
- .as_any()
1608
- .downcast_ref::<BooleanArray>()
1609
- .ok_or_else(|| {
1610
- DataFusionError::Execution("lix_directory filter was not boolean".to_string())
1611
- })?;
1612
- let normalized = bool_array
1613
- .iter()
1614
- .map(|value| Some(value == Some(true)))
1615
- .collect::<BooleanArray>();
1616
- combined_mask = Some(match combined_mask {
1617
- Some(existing) => and(&existing, &normalized)?,
1618
- None => normalized,
1619
- });
1620
- }
1621
- Ok(combined_mask)
1622
- }
1623
-
1624
- fn dml_count_schema() -> SchemaRef {
1625
- Arc::new(Schema::new(vec![Field::new(
1626
- "count",
1627
- DataType::UInt64,
1628
- false,
1629
- )]))
1630
- }
1631
-
1632
- fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
1633
- RecordBatch::try_new(
1634
- schema,
1635
- vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
1636
- )
1637
- .map_err(DataFusionError::from)
1638
- }
1639
-
1640
- fn record_batch_has_non_null_column(batch: &RecordBatch, column_name: &str) -> Result<bool> {
1641
- for row_index in 0..batch.num_rows() {
1642
- if optional_scalar_value(batch, row_index, column_name)?
1643
- .is_some_and(|value| !value.is_null())
1644
- {
1645
- return Ok(true);
1646
- }
1647
- }
1648
- Ok(false)
1649
- }
1650
-
1651
- fn reject_read_only_lix_directory_insert_field(
1652
- batch: &RecordBatch,
1653
- row_index: usize,
1654
- column_name: &str,
1655
- ) -> Result<()> {
1656
- if optional_scalar_value(batch, row_index, column_name)?.is_some_and(|value| !value.is_null()) {
1657
- return Err(DataFusionError::Execution(format!(
1658
- "INSERT into lix_directory cannot stage read-only column '{column_name}'"
1659
- )));
1660
- }
1661
- Ok(())
1662
- }
1663
-
1664
- fn required_string_value(
1665
- batch: &RecordBatch,
1666
- row_index: usize,
1667
- column_name: &str,
1668
- ) -> Result<String> {
1669
- optional_string_value(batch, row_index, column_name)?.ok_or_else(|| {
1670
- DataFusionError::Execution(format!(
1671
- "INSERT into lix_directory requires non-null text column '{column_name}'"
1672
- ))
1673
- })
1674
- }
1675
-
1676
- fn update_required_string_value(
1677
- batch: &RecordBatch,
1678
- assignment_values: &UpdateAssignmentValues,
1679
- row_index: usize,
1680
- column_name: &str,
1681
- ) -> Result<String> {
1682
- update_optional_string_value(batch, assignment_values, row_index, column_name)?.ok_or_else(
1683
- || {
1684
- DataFusionError::Execution(format!(
1685
- "UPDATE lix_directory requires non-null text column '{column_name}'"
1686
- ))
1687
- },
1688
- )
1689
- }
1690
-
1691
- fn update_optional_string_value(
1692
- batch: &RecordBatch,
1693
- assignment_values: &UpdateAssignmentValues,
1694
- row_index: usize,
1695
- column_name: &str,
1696
- ) -> Result<Option<String>> {
1697
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
1698
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
1699
- InsertCell::Provided(SqlCell::Value(
1700
- ScalarValue::Utf8(Some(value))
1701
- | ScalarValue::Utf8View(Some(value))
1702
- | ScalarValue::LargeUtf8(Some(value)),
1703
- )) => Ok(Some(value)),
1704
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
1705
- "UPDATE lix_directory expected text-compatible column '{column_name}', got {other:?}"
1706
- ))),
1707
- }
1708
- }
1709
-
1710
- fn update_optional_metadata_value(
1711
- batch: &RecordBatch,
1712
- assignment_values: &UpdateAssignmentValues,
1713
- row_index: usize,
1714
- column_name: &str,
1715
- context: &str,
1716
- ) -> Result<Option<TransactionJson>> {
1717
- update_optional_string_value(batch, assignment_values, row_index, column_name)?
1718
- .map(|value| {
1719
- let metadata = parse_row_metadata_value(&value, context)
1720
- .map_err(super::error::lix_error_to_datafusion_error)?;
1721
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
1722
- .map_err(super::error::lix_error_to_datafusion_error)
1723
- })
1724
- .transpose()
1725
- }
1726
-
1727
- fn update_optional_bool_value(
1728
- batch: &RecordBatch,
1729
- assignment_values: &UpdateAssignmentValues,
1730
- row_index: usize,
1731
- column_name: &str,
1732
- ) -> Result<Option<bool>> {
1733
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
1734
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
1735
- InsertCell::Provided(SqlCell::Value(ScalarValue::Boolean(Some(value)))) => Ok(Some(value)),
1736
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
1737
- "UPDATE lix_directory expected boolean column '{column_name}', got {other:?}"
1738
- ))),
1739
- }
1740
- }
1741
-
1742
- fn optional_string_value(
1743
- batch: &RecordBatch,
1744
- row_index: usize,
1745
- column_name: &str,
1746
- ) -> Result<Option<String>> {
1747
- match optional_scalar_value(batch, row_index, column_name)? {
1748
- None
1749
- | Some(ScalarValue::Null)
1750
- | Some(ScalarValue::Utf8(None))
1751
- | Some(ScalarValue::Utf8View(None))
1752
- | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
1753
- Some(ScalarValue::Utf8(Some(value)))
1754
- | Some(ScalarValue::Utf8View(Some(value)))
1755
- | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
1756
- Some(other) => Err(DataFusionError::Execution(format!(
1757
- "INSERT into lix_directory expected text-compatible column '{column_name}', got {other:?}"
1758
- ))),
1759
- }
1760
- }
1761
-
1762
- fn optional_metadata_value(
1763
- batch: &RecordBatch,
1764
- row_index: usize,
1765
- column_name: &str,
1766
- context: &str,
1767
- ) -> Result<Option<TransactionJson>> {
1768
- optional_string_value(batch, row_index, column_name)?
1769
- .map(|value| {
1770
- let metadata = parse_row_metadata_value(&value, context)
1771
- .map_err(super::error::lix_error_to_datafusion_error)?;
1772
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
1773
- .map_err(super::error::lix_error_to_datafusion_error)
1774
- })
1775
- .transpose()
1776
- }
1777
-
1778
- fn optional_bool_value(
1779
- batch: &RecordBatch,
1780
- row_index: usize,
1781
- column_name: &str,
1782
- ) -> Result<Option<bool>> {
1783
- match optional_scalar_value(batch, row_index, column_name)? {
1784
- None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
1785
- Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
1786
- Some(other) => Err(DataFusionError::Execution(format!(
1787
- "INSERT into lix_directory expected boolean column '{column_name}', got {other:?}"
1788
- ))),
1789
- }
1790
- }
1791
-
1792
- fn optional_scalar_value(
1793
- batch: &RecordBatch,
1794
- row_index: usize,
1795
- column_name: &str,
1796
- ) -> Result<Option<ScalarValue>> {
1797
- let schema = batch.schema();
1798
- let column_index = match schema.index_of(column_name) {
1799
- Ok(column_index) => column_index,
1800
- Err(_) => return Ok(None),
1801
- };
1802
- if row_index >= batch.num_rows() {
1803
- return Err(DataFusionError::Execution(format!(
1804
- "row index {row_index} out of bounds for lix_directory batch with {} rows",
1805
- batch.num_rows()
1806
- )));
1807
- }
1808
- ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
1809
- .map(Some)
1810
- .map_err(|error| {
1811
- DataFusionError::Execution(format!(
1812
- "failed to decode lix_directory column '{column_name}' at row {row_index}: {error}"
1813
- ))
1814
- })
1815
- }
1816
-
1817
- fn lix_directory_schema() -> SchemaRef {
1818
- Arc::new(Schema::new(vec![
1819
- Field::new("id", DataType::Utf8, true),
1820
- Field::new("path", DataType::Utf8, true),
1821
- Field::new("parent_id", DataType::Utf8, true),
1822
- Field::new("name", DataType::Utf8, false),
1823
- Field::new("hidden", DataType::Boolean, true),
1824
- json_field("lixcol_entity_id", false),
1825
- Field::new("lixcol_schema_key", DataType::Utf8, false),
1826
- Field::new("lixcol_file_id", DataType::Utf8, true),
1827
- Field::new("lixcol_global", DataType::Boolean, true),
1828
- Field::new("lixcol_change_id", DataType::Utf8, true),
1829
- Field::new("lixcol_created_at", DataType::Utf8, true),
1830
- Field::new("lixcol_updated_at", DataType::Utf8, true),
1831
- Field::new("lixcol_commit_id", DataType::Utf8, true),
1832
- Field::new("lixcol_untracked", DataType::Boolean, true),
1833
- json_field("lixcol_metadata", true),
1834
- ]))
1835
- }
1836
-
1837
- fn lix_directory_by_version_schema() -> SchemaRef {
1838
- let mut fields = lix_directory_schema()
1839
- .fields()
1840
- .iter()
1841
- .map(|field| field.as_ref().clone())
1842
- .collect::<Vec<_>>();
1843
- fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
1844
- Arc::new(Schema::new(fields))
1845
- }
1846
-
1847
- fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
1848
- super::error::datafusion_error_to_lix_error(error)
1849
- }
1850
-
1851
- fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
1852
- super::error::lix_error_to_datafusion_error(error)
1853
- }
1854
-
1855
// Unit tests for the `lix_directory` provider: path derivation, record-batch
// projection, INSERT decoding, recursive DELETE planning and the insert sink.
#[cfg(test)]
mod tests {
    use std::collections::{BTreeMap, BTreeSet};
    use std::sync::Arc;

    use async_trait::async_trait;
    use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray};
    use datafusion::arrow::datatypes::{DataType, Field, Schema};
    use datafusion::arrow::record_batch::RecordBatch;
    use datafusion::execution::TaskContext;
    use serde_json::json;

    use crate::binary_cas::BlobDataReader;
    use crate::functions::{
        FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
    };
    use crate::live_state::{
        LiveStateReader, LiveStateRowRequest, LiveStateScanRequest, MaterializedLiveStateRow,
    };
    use crate::sql2::dml::InsertSink;
    use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
    use crate::transaction::types::{
        TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
        TransactionWriteRow,
    };
    use crate::LixError;

    use super::{
        derive_directory_path_for, directory_path_resolvers_from_state_rows,
        lix_directory_by_version_schema, lix_directory_insert_origin, lix_directory_record_batch,
        lix_directory_recursive_delete_rows_from_batch, lix_directory_write_rows_from_batch,
        lix_directory_write_rows_from_batch_with_path_resolvers, DirectoryDescriptorRecord,
        LixDirectoryInsertSink, VersionBinding,
    };
    use crate::sql2::filesystem_visibility::VisibleFilesystem;

    /// Returns an id generator that hands out `ids` in order and panics once
    /// they are exhausted.
    fn test_id_generator(ids: &'static [&'static str]) -> impl FnMut() -> String {
        let mut ids = ids.iter();
        move || ids.next().expect("test id should exist").to_string()
    }

    /// Function provider handle backed by `SystemFunctionProvider`.
    fn test_functions() -> FunctionProviderHandle {
        SharedFunctionProvider::new(
            Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
        )
    }

    /// Write context double: serves `rows` from every live-state scan and
    /// records every staged write in `writes`.
    #[derive(Default)]
    struct CapturingWriteContext {
        rows: Vec<MaterializedLiveStateRow>,
        writes: Vec<TransactionWrite>,
    }

    // Blob reads always report "not found": one `None` per requested hash.
    #[async_trait]
    impl BlobDataReader for CapturingWriteContext {
        async fn load_bytes_many(
            &self,
            hashes: &[crate::binary_cas::BlobHash],
        ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
            Ok(crate::binary_cas::BlobBytesBatch::new(vec![
                None;
                hashes.len()
            ]))
        }
    }

    #[async_trait]
    impl SqlWriteExecutionContext for CapturingWriteContext {
        // The active version is fixed to "version-a" for every test.
        fn active_version_id(&self) -> &str {
            "version-a"
        }

        fn functions(&self) -> FunctionProviderHandle {
            test_functions()
        }

        fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
            Ok(Vec::new())
        }

        // Delegates to the `BlobDataReader` implementation above.
        async fn load_bytes_many(
            &mut self,
            hashes: &[crate::binary_cas::BlobHash],
        ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
            BlobDataReader::load_bytes_many(self, hashes).await
        }

        // The request is ignored; the canned rows are returned as-is.
        async fn scan_live_state(
            &mut self,
            _request: &LiveStateScanRequest,
        ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
            Ok(self.rows.clone())
        }

        // Every version has a head `commit-<version_id>` except "ghost-version",
        // which has none.
        async fn load_version_head(
            &mut self,
            version_id: &str,
        ) -> Result<Option<String>, LixError> {
            if version_id == "ghost-version" {
                return Ok(None);
            }
            Ok(Some(format!("commit-{version_id}")))
        }

        // Records the write and always reports a count of 0.
        async fn stage_write(
            &mut self,
            write: TransactionWrite,
        ) -> Result<TransactionWriteOutcome, LixError> {
            self.writes.push(write);
            Ok(TransactionWriteOutcome { count: 0 })
        }
    }

    /// Live-state reader double that returns `rows` from every scan and never
    /// finds a single row. Not referenced by the tests in this module, hence
    /// the `dead_code` allowance.
    #[derive(Default)]
    #[allow(dead_code)]
    struct RowsLiveStateReader {
        rows: Vec<MaterializedLiveStateRow>,
    }

    #[async_trait]
    impl LiveStateReader for RowsLiveStateReader {
        async fn scan_rows(
            &self,
            _request: &LiveStateScanRequest,
        ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
            Ok(self.rows.clone())
        }

        async fn load_row(
            &self,
            _request: &LiveStateRowRequest,
        ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
            Ok(None)
        }
    }

    /// Builds a live directory-descriptor row (schema key
    /// `DIRECTORY_SCHEMA_KEY`, no file id).
    fn live_row(
        entity_id: &str,
        version_id: &str,
        snapshot_content: &str,
    ) -> MaterializedLiveStateRow {
        live_filesystem_row(
            entity_id,
            super::DIRECTORY_SCHEMA_KEY,
            None,
            version_id,
            snapshot_content,
        )
    }

    /// Builds a tracked, non-global, non-deleted live row with fixed
    /// timestamps, `{"source": "test"}` metadata and change/commit ids
    /// derived from `entity_id`.
    fn live_filesystem_row(
        entity_id: &str,
        schema_key: &str,
        file_id: Option<&str>,
        version_id: &str,
        snapshot_content: &str,
    ) -> MaterializedLiveStateRow {
        MaterializedLiveStateRow {
            entity_id: crate::entity_identity::EntityIdentity::single(entity_id),
            schema_key: schema_key.to_string(),
            file_id: file_id.map(ToOwned::to_owned),
            snapshot_content: Some(snapshot_content.to_string()),
            metadata: Some(json!({"source": "test"}).to_string()),
            deleted: false,
            version_id: version_id.to_string(),
            change_id: Some(format!("change-{entity_id}")),
            commit_id: Some(format!("commit-{entity_id}")),
            global: false,
            untracked: false,
            created_at: "2026-04-23T00:00:00Z".to_string(),
            updated_at: "2026-04-23T01:00:00Z".to_string(),
        }
    }

    /// Fixture filesystem in "version-a":
    ///
    /// ```text
    /// /docs/                 (dir-docs)
    /// /docs/index.md         (file-index)
    /// /docs/guides/          (dir-guides)
    /// /docs/guides/readme.md (file-readme, with a blob ref)
    /// ```
    fn filesystem_rows() -> Vec<MaterializedLiveStateRow> {
        vec![
            live_filesystem_row(
                "dir-docs",
                "lix_directory_descriptor",
                None,
                "version-a",
                r#"{"id":"dir-docs","parent_id":null,"name":"docs","hidden":false}"#,
            ),
            live_filesystem_row(
                "dir-guides",
                "lix_directory_descriptor",
                None,
                "version-a",
                r#"{"id":"dir-guides","parent_id":"dir-docs","name":"guides","hidden":false}"#,
            ),
            live_filesystem_row(
                "file-index",
                "lix_file_descriptor",
                None,
                "version-a",
                r#"{"id":"file-index","directory_id":"dir-docs","name":"index.md","hidden":false}"#,
            ),
            live_filesystem_row(
                "file-readme",
                "lix_file_descriptor",
                None,
                "version-a",
                r#"{"id":"file-readme","directory_id":"dir-guides","name":"readme.md","hidden":false}"#,
            ),
            live_filesystem_row(
                "file-readme",
                "lix_binary_blob_ref",
                Some("file-readme"),
                "version-a",
                r#"{"id":"file-readme","blob_hash":"abc123","size_bytes":5}"#,
            ),
        ]
    }

    /// Wraps optional strings into a `StringArray` column.
    fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
        Arc::new(StringArray::from(values)) as ArrayRef
    }

    /// One-row INSERT batch for a root directory `docs` (id `dir-docs`).
    ///
    /// `include_version` appends a `lixcol_version_id` column set to
    /// "version-a"; `global` is the value of `lixcol_global`.
    fn directory_insert_batch(include_version: bool, global: bool) -> RecordBatch {
        let mut fields = vec![
            Field::new("id", DataType::Utf8, false),
            Field::new("parent_id", DataType::Utf8, true),
            Field::new("name", DataType::Utf8, false),
            Field::new("hidden", DataType::Boolean, false),
            Field::new("lixcol_global", DataType::Boolean, false),
            Field::new("lixcol_metadata", DataType::Utf8, true),
        ];
        let mut columns = vec![
            string_column(vec![Some("dir-docs")]),
            string_column(vec![None]),
            string_column(vec![Some("docs")]),
            Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
            Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
            string_column(vec![Some("{\"source\":\"directory\"}")]),
        ];
        if include_version {
            fields.push(Field::new("lixcol_version_id", DataType::Utf8, false));
            columns.push(string_column(vec![Some("version-a")]));
        }
        RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
            .expect("directory insert batch should build")
    }

    /// One-row INSERT batch that identifies the directory `dir-nested` by
    /// `path` (no `parent_id`/`name` columns) in "version-a".
    fn directory_path_insert_batch(path: &str) -> RecordBatch {
        RecordBatch::try_new(
            Arc::new(Schema::new(vec![
                Field::new("id", DataType::Utf8, false),
                Field::new("path", DataType::Utf8, true),
                Field::new("hidden", DataType::Boolean, false),
                Field::new("lixcol_version_id", DataType::Utf8, false),
            ])),
            vec![
                string_column(vec![Some("dir-nested")]),
                string_column(vec![Some(path)]),
                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
                string_column(vec![Some("version-a")]),
            ],
        )
        .expect("directory path insert batch should build")
    }

    /// DELETE batch with one row per id in `ids`, all in "version-a".
    fn directory_delete_batch(ids: &[&str]) -> RecordBatch {
        RecordBatch::try_new(
            Arc::new(Schema::new(vec![
                Field::new("id", DataType::Utf8, false),
                Field::new("lixcol_version_id", DataType::Utf8, false),
            ])),
            vec![
                string_column(ids.iter().copied().map(Some).collect::<Vec<_>>()),
                string_column(vec![Some("version-a"); ids.len()]),
            ],
        )
        .expect("directory delete batch should build")
    }

    /// A child directory resolves to its parent's path plus its own name.
    #[test]
    fn derives_nested_directory_paths() {
        let root = DirectoryDescriptorRecord {
            id: "dir-docs".to_string(),
            parent_id: None,
            name: "docs".to_string(),
            hidden: false,
            live: live_row(
                "dir-docs",
                "version-a",
                "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\",\"hidden\":false}",
            ),
        };
        let child = DirectoryDescriptorRecord {
            id: "dir-guides".to_string(),
            parent_id: Some("dir-docs".to_string()),
            name: "guides".to_string(),
            hidden: false,
            live: live_row(
                "dir-guides",
                "version-a",
                "{\"id\":\"dir-guides\",\"parent_id\":\"dir-docs\",\"name\":\"guides\",\"hidden\":false}",
            ),
        };
        let mut records = BTreeMap::new();
        records.insert(root.id.clone(), &root);
        records.insert(child.id.clone(), &child);
        let mut paths = BTreeMap::new();

        assert_eq!(
            derive_directory_path_for(
                "version-a",
                "dir-guides",
                &records,
                &mut paths,
                &mut BTreeSet::new()
            )
            .expect("path derivation should succeed"),
            Some("/docs/guides/".to_string())
        );
    }

    /// The by-version record batch exposes the derived `path` and the row's
    /// `lixcol_version_id`.
    #[test]
    fn record_batch_projects_directory_columns() {
        let rows = vec![
            live_row(
                "dir-docs",
                "version-a",
                "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\",\"hidden\":false}",
            ),
            live_row(
                "dir-guides",
                "version-a",
                "{\"id\":\"dir-guides\",\"parent_id\":\"dir-docs\",\"name\":\"guides\",\"hidden\":true}",
            ),
        ];

        let batch = lix_directory_record_batch(&lix_directory_by_version_schema(), rows)
            .expect("directory batch should build");

        assert_eq!(batch.num_rows(), 2);
        assert_eq!(
            batch
                .column_by_name("path")
                .expect("path column")
                .as_any()
                .downcast_ref::<StringArray>()
                .expect("path is string")
                .value(1),
            "/docs/guides/"
        );
        assert_eq!(
            batch
                .column_by_name("lixcol_version_id")
                .expect("version column")
                .as_any()
                .downcast_ref::<StringArray>()
                .expect("version is string")
                .value(1),
            "version-a"
        );
    }

    /// An INSERT batch with an explicit version id decodes into a single
    /// directory-descriptor write row with the expected snapshot and metadata.
    #[test]
    fn decodes_directory_insert_into_lix_state_write_row() {
        let rows = lix_directory_write_rows_from_batch(&directory_insert_batch(true, false), None)
            .expect("directory batch should decode");

        assert_eq!(
            rows,
            vec![TransactionWriteRow {
                entity_id: Some(crate::entity_identity::EntityIdentity::single("dir-docs")),
                schema_key: super::DIRECTORY_SCHEMA_KEY.to_string(),
                file_id: None,
                snapshot: Some(TransactionJson::from_value_for_test(
                    json!({"hidden":false,"id":"dir-docs","name":"docs","parent_id":null})
                )),
                metadata: Some(TransactionJson::from_value_for_test(
                    json!({"source": "directory"})
                )),
                origin: Some(lix_directory_insert_origin("lix_directory", "dir-docs")),
                created_at: None,
                updated_at: None,
                global: false,
                change_id: None,
                commit_id: None,
                untracked: false,
                version_id: "version-a".to_string(),
            }]
        );
    }

    /// Without a `lixcol_version_id` column the supplied active version id is
    /// used.
    #[test]
    fn active_directory_insert_defaults_version_id() {
        let rows = lix_directory_write_rows_from_batch(
            &directory_insert_batch(false, false),
            Some("version-active"),
        )
        .expect("active directory batch should decode");

        assert_eq!(rows[0].version_id, "version-active");
    }

    /// A non-global row with neither a version column nor an active version
    /// is rejected.
    #[test]
    fn by_version_directory_insert_requires_version_id_for_non_global_rows() {
        let error =
            lix_directory_write_rows_from_batch(&directory_insert_batch(false, false), None)
                .expect_err("by-version insert should require version id");

        assert!(
            error.to_string().contains("requires lixcol_version_id"),
            "unexpected error: {error}"
        );
    }

    /// `lixcol_global = true` combined with the version id "version-a" is
    /// rejected.
    #[test]
    fn directory_insert_rejects_global_with_non_global_version_id() {
        let error = lix_directory_write_rows_from_batch(&directory_insert_batch(true, true), None)
            .expect_err("global directory write should reject conflicting version id");

        assert!(
            error
                .to_string()
                .contains("cannot set lixcol_global=true with non-global lixcol_version_id"),
            "unexpected error: {error}"
        );
    }

    /// Inserting `/docs/nested/` when `/docs/` already exists produces one
    /// write row whose parent is the existing `dir-docs`; the id generator is
    /// seeded with a sentinel that must not appear in the result.
    #[test]
    fn directory_path_insert_reuses_existing_parent_descriptor() {
        let existing_rows = vec![live_row(
            "dir-docs",
            "version-a",
            "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\",\"hidden\":false}",
        )];
        let mut resolvers = directory_path_resolvers_from_state_rows(existing_rows)
            .expect("existing directory rows should seed paths");

        let rows = lix_directory_write_rows_from_batch_with_path_resolvers(
            &directory_path_insert_batch("/docs/nested/"),
            None,
            "lix_directory",
            &mut resolvers,
            &mut test_id_generator(&["should-not-be-used"]),
        )
        .expect("directory path batch should decode");

        assert_eq!(rows.len(), 1);
        let snapshot = rows[0].snapshot.as_ref().unwrap();
        assert_eq!(snapshot["id"], "dir-nested");
        assert_eq!(snapshot["parent_id"], "dir-docs");
        assert_eq!(snapshot["name"], "nested");
    }

    /// Deleting `dir-docs` plans tombstones (rows without a snapshot) for the
    /// nested directory, both files and the blob ref, children first.
    ///
    /// The reported count is 4 while 5 rows are planned.
    /// NOTE(review): presumably the blob-ref row is not counted — confirm.
    #[test]
    fn recursive_directory_delete_deletes_nested_dirs_files_and_blob_refs() {
        let visible_filesystem = VisibleFilesystem::from_live_rows(filesystem_rows())
            .expect("visible filesystem should build");
        let mut visible_filesystems = BTreeMap::new();
        visible_filesystems.insert("version-a".to_string(), visible_filesystem);

        let (rows, count) = lix_directory_recursive_delete_rows_from_batch(
            &directory_delete_batch(&["dir-docs"]),
            None,
            &visible_filesystems,
        )
        .expect("recursive directory delete should plan");

        assert_eq!(count, 4);
        assert_eq!(
            rows.iter()
                .map(|row| {
                    (
                        row.schema_key.as_str(),
                        row.entity_id
                            .as_ref()
                            .expect("planned delete row should carry entity_id")
                            .as_single_string_owned()
                            .expect("planned delete row should project entity_id"),
                    )
                })
                .collect::<Vec<_>>(),
            vec![
                ("lix_file_descriptor", "file-readme".to_string()),
                ("lix_binary_blob_ref", "file-readme".to_string()),
                ("lix_directory_descriptor", "dir-guides".to_string()),
                ("lix_file_descriptor", "file-index".to_string()),
                ("lix_directory_descriptor", "dir-docs".to_string()),
            ]
        );
        assert!(rows.iter().all(|row| row.snapshot.is_none()));
    }

    /// Deleting both a directory and its child in one batch yields the same
    /// count and row total as deleting the parent alone, with no duplicate
    /// `(schema_key, entity_id, file_id, version_id)` identities.
    #[test]
    fn recursive_directory_delete_dedupes_overlapping_parent_and_child() {
        let visible_filesystem = VisibleFilesystem::from_live_rows(filesystem_rows())
            .expect("visible filesystem should build");
        let mut visible_filesystems = BTreeMap::new();
        visible_filesystems.insert("version-a".to_string(), visible_filesystem);

        let (rows, count) = lix_directory_recursive_delete_rows_from_batch(
            &directory_delete_batch(&["dir-docs", "dir-guides"]),
            None,
            &visible_filesystems,
        )
        .expect("recursive directory delete should plan");

        assert_eq!(count, 4);
        let identities = rows
            .iter()
            .map(|row| {
                (
                    row.schema_key.clone(),
                    row.entity_id.clone(),
                    row.file_id.clone(),
                    row.version_id.clone(),
                )
            })
            .collect::<std::collections::BTreeSet<_>>();
        assert_eq!(identities.len(), rows.len());
        assert_eq!(rows.len(), 5);
    }

    /// The insert sink with an explicit version binding stages exactly one
    /// `Insert` write whose origin names `lix_directory_by_version`, and
    /// reports one written row.
    #[tokio::test]
    async fn directory_insert_sink_stages_decoded_lix_state_rows() {
        let mut write_context = CapturingWriteContext::default();
        let write_ctx = SqlWriteContext::new(&mut write_context);
        let batch = directory_insert_batch(true, false);
        let sink = LixDirectoryInsertSink::new(
            batch.schema(),
            write_ctx,
            test_functions(),
            VersionBinding::explicit(),
        );
        let count = sink
            .write_batches(vec![batch], &Arc::new(TaskContext::default()))
            .await
            .expect("directory sink should stage write");

        assert_eq!(count, 1);
        assert_eq!(
            write_context.writes.as_slice(),
            &[TransactionWrite::Rows {
                mode: TransactionWriteMode::Insert,
                rows: vec![TransactionWriteRow {
                    entity_id: Some(crate::entity_identity::EntityIdentity::single("dir-docs")),
                    schema_key: super::DIRECTORY_SCHEMA_KEY.to_string(),
                    file_id: None,
                    snapshot: Some(TransactionJson::from_value_for_test(
                        json!({"hidden":false,"id":"dir-docs","name":"docs","parent_id":null})
                    )),
                    metadata: Some(TransactionJson::from_value_for_test(
                        json!({"source": "directory"})
                    )),
                    origin: Some(lix_directory_insert_origin(
                        "lix_directory_by_version",
                        "dir-docs"
                    )),
                    created_at: None,
                    updated_at: None,
                    global: false,
                    change_id: None,
                    commit_id: None,
                    untracked: false,
                    version_id: "version-a".to_string(),
                }]
            }]
        );
    }

    /// A path-based insert through the sink resolves the parent from the live
    /// state served by the write context: `/docs/nested/` is staged as a
    /// single row under the existing `dir-docs`.
    #[tokio::test]
    async fn directory_insert_sink_seeds_path_resolver_from_live_state() {
        let mut write_context = CapturingWriteContext {
            rows: vec![live_row(
                "dir-docs",
                "version-a",
                "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\",\"hidden\":false}",
            )],
            writes: Vec::new(),
        };
        let write_ctx = SqlWriteContext::new(&mut write_context);
        let batch = directory_path_insert_batch("/docs/nested/");
        let sink = LixDirectoryInsertSink::new(
            batch.schema(),
            write_ctx,
            test_functions(),
            VersionBinding::explicit(),
        );
        let count = sink
            .write_batches(vec![batch], &Arc::new(TaskContext::default()))
            .await
            .expect("directory sink should stage path write");

        assert_eq!(count, 1);
        let [TransactionWrite::Rows { rows, .. }] = write_context.writes.as_slice() else {
            panic!("expected one directory staged write");
        };
        assert_eq!(rows.len(), 1);
        let snapshot = rows[0].snapshot.as_ref().unwrap();
        assert_eq!(snapshot["id"], "dir-nested");
        assert_eq!(snapshot["parent_id"], "dir-docs");
        assert_eq!(snapshot["name"], "nested");
    }
}