@lix-js/sdk 0.6.0-preview.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. package/README.md +76 -4
  2. package/dist/errors.d.ts +7 -0
  3. package/dist/errors.js +19 -0
  4. package/dist/index.d.ts +4 -5
  5. package/dist/index.js +3 -3
  6. package/dist/native.d.ts +1 -0
  7. package/dist/native.js +47 -0
  8. package/dist/open-lix.d.ts +38 -207
  9. package/dist/open-lix.js +59 -284
  10. package/dist/result.d.ts +18 -0
  11. package/dist/result.js +48 -0
  12. package/dist/types.d.ts +114 -1
  13. package/dist/value.d.ts +28 -0
  14. package/dist/value.js +245 -0
  15. package/package.json +38 -71
  16. package/SKILL.md +0 -507
  17. package/dist/builtin-schemas.d.ts +0 -1
  18. package/dist/builtin-schemas.js +0 -1
  19. package/dist/engine-wasm/index.d.ts +0 -87
  20. package/dist/engine-wasm/index.js +0 -339
  21. package/dist/engine-wasm/wasm/lix_engine.d.ts +0 -79
  22. package/dist/engine-wasm/wasm/lix_engine.js +0 -833
  23. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  24. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +0 -27
  25. package/dist/generated/builtin-schemas.d.ts +0 -427
  26. package/dist/generated/builtin-schemas.js +0 -643
  27. package/dist/sqlite/index.d.ts +0 -12
  28. package/dist/sqlite/index.js +0 -359
  29. package/dist-engine-src/README.md +0 -18
  30. package/dist-engine-src/src/backend/capabilities.rs +0 -67
  31. package/dist-engine-src/src/backend/conformance/baseline.rs +0 -1127
  32. package/dist-engine-src/src/backend/conformance/factory.rs +0 -93
  33. package/dist-engine-src/src/backend/conformance/failure_tests.rs +0 -608
  34. package/dist-engine-src/src/backend/conformance/fixtures.rs +0 -26
  35. package/dist-engine-src/src/backend/conformance/mod.rs +0 -75
  36. package/dist-engine-src/src/backend/conformance/model.rs +0 -28
  37. package/dist-engine-src/src/backend/conformance/model_based.rs +0 -257
  38. package/dist-engine-src/src/backend/conformance/persistence.rs +0 -204
  39. package/dist-engine-src/src/backend/conformance/projection.rs +0 -21
  40. package/dist-engine-src/src/backend/conformance/pushdown.rs +0 -24
  41. package/dist-engine-src/src/backend/conformance/runner.rs +0 -90
  42. package/dist-engine-src/src/backend/conformance/scan.rs +0 -24
  43. package/dist-engine-src/src/backend/conformance/write.rs +0 -16
  44. package/dist-engine-src/src/backend/error.rs +0 -94
  45. package/dist-engine-src/src/backend/in_memory.rs +0 -670
  46. package/dist-engine-src/src/backend/mod.rs +0 -39
  47. package/dist-engine-src/src/backend/predicate.rs +0 -80
  48. package/dist-engine-src/src/backend/traits.rs +0 -260
  49. package/dist-engine-src/src/backend/types.rs +0 -239
  50. package/dist-engine-src/src/binary_cas/chunking.rs +0 -31
  51. package/dist-engine-src/src/binary_cas/codec.rs +0 -346
  52. package/dist-engine-src/src/binary_cas/context.rs +0 -139
  53. package/dist-engine-src/src/binary_cas/kv.rs +0 -1038
  54. package/dist-engine-src/src/binary_cas/mod.rs +0 -11
  55. package/dist-engine-src/src/binary_cas/types.rs +0 -121
  56. package/dist-engine-src/src/branch/context.rs +0 -40
  57. package/dist-engine-src/src/branch/lifecycle.rs +0 -221
  58. package/dist-engine-src/src/branch/mod.rs +0 -13
  59. package/dist-engine-src/src/branch/refs.rs +0 -321
  60. package/dist-engine-src/src/branch/stage_rows.rs +0 -67
  61. package/dist-engine-src/src/branch/types.rs +0 -21
  62. package/dist-engine-src/src/catalog/context.rs +0 -412
  63. package/dist-engine-src/src/catalog/mod.rs +0 -10
  64. package/dist-engine-src/src/catalog/schema.rs +0 -4
  65. package/dist-engine-src/src/catalog/snapshot.rs +0 -1114
  66. package/dist-engine-src/src/cel/context.rs +0 -86
  67. package/dist-engine-src/src/cel/error.rs +0 -19
  68. package/dist-engine-src/src/cel/mod.rs +0 -8
  69. package/dist-engine-src/src/cel/provider.rs +0 -9
  70. package/dist-engine-src/src/cel/runtime.rs +0 -167
  71. package/dist-engine-src/src/cel/value.rs +0 -50
  72. package/dist-engine-src/src/changelog/bench_support.rs +0 -785
  73. package/dist-engine-src/src/changelog/change.rs +0 -1
  74. package/dist-engine-src/src/changelog/codec.rs +0 -497
  75. package/dist-engine-src/src/changelog/commit.rs +0 -1
  76. package/dist-engine-src/src/changelog/context.rs +0 -1614
  77. package/dist-engine-src/src/changelog/mod.rs +0 -29
  78. package/dist-engine-src/src/changelog/store.rs +0 -163
  79. package/dist-engine-src/src/changelog/test_support.rs +0 -54
  80. package/dist-engine-src/src/changelog/types.rs +0 -213
  81. package/dist-engine-src/src/commit_graph/context.rs +0 -944
  82. package/dist-engine-src/src/commit_graph/mod.rs +0 -9
  83. package/dist-engine-src/src/commit_graph/types.rs +0 -89
  84. package/dist-engine-src/src/commit_graph/walker.rs +0 -786
  85. package/dist-engine-src/src/common/error.rs +0 -347
  86. package/dist-engine-src/src/common/fingerprint.rs +0 -3
  87. package/dist-engine-src/src/common/fs_path.rs +0 -1336
  88. package/dist-engine-src/src/common/identity.rs +0 -145
  89. package/dist-engine-src/src/common/json_pointer.rs +0 -67
  90. package/dist-engine-src/src/common/metadata.rs +0 -40
  91. package/dist-engine-src/src/common/mod.rs +0 -23
  92. package/dist-engine-src/src/common/types.rs +0 -105
  93. package/dist-engine-src/src/common/wire.rs +0 -222
  94. package/dist-engine-src/src/domain.rs +0 -320
  95. package/dist-engine-src/src/engine.rs +0 -203
  96. package/dist-engine-src/src/entity_pk.rs +0 -402
  97. package/dist-engine-src/src/functions/context.rs +0 -296
  98. package/dist-engine-src/src/functions/deterministic.rs +0 -113
  99. package/dist-engine-src/src/functions/mod.rs +0 -18
  100. package/dist-engine-src/src/functions/provider.rs +0 -130
  101. package/dist-engine-src/src/functions/state.rs +0 -335
  102. package/dist-engine-src/src/functions/types.rs +0 -37
  103. package/dist-engine-src/src/init.rs +0 -692
  104. package/dist-engine-src/src/json_store/compression.rs +0 -77
  105. package/dist-engine-src/src/json_store/context.rs +0 -172
  106. package/dist-engine-src/src/json_store/encoded.rs +0 -15
  107. package/dist-engine-src/src/json_store/mod.rs +0 -38
  108. package/dist-engine-src/src/json_store/store.rs +0 -494
  109. package/dist-engine-src/src/json_store/types.rs +0 -212
  110. package/dist-engine-src/src/lib.rs +0 -92
  111. package/dist-engine-src/src/live_state/context.rs +0 -1883
  112. package/dist-engine-src/src/live_state/mod.rs +0 -21
  113. package/dist-engine-src/src/live_state/overlay.rs +0 -75
  114. package/dist-engine-src/src/live_state/reader.rs +0 -23
  115. package/dist-engine-src/src/live_state/types.rs +0 -231
  116. package/dist-engine-src/src/live_state/visibility.rs +0 -666
  117. package/dist-engine-src/src/plugin/archive.rs +0 -438
  118. package/dist-engine-src/src/plugin/component.rs +0 -183
  119. package/dist-engine-src/src/plugin/install.rs +0 -619
  120. package/dist-engine-src/src/plugin/manifest.rs +0 -516
  121. package/dist-engine-src/src/plugin/materializer.rs +0 -202
  122. package/dist-engine-src/src/plugin/mod.rs +0 -33
  123. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -119
  124. package/dist-engine-src/src/plugin/storage.rs +0 -74
  125. package/dist-engine-src/src/schema/annotations/defaults.rs +0 -275
  126. package/dist-engine-src/src/schema/annotations/mod.rs +0 -1
  127. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -21
  128. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -29
  129. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -29
  130. package/dist-engine-src/src/schema/builtin/lix_branch_descriptor.json +0 -34
  131. package/dist-engine-src/src/schema/builtin/lix_branch_ref.json +0 -48
  132. package/dist-engine-src/src/schema/builtin/lix_change.json +0 -63
  133. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -45
  134. package/dist-engine-src/src/schema/builtin/lix_commit.json +0 -24
  135. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +0 -53
  136. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -52
  137. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -52
  138. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -40
  139. package/dist-engine-src/src/schema/builtin/lix_label.json +0 -29
  140. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +0 -74
  141. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +0 -25
  142. package/dist-engine-src/src/schema/builtin/mod.rs +0 -220
  143. package/dist-engine-src/src/schema/compatibility.rs +0 -787
  144. package/dist-engine-src/src/schema/definition.json +0 -187
  145. package/dist-engine-src/src/schema/definition.rs +0 -742
  146. package/dist-engine-src/src/schema/key.rs +0 -138
  147. package/dist-engine-src/src/schema/mod.rs +0 -20
  148. package/dist-engine-src/src/schema/seed.rs +0 -14
  149. package/dist-engine-src/src/schema/tests.rs +0 -780
  150. package/dist-engine-src/src/session/context.rs +0 -1059
  151. package/dist-engine-src/src/session/create_branch.rs +0 -94
  152. package/dist-engine-src/src/session/execute.rs +0 -681
  153. package/dist-engine-src/src/session/merge/analysis.rs +0 -108
  154. package/dist-engine-src/src/session/merge/branch.rs +0 -417
  155. package/dist-engine-src/src/session/merge/conflicts.rs +0 -63
  156. package/dist-engine-src/src/session/merge/mod.rs +0 -10
  157. package/dist-engine-src/src/session/merge/stats.rs +0 -61
  158. package/dist-engine-src/src/session/mod.rs +0 -30
  159. package/dist-engine-src/src/session/switch_branch.rs +0 -113
  160. package/dist-engine-src/src/session/transaction.rs +0 -557
  161. package/dist-engine-src/src/sql2/bind/classify.rs +0 -102
  162. package/dist-engine-src/src/sql2/bind/error.rs +0 -5
  163. package/dist-engine-src/src/sql2/bind/expr.rs +0 -29
  164. package/dist-engine-src/src/sql2/bind/mod.rs +0 -12
  165. package/dist-engine-src/src/sql2/bind/public_udf.rs +0 -306
  166. package/dist-engine-src/src/sql2/bind/read.rs +0 -65
  167. package/dist-engine-src/src/sql2/bind/statement.rs +0 -2236
  168. package/dist-engine-src/src/sql2/bind/table.rs +0 -273
  169. package/dist-engine-src/src/sql2/bind/write.rs +0 -86
  170. package/dist-engine-src/src/sql2/branch_scope.rs +0 -436
  171. package/dist-engine-src/src/sql2/catalog/capability.rs +0 -20
  172. package/dist-engine-src/src/sql2/catalog/entity_surface.rs +0 -296
  173. package/dist-engine-src/src/sql2/catalog/mod.rs +0 -15
  174. package/dist-engine-src/src/sql2/catalog/registry.rs +0 -556
  175. package/dist-engine-src/src/sql2/catalog/schema.rs +0 -88
  176. package/dist-engine-src/src/sql2/catalog/surface.rs +0 -41
  177. package/dist-engine-src/src/sql2/change_materialization.rs +0 -122
  178. package/dist-engine-src/src/sql2/context.rs +0 -317
  179. package/dist-engine-src/src/sql2/dml.rs +0 -148
  180. package/dist-engine-src/src/sql2/error.rs +0 -215
  181. package/dist-engine-src/src/sql2/exec/bound_public_write.rs +0 -1593
  182. package/dist-engine-src/src/sql2/exec/datafusion.rs +0 -5266
  183. package/dist-engine-src/src/sql2/exec/fast_write.rs +0 -82
  184. package/dist-engine-src/src/sql2/exec/mod.rs +0 -24
  185. package/dist-engine-src/src/sql2/exec/write.rs +0 -661
  186. package/dist-engine-src/src/sql2/filesystem_planner.rs +0 -1485
  187. package/dist-engine-src/src/sql2/filesystem_predicates.rs +0 -159
  188. package/dist-engine-src/src/sql2/filesystem_visibility.rs +0 -383
  189. package/dist-engine-src/src/sql2/history_projection.rs +0 -56
  190. package/dist-engine-src/src/sql2/history_route.rs +0 -661
  191. package/dist-engine-src/src/sql2/mod.rs +0 -52
  192. package/dist-engine-src/src/sql2/optimize/datafusion.rs +0 -1
  193. package/dist-engine-src/src/sql2/optimize/mod.rs +0 -2
  194. package/dist-engine-src/src/sql2/optimize/simple_write.rs +0 -116
  195. package/dist-engine-src/src/sql2/parse/mod.rs +0 -69
  196. package/dist-engine-src/src/sql2/parse/normalize.rs +0 -1
  197. package/dist-engine-src/src/sql2/plan/branch_scope.rs +0 -24
  198. package/dist-engine-src/src/sql2/plan/mod.rs +0 -5
  199. package/dist-engine-src/src/sql2/plan/predicate.rs +0 -22
  200. package/dist-engine-src/src/sql2/plan/write.rs +0 -147
  201. package/dist-engine-src/src/sql2/predicate_typecheck.rs +0 -504
  202. package/dist-engine-src/src/sql2/providers/branch.rs +0 -1206
  203. package/dist-engine-src/src/sql2/providers/change.rs +0 -445
  204. package/dist-engine-src/src/sql2/providers/directory.rs +0 -2422
  205. package/dist-engine-src/src/sql2/providers/directory_history.rs +0 -645
  206. package/dist-engine-src/src/sql2/providers/entity.rs +0 -1484
  207. package/dist-engine-src/src/sql2/providers/entity_history.rs +0 -452
  208. package/dist-engine-src/src/sql2/providers/file.rs +0 -3686
  209. package/dist-engine-src/src/sql2/providers/file_history.rs +0 -924
  210. package/dist-engine-src/src/sql2/providers/history.rs +0 -426
  211. package/dist-engine-src/src/sql2/providers/lix_state.rs +0 -2542
  212. package/dist-engine-src/src/sql2/providers/mod.rs +0 -508
  213. package/dist-engine-src/src/sql2/read_only.rs +0 -63
  214. package/dist-engine-src/src/sql2/record_batch.rs +0 -17
  215. package/dist-engine-src/src/sql2/result_metadata.rs +0 -29
  216. package/dist-engine-src/src/sql2/runtime.rs +0 -60
  217. package/dist-engine-src/src/sql2/session.rs +0 -83
  218. package/dist-engine-src/src/sql2/storage/constraints.rs +0 -1
  219. package/dist-engine-src/src/sql2/storage/mod.rs +0 -1
  220. package/dist-engine-src/src/sql2/test_support/differential.rs +0 -712
  221. package/dist-engine-src/src/sql2/test_support/generators.rs +0 -354
  222. package/dist-engine-src/src/sql2/test_support/mod.rs +0 -2
  223. package/dist-engine-src/src/sql2/udfs/common.rs +0 -295
  224. package/dist-engine-src/src/sql2/udfs/lix_active_branch_commit_id.rs +0 -53
  225. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +0 -47
  226. package/dist-engine-src/src/sql2/udfs/lix_json.rs +0 -100
  227. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +0 -99
  228. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +0 -99
  229. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +0 -82
  230. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +0 -85
  231. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +0 -76
  232. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +0 -76
  233. package/dist-engine-src/src/sql2/udfs/mod.rs +0 -86
  234. package/dist-engine-src/src/sql2/write_normalization.rs +0 -368
  235. package/dist-engine-src/src/storage/conformance.rs +0 -399
  236. package/dist-engine-src/src/storage/context.rs +0 -620
  237. package/dist-engine-src/src/storage/mod.rs +0 -52
  238. package/dist-engine-src/src/storage/point.rs +0 -440
  239. package/dist-engine-src/src/storage/read_scope.rs +0 -67
  240. package/dist-engine-src/src/storage/reader.rs +0 -867
  241. package/dist-engine-src/src/storage/scan.rs +0 -784
  242. package/dist-engine-src/src/storage/spaces.rs +0 -236
  243. package/dist-engine-src/src/storage/stats.rs +0 -80
  244. package/dist-engine-src/src/storage/write_set.rs +0 -962
  245. package/dist-engine-src/src/storage_bench.rs +0 -171
  246. package/dist-engine-src/src/test_support.rs +0 -450
  247. package/dist-engine-src/src/tracked_state/bench_support.rs +0 -394
  248. package/dist-engine-src/src/tracked_state/codec.rs +0 -1183
  249. package/dist-engine-src/src/tracked_state/commit_root_rebuild.rs +0 -358
  250. package/dist-engine-src/src/tracked_state/context.rs +0 -2801
  251. package/dist-engine-src/src/tracked_state/diff.rs +0 -2140
  252. package/dist-engine-src/src/tracked_state/merge.rs +0 -478
  253. package/dist-engine-src/src/tracked_state/mod.rs +0 -35
  254. package/dist-engine-src/src/tracked_state/row_materialization.rs +0 -275
  255. package/dist-engine-src/src/tracked_state/storage.rs +0 -427
  256. package/dist-engine-src/src/tracked_state/tree.rs +0 -3063
  257. package/dist-engine-src/src/tracked_state/types.rs +0 -238
  258. package/dist-engine-src/src/transaction/bench_support.rs +0 -407
  259. package/dist-engine-src/src/transaction/commit.rs +0 -1592
  260. package/dist-engine-src/src/transaction/context.rs +0 -1653
  261. package/dist-engine-src/src/transaction/mod.rs +0 -24
  262. package/dist-engine-src/src/transaction/normalization.rs +0 -877
  263. package/dist-engine-src/src/transaction/prep.rs +0 -37
  264. package/dist-engine-src/src/transaction/schema_resolver.rs +0 -163
  265. package/dist-engine-src/src/transaction/staging.rs +0 -1525
  266. package/dist-engine-src/src/transaction/types.rs +0 -403
  267. package/dist-engine-src/src/transaction/validation.rs +0 -5766
  268. package/dist-engine-src/src/untracked_state/codec.rs +0 -615
  269. package/dist-engine-src/src/untracked_state/context.rs +0 -98
  270. package/dist-engine-src/src/untracked_state/materialization.rs +0 -63
  271. package/dist-engine-src/src/untracked_state/mod.rs +0 -15
  272. package/dist-engine-src/src/untracked_state/storage.rs +0 -898
  273. package/dist-engine-src/src/untracked_state/types.rs +0 -146
  274. package/dist-engine-src/src/wasm/mod.rs +0 -60
@@ -1,2422 +0,0 @@
1
- use std::any::Any;
2
- use std::collections::{BTreeMap, BTreeSet};
3
- use std::sync::Arc;
4
-
5
- use async_trait::async_trait;
6
- use datafusion::arrow::array::{
7
- ArrayRef, BooleanArray, RecordBatchOptions, StringArray, UInt64Array,
8
- };
9
- use datafusion::arrow::compute::{and, filter_record_batch};
10
- use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
11
- use datafusion::arrow::record_batch::RecordBatch;
12
- use datafusion::catalog::{Session, TableProvider};
13
- use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue, SchemaExt};
14
- use datafusion::datasource::TableType;
15
- use datafusion::execution::TaskContext;
16
- use datafusion::logical_expr::dml::InsertOp;
17
- use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
18
- use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
19
- use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
20
- use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
21
- use datafusion::physical_plan::{
22
- DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
23
- };
24
- use datafusion::prelude::SessionContext;
25
- use futures_util::{stream, TryStreamExt};
26
- use serde::Deserialize;
27
-
28
- use crate::branch::BranchRefReader;
29
- use crate::functions::FunctionProviderHandle;
30
- use crate::live_state::MaterializedLiveStateRow;
31
- use crate::live_state::{
32
- LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
33
- };
34
- use crate::sql2::branch_scope::{
35
- explicit_branch_ids_from_dml_filters, resolve_provider_branch_ids, resolve_write_branch_scope,
36
- BranchBinding,
37
- };
38
- use crate::sql2::dml::{InsertExec, InsertSink};
39
- use crate::sql2::filesystem_predicates::{
40
- canonicalize_filesystem_path_filters, FilesystemPathKind,
41
- };
42
- use crate::sql2::predicate_typecheck::{
43
- canonicalize_json_identity_text_filters, validate_json_predicate_filters,
44
- };
45
- use crate::sql2::write_normalization::{InsertCell, SqlCell, UpdateAssignmentValues};
46
- use crate::transaction::types::{
47
- LogicalPrimaryKey, TransactionJson, TransactionWriteOperation, TransactionWriteOrigin,
48
- TransactionWriteRow,
49
- };
50
- use crate::{parse_row_metadata_value, serialize_row_metadata, LixError};
51
-
52
- use crate::sql2::filesystem_planner::{
53
- directory_descriptor_write_row, directory_path_resolvers_from_state_rows,
54
- filesystem_storage_scope_key, plan_recursive_directory_delete, DirectoryDescriptorWriteIntent,
55
- DirectoryPathResolver, FilesystemDeletePlan, FilesystemRowContext,
56
- };
57
- use crate::sql2::filesystem_visibility::VisibleFilesystem;
58
- use crate::sql2::result_metadata::json_field;
59
- use crate::sql2::{
60
- SqlWriteContext, WriteAccess, WriteContextBranchRefReader, WriteContextLiveStateReader,
61
- };
62
- use crate::transaction::types::{TransactionWrite, TransactionWriteMode};
63
-
64
/// Schema key of the built-in directory descriptor schema.
const DIRECTORY_SCHEMA_KEY: &str = "lix_directory_descriptor";

/// Schema key of the built-in file descriptor schema.
const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";
66
-
67
- pub(super) async fn register_lix_directory_active_provider(
68
- session: &SessionContext,
69
- surface_name: &str,
70
- active_branch_id: &str,
71
- live_state: Arc<dyn LiveStateReader>,
72
- branch_ref: Arc<dyn BranchRefReader>,
73
- functions: FunctionProviderHandle,
74
- ) -> Result<(), LixError> {
75
- session
76
- .register_table(
77
- surface_name,
78
- Arc::new(LixDirectoryProvider::active_branch(
79
- active_branch_id,
80
- live_state,
81
- branch_ref,
82
- functions,
83
- )),
84
- )
85
- .map_err(datafusion_error_to_lix_error)?;
86
- Ok(())
87
- }
88
-
89
- pub(super) async fn register_lix_directory_by_branch_provider(
90
- session: &SessionContext,
91
- surface_name: &str,
92
- live_state: Arc<dyn LiveStateReader>,
93
- branch_ref: Arc<dyn BranchRefReader>,
94
- functions: FunctionProviderHandle,
95
- ) -> Result<(), LixError> {
96
- session
97
- .register_table(
98
- surface_name,
99
- Arc::new(LixDirectoryProvider::by_branch(
100
- live_state, branch_ref, functions,
101
- )),
102
- )
103
- .map_err(datafusion_error_to_lix_error)?;
104
- Ok(())
105
- }
106
-
107
- pub(super) async fn register_by_branch_write_provider(
108
- session: &SessionContext,
109
- surface_name: &str,
110
- write_ctx: SqlWriteContext,
111
- ) -> Result<(), LixError> {
112
- session
113
- .register_table(
114
- surface_name,
115
- Arc::new(LixDirectoryProvider::by_branch_with_write(write_ctx)),
116
- )
117
- .map_err(datafusion_error_to_lix_error)?;
118
- Ok(())
119
- }
120
-
121
- pub(super) async fn register_active_write_provider(
122
- session: &SessionContext,
123
- surface_name: &str,
124
- write_ctx: SqlWriteContext,
125
- ) -> Result<(), LixError> {
126
- session
127
- .register_table(
128
- surface_name,
129
- Arc::new(LixDirectoryProvider::active_branch_with_write(write_ctx)),
130
- )
131
- .map_err(datafusion_error_to_lix_error)?;
132
- Ok(())
133
- }
134
-
135
- pub(crate) struct LixDirectoryProvider {
136
- schema: SchemaRef,
137
- live_state: Arc<dyn LiveStateReader>,
138
- branch_ref: Arc<dyn BranchRefReader>,
139
- write_access: WriteAccess,
140
- functions: FunctionProviderHandle,
141
- branch_binding: BranchBinding,
142
- }
143
-
144
- impl std::fmt::Debug for LixDirectoryProvider {
145
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146
- f.debug_struct("LixDirectoryProvider").finish()
147
- }
148
- }
149
-
150
- impl LixDirectoryProvider {
151
- fn active_branch(
152
- active_branch_id: impl Into<String>,
153
- live_state: Arc<dyn LiveStateReader>,
154
- branch_ref: Arc<dyn BranchRefReader>,
155
- functions: FunctionProviderHandle,
156
- ) -> Self {
157
- Self {
158
- schema: lix_directory_schema(),
159
- live_state,
160
- branch_ref,
161
- write_access: WriteAccess::read_only(),
162
- functions,
163
- branch_binding: BranchBinding::active(active_branch_id),
164
- }
165
- }
166
-
167
- fn active_branch_with_write(write_ctx: SqlWriteContext) -> Self {
168
- let active_branch_id = write_ctx.active_branch_id();
169
- let functions = write_ctx.functions();
170
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
171
- let branch_ref = Arc::new(WriteContextBranchRefReader::new(write_ctx.clone()));
172
- Self {
173
- schema: lix_directory_schema(),
174
- live_state,
175
- branch_ref,
176
- write_access: WriteAccess::write(write_ctx),
177
- functions,
178
- branch_binding: BranchBinding::active(active_branch_id),
179
- }
180
- }
181
-
182
- fn by_branch(
183
- live_state: Arc<dyn LiveStateReader>,
184
- branch_ref: Arc<dyn BranchRefReader>,
185
- functions: FunctionProviderHandle,
186
- ) -> Self {
187
- Self {
188
- schema: lix_directory_by_branch_schema(),
189
- live_state,
190
- branch_ref,
191
- write_access: WriteAccess::read_only(),
192
- functions,
193
- branch_binding: BranchBinding::explicit(),
194
- }
195
- }
196
-
197
- fn by_branch_with_write(write_ctx: SqlWriteContext) -> Self {
198
- let functions = write_ctx.functions();
199
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
200
- let branch_ref = Arc::new(WriteContextBranchRefReader::new(write_ctx.clone()));
201
- Self {
202
- schema: lix_directory_by_branch_schema(),
203
- live_state,
204
- branch_ref,
205
- write_access: WriteAccess::write(write_ctx),
206
- functions,
207
- branch_binding: BranchBinding::explicit(),
208
- }
209
- }
210
- }
211
-
212
- #[async_trait]
213
- impl TableProvider for LixDirectoryProvider {
214
- fn as_any(&self) -> &dyn Any {
215
- self
216
- }
217
-
218
- fn schema(&self) -> SchemaRef {
219
- Arc::clone(&self.schema)
220
- }
221
-
222
- fn table_type(&self) -> TableType {
223
- TableType::Base
224
- }
225
-
226
- fn supports_filters_pushdown(
227
- &self,
228
- filters: &[&Expr],
229
- ) -> Result<Vec<TableProviderFilterPushDown>> {
230
- Ok(filters
231
- .iter()
232
- .map(|_| TableProviderFilterPushDown::Exact)
233
- .collect())
234
- }
235
-
236
- async fn scan(
237
- &self,
238
- _state: &dyn Session,
239
- projection: Option<&Vec<usize>>,
240
- filters: &[Expr],
241
- limit: Option<usize>,
242
- ) -> Result<Arc<dyn ExecutionPlan>> {
243
- let projected_schema = projected_schema(&self.schema, projection)?;
244
- let scan_limit = if filters.is_empty() { limit } else { None };
245
- let mut request = lix_directory_scan_request(
246
- self.branch_binding.active_branch_id(),
247
- Some(projected_schema.as_ref()),
248
- scan_limit,
249
- );
250
- request.filter.branch_ids = resolve_provider_branch_ids(
251
- self.branch_ref.as_ref(),
252
- &self.branch_binding,
253
- request.filter.branch_ids,
254
- )
255
- .await
256
- .map_err(lix_error_to_datafusion_error)?;
257
- let filters = canonicalize_filesystem_path_filters(filters, FilesystemPathKind::Directory)?;
258
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
259
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
260
- let physical_filters = filters
261
- .iter()
262
- .map(|expr| create_physical_expr(expr, &df_schema, _state.execution_props()))
263
- .collect::<Result<Vec<_>>>()?;
264
- Ok(Arc::new(LixDirectoryScanExec::new(
265
- Arc::clone(&self.live_state),
266
- Arc::clone(&self.schema),
267
- projected_schema,
268
- projection.cloned(),
269
- request,
270
- physical_filters,
271
- limit,
272
- )))
273
- }
274
-
275
- async fn insert_into(
276
- &self,
277
- _state: &dyn Session,
278
- input: Arc<dyn ExecutionPlan>,
279
- insert_op: InsertOp,
280
- ) -> Result<Arc<dyn ExecutionPlan>> {
281
- if insert_op != InsertOp::Append {
282
- return not_impl_err!("{insert_op} not implemented for lix_directory yet");
283
- }
284
- let write_ctx = self
285
- .write_access
286
- .require_write("INSERT into lix_directory")?;
287
- self.schema
288
- .logically_equivalent_names_and_types(&input.schema())?;
289
- let sink = LixDirectoryInsertSink::new(
290
- write_ctx,
291
- self.functions.clone(),
292
- self.branch_binding.clone(),
293
- );
294
- Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
295
- }
296
-
297
- async fn delete_from(
298
- &self,
299
- state: &dyn Session,
300
- filters: Vec<Expr>,
301
- ) -> Result<Arc<dyn ExecutionPlan>> {
302
- let write_ctx = self
303
- .write_access
304
- .require_write("DELETE FROM lix_directory")?;
305
- let filters =
306
- canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::Directory)?;
307
- let filters = canonicalize_json_identity_text_filters(self.schema.as_ref(), &filters)?;
308
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
309
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
310
- let physical_filters = filters
311
- .iter()
312
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
313
- .collect::<Result<Vec<_>>>()?;
314
- let mut request =
315
- lix_directory_scan_request(self.branch_binding.active_branch_id(), None, None);
316
- request.filter.branch_ids = explicit_branch_ids_from_dml_filters(&filters);
317
- request.filter.branch_ids = resolve_provider_branch_ids(
318
- self.branch_ref.as_ref(),
319
- &self.branch_binding,
320
- request.filter.branch_ids,
321
- )
322
- .await
323
- .map_err(lix_error_to_datafusion_error)?;
324
- Ok(Arc::new(LixDirectoryDeleteExec::new(
325
- write_ctx,
326
- Arc::clone(&self.schema),
327
- self.branch_binding.clone(),
328
- request,
329
- physical_filters,
330
- )))
331
- }
332
-
333
- async fn update(
334
- &self,
335
- state: &dyn Session,
336
- assignments: Vec<(String, Expr)>,
337
- filters: Vec<Expr>,
338
- ) -> Result<Arc<dyn ExecutionPlan>> {
339
- let write_ctx = self.write_access.require_write("UPDATE lix_directory")?;
340
- validate_lix_directory_update_assignments(&self.schema, &assignments)?;
341
- let filters =
342
- canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::Directory)?;
343
- let filters = canonicalize_json_identity_text_filters(self.schema.as_ref(), &filters)?;
344
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
345
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
346
- let physical_assignments = assignments
347
- .iter()
348
- .map(|(column_name, expr)| {
349
- Ok((
350
- column_name.clone(),
351
- create_physical_expr(expr, &df_schema, state.execution_props())?,
352
- ))
353
- })
354
- .collect::<Result<Vec<_>>>()?;
355
- let physical_filters = filters
356
- .iter()
357
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
358
- .collect::<Result<Vec<_>>>()?;
359
- let mut request =
360
- lix_directory_scan_request(self.branch_binding.active_branch_id(), None, None);
361
- request.filter.branch_ids = explicit_branch_ids_from_dml_filters(&filters);
362
- request.filter.branch_ids = resolve_provider_branch_ids(
363
- self.branch_ref.as_ref(),
364
- &self.branch_binding,
365
- request.filter.branch_ids,
366
- )
367
- .await
368
- .map_err(lix_error_to_datafusion_error)?;
369
- Ok(Arc::new(LixDirectoryUpdateExec::new(
370
- write_ctx,
371
- Arc::clone(&self.schema),
372
- self.branch_binding.clone(),
373
- request,
374
- physical_assignments,
375
- physical_filters,
376
- )))
377
- }
378
- }
379
-
380
- struct LixDirectoryInsertSink {
381
- write_ctx: SqlWriteContext,
382
- functions: FunctionProviderHandle,
383
- branch_binding: BranchBinding,
384
- surface_name: &'static str,
385
- }
386
-
387
- impl std::fmt::Debug for LixDirectoryInsertSink {
388
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
389
- f.debug_struct("LixDirectoryInsertSink").finish()
390
- }
391
- }
392
-
393
- impl LixDirectoryInsertSink {
394
- fn new(
395
- write_ctx: SqlWriteContext,
396
- functions: FunctionProviderHandle,
397
- branch_binding: BranchBinding,
398
- ) -> Self {
399
- let surface_name = lix_directory_surface_name(&branch_binding);
400
- Self {
401
- write_ctx,
402
- functions,
403
- branch_binding,
404
- surface_name,
405
- }
406
- }
407
- }
408
-
409
- impl DisplayAs for LixDirectoryInsertSink {
410
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
411
- match t {
412
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
413
- write!(f, "LixDirectoryInsertSink")
414
- }
415
- DisplayFormatType::TreeRender => write!(f, "LixDirectoryInsertSink"),
416
- }
417
- }
418
- }
419
-
420
- #[async_trait]
421
- impl InsertSink for LixDirectoryInsertSink {
422
- async fn write_batches(
423
- &self,
424
- batches: Vec<RecordBatch>,
425
- _context: &Arc<TaskContext>,
426
- ) -> Result<u64> {
427
- let mut path_resolvers = None;
428
- let mut rows = Vec::new();
429
- let mut count = 0_u64;
430
- for batch in batches {
431
- if path_resolvers.is_none() {
432
- path_resolvers = Some(
433
- directory_path_resolvers_from_live_state(
434
- Arc::new(WriteContextLiveStateReader::new(self.write_ctx.clone())),
435
- self.branch_binding.active_branch_id(),
436
- )
437
- .await
438
- .map_err(lix_error_to_datafusion_error)?,
439
- );
440
- }
441
- count = count
442
- .checked_add(u64::try_from(batch.num_rows()).map_err(|_| {
443
- DataFusionError::Execution("lix_directory INSERT row count overflow".into())
444
- })?)
445
- .ok_or_else(|| {
446
- DataFusionError::Execution("lix_directory INSERT row count overflow".into())
447
- })?;
448
- if record_batch_has_non_null_column(&batch, "path")? {
449
- rows.extend(lix_directory_write_rows_from_batch_with_path_resolvers(
450
- &batch,
451
- self.branch_binding.active_branch_id(),
452
- self.surface_name,
453
- path_resolvers
454
- .as_mut()
455
- .expect("path resolver should be initialized"),
456
- &mut || self.functions.call_uuid_v7(),
457
- )?);
458
- } else {
459
- rows.extend(
460
- lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
461
- &batch,
462
- self.branch_binding.active_branch_id(),
463
- self.surface_name,
464
- true,
465
- path_resolvers.as_mut(),
466
- None,
467
- )?,
468
- );
469
- }
470
- }
471
-
472
- self.write_ctx
473
- .stage_write(TransactionWrite::Rows {
474
- mode: TransactionWriteMode::Insert,
475
- rows,
476
- })
477
- .await
478
- .map_err(lix_error_to_datafusion_error)?;
479
-
480
- Ok(count)
481
- }
482
- }
483
-
484
- fn lix_directory_surface_name(branch_binding: &BranchBinding) -> &'static str {
485
- match branch_binding {
486
- BranchBinding::Active { .. } => "lix_directory",
487
- BranchBinding::Explicit => "lix_directory_by_branch",
488
- }
489
- }
490
-
491
- #[allow(dead_code)]
492
- struct LixDirectoryDeleteExec {
493
- write_ctx: SqlWriteContext,
494
- table_schema: SchemaRef,
495
- branch_binding: BranchBinding,
496
- request: LiveStateScanRequest,
497
- filters: Vec<Arc<dyn PhysicalExpr>>,
498
- result_schema: SchemaRef,
499
- properties: Arc<PlanProperties>,
500
- }
501
-
502
- impl std::fmt::Debug for LixDirectoryDeleteExec {
503
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
504
- f.debug_struct("LixDirectoryDeleteExec").finish()
505
- }
506
- }
507
-
508
- impl LixDirectoryDeleteExec {
509
- fn new(
510
- write_ctx: SqlWriteContext,
511
- table_schema: SchemaRef,
512
- branch_binding: BranchBinding,
513
- request: LiveStateScanRequest,
514
- filters: Vec<Arc<dyn PhysicalExpr>>,
515
- ) -> Self {
516
- let result_schema = dml_count_schema();
517
- let properties = PlanProperties::new(
518
- EquivalenceProperties::new(Arc::clone(&result_schema)),
519
- Partitioning::UnknownPartitioning(1),
520
- EmissionType::Final,
521
- Boundedness::Bounded,
522
- );
523
- Self {
524
- write_ctx,
525
- table_schema,
526
- branch_binding,
527
- request,
528
- filters,
529
- result_schema,
530
- properties: Arc::new(properties),
531
- }
532
- }
533
- }
534
-
535
- impl DisplayAs for LixDirectoryDeleteExec {
536
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
537
- match t {
538
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
539
- write!(f, "LixDirectoryDeleteExec(filters={})", self.filters.len())
540
- }
541
- DisplayFormatType::TreeRender => write!(f, "LixDirectoryDeleteExec"),
542
- }
543
- }
544
- }
545
-
546
- impl ExecutionPlan for LixDirectoryDeleteExec {
547
- fn name(&self) -> &str {
548
- "LixDirectoryDeleteExec"
549
- }
550
-
551
- fn as_any(&self) -> &dyn Any {
552
- self
553
- }
554
-
555
- fn properties(&self) -> &Arc<PlanProperties> {
556
- &self.properties
557
- }
558
-
559
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
560
- Vec::new()
561
- }
562
-
563
- fn with_new_children(
564
- self: Arc<Self>,
565
- children: Vec<Arc<dyn ExecutionPlan>>,
566
- ) -> Result<Arc<dyn ExecutionPlan>> {
567
- if !children.is_empty() {
568
- return Err(DataFusionError::Execution(
569
- "LixDirectoryDeleteExec does not accept children".to_string(),
570
- ));
571
- }
572
- Ok(self)
573
- }
574
-
575
- fn execute(
576
- &self,
577
- partition: usize,
578
- _context: Arc<TaskContext>,
579
- ) -> Result<SendableRecordBatchStream> {
580
- if partition != 0 {
581
- return Err(DataFusionError::Execution(format!(
582
- "LixDirectoryDeleteExec only exposes one partition, got {partition}"
583
- )));
584
- }
585
- let write_ctx = self.write_ctx.clone();
586
- let table_schema = Arc::clone(&self.table_schema);
587
- let branch_binding = self.branch_binding.clone();
588
- let request = self.request.clone();
589
- let filters = self.filters.clone();
590
- let result_schema = Arc::clone(&self.result_schema);
591
- let stream_schema = Arc::clone(&result_schema);
592
-
593
- let stream = stream::once(async move {
594
- let rows = write_ctx
595
- .scan_live_state(&request)
596
- .await
597
- .map_err(lix_error_to_datafusion_error)?;
598
- let source_batch = lix_directory_record_batch(&table_schema, rows)
599
- .map_err(lix_error_to_datafusion_error)?;
600
- let matched_batch = filter_lix_directory_batch(source_batch, &filters)?;
601
- let branch_ids =
602
- directory_branch_ids_from_batch(&matched_batch, branch_binding.active_branch_id())?;
603
- let mut visible_filesystems = BTreeMap::new();
604
- for branch_id in branch_ids {
605
- visible_filesystems.insert(
606
- branch_id.clone(),
607
- VisibleFilesystem::load(
608
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
609
- &branch_id,
610
- )
611
- .await
612
- .map_err(lix_error_to_datafusion_error)?,
613
- );
614
- }
615
- let (write_rows, count) = lix_directory_recursive_delete_rows_from_batch(
616
- &matched_batch,
617
- branch_binding.active_branch_id(),
618
- &visible_filesystems,
619
- )?;
620
-
621
- if count > 0 {
622
- write_ctx
623
- .stage_write(TransactionWrite::Rows {
624
- mode: TransactionWriteMode::Replace,
625
- rows: write_rows,
626
- })
627
- .await
628
- .map_err(lix_error_to_datafusion_error)?;
629
- }
630
-
631
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
632
- dml_count_batch(Arc::clone(&stream_schema), count)?,
633
- )]))
634
- })
635
- .try_flatten();
636
-
637
- Ok(Box::pin(RecordBatchStreamAdapter::new(
638
- result_schema,
639
- stream,
640
- )))
641
- }
642
- }
643
-
644
- #[allow(dead_code)]
645
- struct LixDirectoryUpdateExec {
646
- write_ctx: SqlWriteContext,
647
- table_schema: SchemaRef,
648
- branch_binding: BranchBinding,
649
- request: LiveStateScanRequest,
650
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
651
- filters: Vec<Arc<dyn PhysicalExpr>>,
652
- result_schema: SchemaRef,
653
- properties: Arc<PlanProperties>,
654
- }
655
-
656
- impl std::fmt::Debug for LixDirectoryUpdateExec {
657
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
658
- f.debug_struct("LixDirectoryUpdateExec").finish()
659
- }
660
- }
661
-
662
- impl LixDirectoryUpdateExec {
663
- fn new(
664
- write_ctx: SqlWriteContext,
665
- table_schema: SchemaRef,
666
- branch_binding: BranchBinding,
667
- request: LiveStateScanRequest,
668
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
669
- filters: Vec<Arc<dyn PhysicalExpr>>,
670
- ) -> Self {
671
- let result_schema = dml_count_schema();
672
- let properties = PlanProperties::new(
673
- EquivalenceProperties::new(Arc::clone(&result_schema)),
674
- Partitioning::UnknownPartitioning(1),
675
- EmissionType::Final,
676
- Boundedness::Bounded,
677
- );
678
- Self {
679
- write_ctx,
680
- table_schema,
681
- branch_binding,
682
- request,
683
- assignments,
684
- filters,
685
- result_schema,
686
- properties: Arc::new(properties),
687
- }
688
- }
689
- }
690
-
691
- impl DisplayAs for LixDirectoryUpdateExec {
692
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
693
- match t {
694
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
695
- write!(
696
- f,
697
- "LixDirectoryUpdateExec(assignments={}, filters={})",
698
- self.assignments.len(),
699
- self.filters.len()
700
- )
701
- }
702
- DisplayFormatType::TreeRender => write!(f, "LixDirectoryUpdateExec"),
703
- }
704
- }
705
- }
706
-
707
- impl ExecutionPlan for LixDirectoryUpdateExec {
708
- fn name(&self) -> &str {
709
- "LixDirectoryUpdateExec"
710
- }
711
-
712
- fn as_any(&self) -> &dyn Any {
713
- self
714
- }
715
-
716
- fn properties(&self) -> &Arc<PlanProperties> {
717
- &self.properties
718
- }
719
-
720
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
721
- Vec::new()
722
- }
723
-
724
- fn with_new_children(
725
- self: Arc<Self>,
726
- children: Vec<Arc<dyn ExecutionPlan>>,
727
- ) -> Result<Arc<dyn ExecutionPlan>> {
728
- if !children.is_empty() {
729
- return Err(DataFusionError::Execution(
730
- "LixDirectoryUpdateExec does not accept children".to_string(),
731
- ));
732
- }
733
- Ok(self)
734
- }
735
-
736
- fn execute(
737
- &self,
738
- partition: usize,
739
- _context: Arc<TaskContext>,
740
- ) -> Result<SendableRecordBatchStream> {
741
- if partition != 0 {
742
- return Err(DataFusionError::Execution(format!(
743
- "LixDirectoryUpdateExec only exposes one partition, got {partition}"
744
- )));
745
- }
746
- let write_ctx = self.write_ctx.clone();
747
- let table_schema = Arc::clone(&self.table_schema);
748
- let branch_binding = self.branch_binding.clone();
749
- let request = self.request.clone();
750
- let assignments = self.assignments.clone();
751
- let filters = self.filters.clone();
752
- let result_schema = Arc::clone(&self.result_schema);
753
- let stream_schema = Arc::clone(&result_schema);
754
-
755
- let stream = stream::once(async move {
756
- let rows = write_ctx
757
- .scan_live_state(&request)
758
- .await
759
- .map_err(lix_error_to_datafusion_error)?;
760
- let source_batch = lix_directory_record_batch(&table_schema, rows)
761
- .map_err(lix_error_to_datafusion_error)?;
762
- let matched_batch = filter_lix_directory_batch(source_batch, &filters)?;
763
- let mut path_resolvers = directory_path_resolvers_from_live_state(
764
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
765
- branch_binding.active_branch_id(),
766
- )
767
- .await
768
- .map_err(lix_error_to_datafusion_error)?;
769
- let write_rows = lix_directory_update_write_rows_from_batch(
770
- &matched_batch,
771
- &assignments,
772
- branch_binding.active_branch_id(),
773
- &mut path_resolvers,
774
- )?;
775
- let count = u64::try_from(write_rows.len()).map_err(|_| {
776
- DataFusionError::Execution("lix_directory UPDATE row count overflow".into())
777
- })?;
778
-
779
- if count > 0 {
780
- write_ctx
781
- .stage_write(TransactionWrite::Rows {
782
- mode: TransactionWriteMode::Replace,
783
- rows: write_rows,
784
- })
785
- .await
786
- .map_err(lix_error_to_datafusion_error)?;
787
- }
788
-
789
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
790
- dml_count_batch(Arc::clone(&stream_schema), count)?,
791
- )]))
792
- })
793
- .try_flatten();
794
-
795
- Ok(Box::pin(RecordBatchStreamAdapter::new(
796
- result_schema,
797
- stream,
798
- )))
799
- }
800
- }
801
-
802
- struct LixDirectoryScanExec {
803
- live_state: Arc<dyn LiveStateReader>,
804
- batch_schema: SchemaRef,
805
- output_schema: SchemaRef,
806
- projection: Option<Vec<usize>>,
807
- request: LiveStateScanRequest,
808
- filters: Vec<Arc<dyn PhysicalExpr>>,
809
- limit: Option<usize>,
810
- properties: Arc<PlanProperties>,
811
- }
812
-
813
- impl std::fmt::Debug for LixDirectoryScanExec {
814
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
815
- f.debug_struct("LixDirectoryScanExec").finish()
816
- }
817
- }
818
-
819
- impl LixDirectoryScanExec {
820
- fn new(
821
- live_state: Arc<dyn LiveStateReader>,
822
- batch_schema: SchemaRef,
823
- output_schema: SchemaRef,
824
- projection: Option<Vec<usize>>,
825
- request: LiveStateScanRequest,
826
- filters: Vec<Arc<dyn PhysicalExpr>>,
827
- limit: Option<usize>,
828
- ) -> Self {
829
- let properties = PlanProperties::new(
830
- EquivalenceProperties::new(Arc::clone(&output_schema)),
831
- Partitioning::UnknownPartitioning(1),
832
- EmissionType::Incremental,
833
- Boundedness::Bounded,
834
- );
835
- Self {
836
- live_state,
837
- batch_schema,
838
- output_schema,
839
- projection,
840
- request,
841
- filters,
842
- limit,
843
- properties: Arc::new(properties),
844
- }
845
- }
846
- }
847
-
848
- impl DisplayAs for LixDirectoryScanExec {
849
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
850
- match t {
851
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
852
- write!(f, "LixDirectoryScanExec(limit={:?})", self.limit)
853
- }
854
- DisplayFormatType::TreeRender => write!(f, "LixDirectoryScanExec"),
855
- }
856
- }
857
- }
858
-
859
- impl ExecutionPlan for LixDirectoryScanExec {
860
- fn name(&self) -> &str {
861
- "LixDirectoryScanExec"
862
- }
863
-
864
- fn as_any(&self) -> &dyn Any {
865
- self
866
- }
867
-
868
- fn properties(&self) -> &Arc<PlanProperties> {
869
- &self.properties
870
- }
871
-
872
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
873
- Vec::new()
874
- }
875
-
876
- fn with_new_children(
877
- self: Arc<Self>,
878
- children: Vec<Arc<dyn ExecutionPlan>>,
879
- ) -> Result<Arc<dyn ExecutionPlan>> {
880
- if !children.is_empty() {
881
- return Err(DataFusionError::Execution(
882
- "LixDirectoryScanExec does not accept children".to_string(),
883
- ));
884
- }
885
- Ok(self)
886
- }
887
-
888
- fn execute(
889
- &self,
890
- partition: usize,
891
- _context: Arc<TaskContext>,
892
- ) -> Result<SendableRecordBatchStream> {
893
- if partition != 0 {
894
- return Err(DataFusionError::Execution(format!(
895
- "LixDirectoryScanExec only supports partition 0, got {partition}"
896
- )));
897
- }
898
-
899
- let live_state = Arc::clone(&self.live_state);
900
- let request = self.request.clone();
901
- let filters = self.filters.clone();
902
- let limit = self.limit;
903
- let output_schema = Arc::clone(&self.output_schema);
904
- let batch_schema = Arc::clone(&self.batch_schema);
905
- let projection = self.projection.clone();
906
- let fut = async move {
907
- let rows = live_state.scan_rows(&request).await.map_err(|error| {
908
- DataFusionError::Execution(format!("sql2 lix_directory scan failed: {error}"))
909
- })?;
910
- let batch = lix_directory_record_batch(&batch_schema, rows).map_err(|error| {
911
- DataFusionError::Execution(format!(
912
- "sql2 lix_directory batch build failed: {error}"
913
- ))
914
- })?;
915
- let filtered = filter_lix_directory_batch(batch, &filters)?;
916
- let projected = match projection {
917
- Some(indices) => filtered.project(&indices).map_err(DataFusionError::from),
918
- None => Ok(filtered),
919
- }?;
920
- match limit {
921
- Some(limit) => Ok(projected.slice(0, limit.min(projected.num_rows()))),
922
- None => Ok(projected),
923
- }
924
- };
925
-
926
- Ok(Box::pin(RecordBatchStreamAdapter::new(
927
- output_schema,
928
- stream::once(fut).map_ok(|batch| batch),
929
- )))
930
- }
931
- }
932
-
933
- #[derive(Debug, Clone)]
934
- struct DirectoryDescriptorRecord {
935
- id: String,
936
- parent_id: Option<String>,
937
- name: String,
938
- hidden: bool,
939
- live: MaterializedLiveStateRow,
940
- }
941
-
942
- #[derive(Debug, Deserialize)]
943
- struct DirectoryDescriptorSnapshot {
944
- id: String,
945
- parent_id: Option<String>,
946
- name: String,
947
- hidden: Option<bool>,
948
- }
949
-
950
- #[cfg(test)]
951
- fn lix_directory_write_rows_from_batch(
952
- batch: &RecordBatch,
953
- branch_binding: Option<&str>,
954
- ) -> Result<Vec<TransactionWriteRow>> {
955
- lix_directory_write_rows_from_batch_with_options(batch, branch_binding, "lix_directory", true)
956
- }
957
-
958
- fn lix_directory_write_rows_from_batch_with_path_resolvers(
959
- batch: &RecordBatch,
960
- branch_binding: Option<&str>,
961
- surface_name: &str,
962
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
963
- generate_directory_id: &mut dyn FnMut() -> String,
964
- ) -> Result<Vec<TransactionWriteRow>> {
965
- lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
966
- batch,
967
- branch_binding,
968
- surface_name,
969
- true,
970
- Some(path_resolvers),
971
- Some(generate_directory_id),
972
- )
973
- }
974
-
975
- fn lix_directory_update_write_rows_from_batch(
976
- batch: &RecordBatch,
977
- assignments: &[(String, Arc<dyn PhysicalExpr>)],
978
- branch_binding: Option<&str>,
979
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
980
- ) -> Result<Vec<TransactionWriteRow>> {
981
- let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
982
- let mut rows = Vec::new();
983
- for row_index in 0..batch.num_rows() {
984
- let id = optional_string_value(batch, row_index, "id")?;
985
- let context = directory_row_context_from_update(
986
- batch,
987
- &assignment_values,
988
- row_index,
989
- branch_binding,
990
- )?;
991
- let parent_id =
992
- update_optional_string_value(batch, &assignment_values, row_index, "parent_id")?;
993
- let name = update_required_string_value(batch, &assignment_values, row_index, "name")?;
994
- if let Some(directory_id) = id.as_ref() {
995
- let resolver = path_resolvers
996
- .entry(directory_path_resolver_key(&context))
997
- .or_insert_with(DirectoryPathResolver::default);
998
- resolver
999
- .reserve_directory(parent_id.clone(), name.clone(), directory_id.clone())
1000
- .map_err(lix_error_to_datafusion_error)?;
1001
- }
1002
- rows.push(directory_descriptor_write_row(
1003
- DirectoryDescriptorWriteIntent {
1004
- id,
1005
- parent_id,
1006
- name,
1007
- hidden: update_optional_bool_value(batch, &assignment_values, row_index, "hidden")?,
1008
- context,
1009
- },
1010
- ));
1011
- }
1012
- Ok(rows)
1013
- }
1014
-
1015
- fn directory_branch_ids_from_batch(
1016
- batch: &RecordBatch,
1017
- branch_binding: Option<&str>,
1018
- ) -> Result<BTreeSet<String>> {
1019
- let mut branch_ids = BTreeSet::new();
1020
- for row_index in 0..batch.num_rows() {
1021
- branch_ids
1022
- .insert(directory_row_context_from_batch(batch, row_index, branch_binding)?.branch_id);
1023
- }
1024
- Ok(branch_ids)
1025
- }
1026
-
1027
- fn lix_directory_recursive_delete_rows_from_batch(
1028
- batch: &RecordBatch,
1029
- branch_binding: Option<&str>,
1030
- visible_filesystems: &BTreeMap<String, VisibleFilesystem>,
1031
- ) -> Result<(Vec<TransactionWriteRow>, u64)> {
1032
- let mut rows = Vec::new();
1033
- let mut seen = BTreeSet::new();
1034
- let mut count = 0u64;
1035
- for row_index in 0..batch.num_rows() {
1036
- let directory_id = required_string_value(batch, row_index, "id")?;
1037
- let context = directory_row_context_from_batch(batch, row_index, branch_binding)?;
1038
- let visible_filesystem = visible_filesystems.get(&context.branch_id).ok_or_else(|| {
1039
- DataFusionError::Execution(format!(
1040
- "DELETE FROM lix_directory missing visible filesystem for branch '{}'",
1041
- context.branch_id
1042
- ))
1043
- })?;
1044
- append_deduped_delete_plan(
1045
- &mut rows,
1046
- &mut seen,
1047
- plan_recursive_directory_delete(&directory_id, visible_filesystem, context),
1048
- &mut count,
1049
- );
1050
- }
1051
- Ok((rows, count))
1052
- }
1053
-
1054
- fn append_deduped_delete_plan(
1055
- rows: &mut Vec<TransactionWriteRow>,
1056
- seen: &mut BTreeSet<StateRowDedupeKey>,
1057
- plan: FilesystemDeletePlan,
1058
- count: &mut u64,
1059
- ) {
1060
- for row in plan.rows {
1061
- if seen.insert(StateRowDedupeKey::from(&row)) {
1062
- if is_user_visible_filesystem_delete_row(&row) {
1063
- *count += 1;
1064
- }
1065
- rows.push(row);
1066
- }
1067
- }
1068
- }
1069
-
1070
- fn is_user_visible_filesystem_delete_row(row: &TransactionWriteRow) -> bool {
1071
- matches!(
1072
- row.schema_key.as_str(),
1073
- "lix_directory_descriptor" | "lix_file_descriptor"
1074
- )
1075
- }
1076
-
1077
- #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
1078
- struct StateRowDedupeKey {
1079
- entity_pk: String,
1080
- schema_key: String,
1081
- file_id: Option<String>,
1082
- branch_id: String,
1083
- global: bool,
1084
- untracked: bool,
1085
- }
1086
-
1087
- impl From<&TransactionWriteRow> for StateRowDedupeKey {
1088
- fn from(row: &TransactionWriteRow) -> Self {
1089
- Self {
1090
- entity_pk: row
1091
- .entity_pk
1092
- .as_ref()
1093
- .expect("directory provider staged row should carry entity_pk")
1094
- .as_single_string_owned()
1095
- .expect("directory provider staged row entity primary key should project"),
1096
- schema_key: row.schema_key.clone(),
1097
- file_id: row.file_id.clone(),
1098
- branch_id: row.branch_id.clone(),
1099
- global: row.global,
1100
- untracked: row.untracked,
1101
- }
1102
- }
1103
- }
1104
-
1105
- #[cfg(test)]
1106
- fn lix_directory_write_rows_from_batch_with_options(
1107
- batch: &RecordBatch,
1108
- branch_binding: Option<&str>,
1109
- surface_name: &str,
1110
- reject_read_only_fields: bool,
1111
- ) -> Result<Vec<TransactionWriteRow>> {
1112
- lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
1113
- batch,
1114
- branch_binding,
1115
- surface_name,
1116
- reject_read_only_fields,
1117
- None,
1118
- None,
1119
- )
1120
- }
1121
-
1122
- fn lix_directory_write_rows_from_batch_with_options_and_path_resolvers(
1123
- batch: &RecordBatch,
1124
- branch_binding: Option<&str>,
1125
- surface_name: &str,
1126
- reject_read_only_fields: bool,
1127
- mut path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1128
- mut generate_directory_id: Option<&mut dyn FnMut() -> String>,
1129
- ) -> Result<Vec<TransactionWriteRow>> {
1130
- let mut rows = Vec::new();
1131
- for row_index in 0..batch.num_rows() {
1132
- if reject_read_only_fields {
1133
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_entity_pk")?;
1134
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_schema_key")?;
1135
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_change_id")?;
1136
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_created_at")?;
1137
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_updated_at")?;
1138
- reject_read_only_lix_directory_insert_field(batch, row_index, "lixcol_commit_id")?;
1139
- }
1140
-
1141
- let path = optional_string_value(batch, row_index, "path")?;
1142
- let id = optional_string_value(batch, row_index, "id")?;
1143
- let hidden = optional_bool_value(batch, row_index, "hidden")?;
1144
- let context = directory_row_context_from_batch(batch, row_index, branch_binding)?;
1145
-
1146
- if let Some(path) = path.filter(|_| reject_read_only_fields) {
1147
- reject_read_only_lix_directory_insert_field(batch, row_index, "parent_id")?;
1148
- reject_read_only_lix_directory_insert_field(batch, row_index, "name")?;
1149
-
1150
- let Some(path_resolvers) = path_resolvers.as_deref_mut() else {
1151
- return Err(DataFusionError::Execution(
1152
- "INSERT into lix_directory with path requires directory path resolver"
1153
- .to_string(),
1154
- ));
1155
- };
1156
- let resolver = path_resolvers
1157
- .entry(directory_path_resolver_key(&context))
1158
- .or_insert_with(DirectoryPathResolver::default);
1159
- let Some(generate_directory_id) = generate_directory_id.as_deref_mut() else {
1160
- return Err(DataFusionError::Execution(
1161
- "INSERT into lix_directory with path requires directory id generator"
1162
- .to_string(),
1163
- ));
1164
- };
1165
- let directory_id = id.unwrap_or_else(|| generate_directory_id());
1166
- let mut planned_rows = resolver
1167
- .create_directory_path_with_leaf_id(
1168
- &path,
1169
- Some(directory_id.clone()),
1170
- context,
1171
- hidden.unwrap_or(false),
1172
- generate_directory_id,
1173
- )
1174
- .map_err(lix_error_to_datafusion_error)?;
1175
- attach_lix_directory_insert_origin(&mut planned_rows, surface_name, &directory_id);
1176
- rows.extend(planned_rows);
1177
- continue;
1178
- }
1179
-
1180
- let parent_id = optional_string_value(batch, row_index, "parent_id")?;
1181
- let name = required_string_value(batch, row_index, "name")?;
1182
- if let Some(path_resolvers) = path_resolvers.as_deref_mut() {
1183
- if let Some(directory_id) = id.as_ref() {
1184
- let resolver = path_resolvers
1185
- .entry(directory_path_resolver_key(&context))
1186
- .or_insert_with(DirectoryPathResolver::default);
1187
- resolver
1188
- .reserve_directory(parent_id.clone(), name.clone(), directory_id.clone())
1189
- .map_err(lix_error_to_datafusion_error)?;
1190
- }
1191
- }
1192
- let mut row = directory_descriptor_write_row(DirectoryDescriptorWriteIntent {
1193
- id: id.clone(),
1194
- parent_id,
1195
- name,
1196
- hidden,
1197
- context,
1198
- });
1199
- if let Some(directory_id) = id.as_ref() {
1200
- row.origin = Some(lix_directory_insert_origin(surface_name, directory_id));
1201
- }
1202
- rows.push(row);
1203
- }
1204
- Ok(rows)
1205
- }
1206
-
1207
- fn attach_lix_directory_insert_origin(
1208
- rows: &mut [TransactionWriteRow],
1209
- surface_name: &str,
1210
- directory_id: &str,
1211
- ) {
1212
- let origin = lix_directory_insert_origin(surface_name, directory_id);
1213
- for row in rows {
1214
- if row.schema_key != DIRECTORY_SCHEMA_KEY {
1215
- continue;
1216
- }
1217
- let Some(entity_pk) = row
1218
- .entity_pk
1219
- .as_ref()
1220
- .and_then(|entity_pk| entity_pk.as_single_string_owned().ok())
1221
- else {
1222
- continue;
1223
- };
1224
- if entity_pk == directory_id {
1225
- row.origin = Some(origin.clone());
1226
- }
1227
- }
1228
- }
1229
-
1230
- fn lix_directory_insert_origin(surface_name: &str, directory_id: &str) -> TransactionWriteOrigin {
1231
- TransactionWriteOrigin {
1232
- surface: surface_name.to_string(),
1233
- operation: TransactionWriteOperation::Insert,
1234
- primary_key: Some(LogicalPrimaryKey {
1235
- columns: vec!["id".to_string()],
1236
- values: vec![directory_id.to_string()],
1237
- }),
1238
- }
1239
- }
1240
-
1241
- fn directory_row_context_from_batch(
1242
- batch: &RecordBatch,
1243
- row_index: usize,
1244
- branch_binding: Option<&str>,
1245
- ) -> Result<FilesystemRowContext> {
1246
- let scope = resolve_write_branch_scope(
1247
- optional_bool_value(batch, row_index, "lixcol_global")?,
1248
- optional_string_value(batch, row_index, "lixcol_branch_id")?,
1249
- branch_binding,
1250
- "INSERT into lix_directory_by_branch",
1251
- "lix_directory",
1252
- )?;
1253
-
1254
- Ok(FilesystemRowContext {
1255
- branch_id: scope.branch_id,
1256
- global: scope.global,
1257
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1258
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1259
- metadata: optional_metadata_value(batch, row_index, "lixcol_metadata", "lix_directory")?,
1260
- })
1261
- }
1262
-
1263
- fn directory_row_context_from_update(
1264
- batch: &RecordBatch,
1265
- assignment_values: &UpdateAssignmentValues,
1266
- row_index: usize,
1267
- branch_binding: Option<&str>,
1268
- ) -> Result<FilesystemRowContext> {
1269
- let scope = resolve_write_branch_scope(
1270
- optional_bool_value(batch, row_index, "lixcol_global")?,
1271
- optional_string_value(batch, row_index, "lixcol_branch_id")?,
1272
- branch_binding,
1273
- "UPDATE into lix_directory_by_branch",
1274
- "lix_directory",
1275
- )?;
1276
-
1277
- Ok(FilesystemRowContext {
1278
- branch_id: scope.branch_id,
1279
- global: scope.global,
1280
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1281
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1282
- metadata: update_optional_metadata_value(
1283
- batch,
1284
- assignment_values,
1285
- row_index,
1286
- "lixcol_metadata",
1287
- "lix_directory",
1288
- )?,
1289
- })
1290
- }
1291
-
1292
- fn directory_path_resolver_key(context: &FilesystemRowContext) -> String {
1293
- filesystem_storage_scope_key(
1294
- &context.branch_id,
1295
- context.global,
1296
- context.untracked,
1297
- context.file_id.as_deref(),
1298
- )
1299
- }
1300
-
1301
- async fn directory_path_resolvers_from_live_state(
1302
- live_state: Arc<dyn LiveStateReader>,
1303
- branch_binding: Option<&str>,
1304
- ) -> std::result::Result<BTreeMap<String, DirectoryPathResolver>, LixError> {
1305
- let rows = live_state
1306
- .scan_rows(&LiveStateScanRequest {
1307
- filter: LiveStateFilter {
1308
- schema_keys: vec![
1309
- DIRECTORY_SCHEMA_KEY.to_string(),
1310
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
1311
- ],
1312
- branch_ids: branch_binding
1313
- .map(|branch_id| vec![branch_id.to_string()])
1314
- .unwrap_or_default(),
1315
- ..Default::default()
1316
- },
1317
- ..Default::default()
1318
- })
1319
- .await?;
1320
- let mut resolvers = directory_path_resolvers_from_state_rows(rows)?;
1321
- if let Some(branch_id) = branch_binding {
1322
- let key = filesystem_storage_scope_key(branch_id, false, false, None);
1323
- resolvers
1324
- .entry(key)
1325
- .or_insert_with(DirectoryPathResolver::default);
1326
- }
1327
- Ok(resolvers)
1328
- }
1329
-
1330
- fn lix_directory_record_batch(
1331
- schema: &SchemaRef,
1332
- rows: Vec<MaterializedLiveStateRow>,
1333
- ) -> Result<RecordBatch, LixError> {
1334
- let mut directory_rows = Vec::<DirectoryDescriptorRecord>::new();
1335
-
1336
- for row in rows {
1337
- if row.schema_key != DIRECTORY_SCHEMA_KEY {
1338
- continue;
1339
- }
1340
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1341
- continue;
1342
- };
1343
- let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(snapshot_content)
1344
- .map_err(|error| {
1345
- LixError::new(
1346
- "LIX_ERROR_UNKNOWN",
1347
- format!("invalid lix_directory_descriptor snapshot JSON: {error}"),
1348
- )
1349
- })?;
1350
- directory_rows.push(DirectoryDescriptorRecord {
1351
- id: snapshot.id,
1352
- parent_id: snapshot.parent_id,
1353
- name: snapshot.name,
1354
- hidden: snapshot.hidden.unwrap_or(false),
1355
- live: row,
1356
- });
1357
- }
1358
-
1359
- let directory_paths = derive_directory_paths(&directory_rows)?;
1360
- let mut ids = Vec::new();
1361
- let mut paths = Vec::new();
1362
- let mut parent_ids = Vec::new();
1363
- let mut names = Vec::new();
1364
- let mut hiddens = Vec::new();
1365
- let mut entity_pks = Vec::new();
1366
- let mut schema_keys = Vec::new();
1367
- let mut file_ids = Vec::new();
1368
- let mut globals = Vec::new();
1369
- let mut change_ids = Vec::new();
1370
- let mut created_ats = Vec::new();
1371
- let mut updated_ats = Vec::new();
1372
- let mut commit_ids = Vec::new();
1373
- let mut untracked_values = Vec::new();
1374
- let mut metadata_values = Vec::new();
1375
- let mut branch_ids = Vec::new();
1376
-
1377
- for directory in directory_rows {
1378
- ids.push(Some(directory.id.clone()));
1379
- paths.push(
1380
- directory_paths
1381
- .get(&(directory.live.branch_id.clone(), directory.id.clone()))
1382
- .cloned(),
1383
- );
1384
- parent_ids.push(directory.parent_id);
1385
- names.push(Some(directory.name));
1386
- hiddens.push(Some(directory.hidden));
1387
- entity_pks.push(Some(directory.live.entity_pk.as_json_array_text()?));
1388
- schema_keys.push(Some(directory.live.schema_key));
1389
- file_ids.push(directory.live.file_id);
1390
- globals.push(Some(directory.live.global));
1391
- change_ids.push(directory.live.change_id);
1392
- created_ats.push(directory.live.created_at);
1393
- updated_ats.push(directory.live.updated_at);
1394
- commit_ids.push(directory.live.commit_id);
1395
- untracked_values.push(Some(directory.live.untracked));
1396
- metadata_values.push(directory.live.metadata.as_ref().map(serialize_row_metadata));
1397
- branch_ids.push(Some(directory.live.branch_id));
1398
- }
1399
-
1400
- let mut columns = Vec::<ArrayRef>::with_capacity(schema.fields().len());
1401
- for field in schema.fields() {
1402
- let array: ArrayRef = match field.name().as_str() {
1403
- "id" => Arc::new(StringArray::from(ids.clone())),
1404
- "path" => Arc::new(StringArray::from(paths.clone())),
1405
- "parent_id" => Arc::new(StringArray::from(parent_ids.clone())),
1406
- "name" => Arc::new(StringArray::from(names.clone())),
1407
- "hidden" => Arc::new(BooleanArray::from(hiddens.clone())),
1408
- "lixcol_entity_pk" => Arc::new(StringArray::from(entity_pks.clone())),
1409
- "lixcol_schema_key" => Arc::new(StringArray::from(schema_keys.clone())),
1410
- "lixcol_file_id" => Arc::new(StringArray::from(file_ids.clone())),
1411
- "lixcol_global" => Arc::new(BooleanArray::from(globals.clone())),
1412
- "lixcol_change_id" => Arc::new(StringArray::from(change_ids.clone())),
1413
- "lixcol_created_at" => Arc::new(StringArray::from(created_ats.clone())),
1414
- "lixcol_updated_at" => Arc::new(StringArray::from(updated_ats.clone())),
1415
- "lixcol_commit_id" => Arc::new(StringArray::from(commit_ids.clone())),
1416
- "lixcol_untracked" => Arc::new(BooleanArray::from(untracked_values.clone())),
1417
- "lixcol_metadata" => Arc::new(StringArray::from(metadata_values.clone())),
1418
- "lixcol_branch_id" => Arc::new(StringArray::from(branch_ids.clone())),
1419
- other => {
1420
- return Err(LixError::new(
1421
- "LIX_ERROR_UNKNOWN",
1422
- format!(
1423
- "sql2 lix_directory provider does not support projected column '{other}'"
1424
- ),
1425
- ))
1426
- }
1427
- };
1428
- columns.push(array);
1429
- }
1430
-
1431
- let options = RecordBatchOptions::new().with_row_count(Some(ids.len()));
1432
- RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
1433
- LixError::new(
1434
- "LIX_ERROR_UNKNOWN",
1435
- format!("sql2 failed to build lix_directory record batch: {error}"),
1436
- )
1437
- })
1438
- }
1439
-
1440
- fn derive_directory_paths(
1441
- rows: &[DirectoryDescriptorRecord],
1442
- ) -> std::result::Result<BTreeMap<(String, String), String>, LixError> {
1443
- let mut by_branch = BTreeMap::<String, BTreeMap<String, &DirectoryDescriptorRecord>>::new();
1444
- for row in rows {
1445
- by_branch
1446
- .entry(row.live.branch_id.clone())
1447
- .or_default()
1448
- .insert(row.id.clone(), row);
1449
- }
1450
-
1451
- let mut paths = BTreeMap::<(String, String), String>::new();
1452
- for (branch_id, records) in by_branch {
1453
- for directory_id in records.keys() {
1454
- derive_directory_path_for(
1455
- &branch_id,
1456
- directory_id,
1457
- &records,
1458
- &mut paths,
1459
- &mut BTreeSet::new(),
1460
- )?;
1461
- }
1462
- }
1463
- Ok(paths)
1464
- }
1465
-
1466
- fn derive_directory_path_for(
1467
- branch_id: &str,
1468
- directory_id: &str,
1469
- records: &BTreeMap<String, &DirectoryDescriptorRecord>,
1470
- paths: &mut BTreeMap<(String, String), String>,
1471
- visiting: &mut BTreeSet<String>,
1472
- ) -> std::result::Result<Option<String>, LixError> {
1473
- if let Some(path) = paths.get(&(branch_id.to_string(), directory_id.to_string())) {
1474
- return Ok(Some(path.clone()));
1475
- }
1476
- if !visiting.insert(directory_id.to_string()) {
1477
- return Err(directory_parent_cycle_error(branch_id, directory_id));
1478
- }
1479
- let Some(row) = records.get(directory_id) else {
1480
- visiting.remove(directory_id);
1481
- return Ok(None);
1482
- };
1483
- let path = match row.parent_id.as_deref() {
1484
- Some(parent_id) => {
1485
- let Some(parent_path) =
1486
- derive_directory_path_for(branch_id, parent_id, records, paths, visiting)?
1487
- else {
1488
- visiting.remove(directory_id);
1489
- return Ok(None);
1490
- };
1491
- format!("{parent_path}{}/", row.name)
1492
- }
1493
- None => format!("/{}/", row.name),
1494
- };
1495
- visiting.remove(directory_id);
1496
- paths.insert(
1497
- (branch_id.to_string(), directory_id.to_string()),
1498
- path.clone(),
1499
- );
1500
- Ok(Some(path))
1501
- }
1502
-
1503
- fn directory_parent_cycle_error(branch_id: &str, directory_id: &str) -> LixError {
1504
- LixError::new(
1505
- LixError::CODE_CONSTRAINT_VIOLATION,
1506
- format!(
1507
- "lix_directory_descriptor parent_id cycle in branch '{branch_id}' while resolving directory '{directory_id}'"
1508
- ),
1509
- )
1510
- }
1511
-
1512
- fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1513
- let fields = match projection {
1514
- Some(indices) => indices
1515
- .iter()
1516
- .map(|index| base_schema.field(*index).as_ref().clone())
1517
- .collect::<Vec<_>>(),
1518
- None => base_schema
1519
- .fields()
1520
- .iter()
1521
- .map(|field| field.as_ref().clone())
1522
- .collect::<Vec<_>>(),
1523
- };
1524
- Ok(Arc::new(Schema::new(fields)))
1525
- }
1526
-
1527
- fn lix_directory_scan_request(
1528
- branch_binding: Option<&str>,
1529
- projected_schema: Option<&Schema>,
1530
- limit: Option<usize>,
1531
- ) -> LiveStateScanRequest {
1532
- LiveStateScanRequest {
1533
- filter: LiveStateFilter {
1534
- schema_keys: vec![DIRECTORY_SCHEMA_KEY.to_string()],
1535
- branch_ids: branch_binding
1536
- .map(|branch_id| vec![branch_id.to_string()])
1537
- .unwrap_or_default(),
1538
- ..LiveStateFilter::default()
1539
- },
1540
- projection: lix_directory_live_state_projection(projected_schema),
1541
- limit,
1542
- }
1543
- }
1544
-
1545
- fn lix_directory_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
1546
- let Some(schema) = projected_schema else {
1547
- return LiveStateProjection::default();
1548
- };
1549
- let mut columns = Vec::new();
1550
- let needs_snapshot = schema
1551
- .fields()
1552
- .iter()
1553
- .any(|field| matches!(field.name().as_str(), "parent_id" | "name" | "hidden"));
1554
- if needs_snapshot {
1555
- columns.push("snapshot_content".to_string());
1556
- }
1557
- if schema
1558
- .fields()
1559
- .iter()
1560
- .any(|field| field.name() == "lixcol_metadata")
1561
- {
1562
- columns.push("metadata".to_string());
1563
- }
1564
- LiveStateProjection { columns }
1565
- }
1566
-
1567
- fn validate_lix_directory_update_assignments(
1568
- schema: &SchemaRef,
1569
- assignments: &[(String, Expr)],
1570
- ) -> Result<()> {
1571
- for (column_name, _) in assignments {
1572
- schema.field_with_name(column_name).map_err(|_| {
1573
- DataFusionError::Plan(format!(
1574
- "UPDATE lix_directory failed: column '{column_name}' does not exist"
1575
- ))
1576
- })?;
1577
- if !matches!(
1578
- column_name.as_str(),
1579
- "parent_id" | "name" | "hidden" | "lixcol_metadata"
1580
- ) {
1581
- return Err(DataFusionError::Execution(format!(
1582
- "UPDATE lix_directory cannot stage read-only column '{column_name}'"
1583
- )));
1584
- }
1585
- }
1586
- Ok(())
1587
- }
1588
-
1589
- fn filter_lix_directory_batch(
1590
- batch: RecordBatch,
1591
- filters: &[Arc<dyn PhysicalExpr>],
1592
- ) -> Result<RecordBatch> {
1593
- let Some(mask) = evaluate_lix_directory_filters(&batch, filters)? else {
1594
- return Ok(batch);
1595
- };
1596
- Ok(filter_record_batch(&batch, &mask)?)
1597
- }
1598
-
1599
- fn evaluate_lix_directory_filters(
1600
- batch: &RecordBatch,
1601
- filters: &[Arc<dyn PhysicalExpr>],
1602
- ) -> Result<Option<BooleanArray>> {
1603
- if filters.is_empty() {
1604
- return Ok(None);
1605
- }
1606
-
1607
- let mut combined_mask: Option<BooleanArray> = None;
1608
- for filter in filters {
1609
- let result = filter.evaluate(batch)?;
1610
- let array = result.into_array(batch.num_rows())?;
1611
- let bool_array = array
1612
- .as_any()
1613
- .downcast_ref::<BooleanArray>()
1614
- .ok_or_else(|| {
1615
- DataFusionError::Execution("lix_directory filter was not boolean".to_string())
1616
- })?;
1617
- let normalized = bool_array
1618
- .iter()
1619
- .map(|value| Some(value == Some(true)))
1620
- .collect::<BooleanArray>();
1621
- combined_mask = Some(match combined_mask {
1622
- Some(existing) => and(&existing, &normalized)?,
1623
- None => normalized,
1624
- });
1625
- }
1626
- Ok(combined_mask)
1627
- }
1628
-
1629
- fn dml_count_schema() -> SchemaRef {
1630
- Arc::new(Schema::new(vec![Field::new(
1631
- "count",
1632
- DataType::UInt64,
1633
- false,
1634
- )]))
1635
- }
1636
-
1637
- fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
1638
- RecordBatch::try_new(
1639
- schema,
1640
- vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
1641
- )
1642
- .map_err(DataFusionError::from)
1643
- }
1644
-
1645
- fn record_batch_has_non_null_column(batch: &RecordBatch, column_name: &str) -> Result<bool> {
1646
- for row_index in 0..batch.num_rows() {
1647
- if optional_scalar_value(batch, row_index, column_name)?
1648
- .is_some_and(|value| !value.is_null())
1649
- {
1650
- return Ok(true);
1651
- }
1652
- }
1653
- Ok(false)
1654
- }
1655
-
1656
- fn reject_read_only_lix_directory_insert_field(
1657
- batch: &RecordBatch,
1658
- row_index: usize,
1659
- column_name: &str,
1660
- ) -> Result<()> {
1661
- if optional_scalar_value(batch, row_index, column_name)?.is_some_and(|value| !value.is_null()) {
1662
- return Err(DataFusionError::Execution(format!(
1663
- "INSERT into lix_directory cannot stage read-only column '{column_name}'"
1664
- )));
1665
- }
1666
- Ok(())
1667
- }
1668
-
1669
- fn required_string_value(
1670
- batch: &RecordBatch,
1671
- row_index: usize,
1672
- column_name: &str,
1673
- ) -> Result<String> {
1674
- optional_string_value(batch, row_index, column_name)?.ok_or_else(|| {
1675
- DataFusionError::Execution(format!(
1676
- "INSERT into lix_directory requires non-null text column '{column_name}'"
1677
- ))
1678
- })
1679
- }
1680
-
1681
- fn update_required_string_value(
1682
- batch: &RecordBatch,
1683
- assignment_values: &UpdateAssignmentValues,
1684
- row_index: usize,
1685
- column_name: &str,
1686
- ) -> Result<String> {
1687
- update_optional_string_value(batch, assignment_values, row_index, column_name)?.ok_or_else(
1688
- || {
1689
- DataFusionError::Execution(format!(
1690
- "UPDATE lix_directory requires non-null text column '{column_name}'"
1691
- ))
1692
- },
1693
- )
1694
- }
1695
-
1696
- fn update_optional_string_value(
1697
- batch: &RecordBatch,
1698
- assignment_values: &UpdateAssignmentValues,
1699
- row_index: usize,
1700
- column_name: &str,
1701
- ) -> Result<Option<String>> {
1702
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
1703
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
1704
- InsertCell::Provided(SqlCell::Value(
1705
- ScalarValue::Utf8(Some(value))
1706
- | ScalarValue::Utf8View(Some(value))
1707
- | ScalarValue::LargeUtf8(Some(value)),
1708
- )) => Ok(Some(value)),
1709
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
1710
- "UPDATE lix_directory expected text-compatible column '{column_name}', got {other:?}"
1711
- ))),
1712
- }
1713
- }
1714
-
1715
- fn update_optional_metadata_value(
1716
- batch: &RecordBatch,
1717
- assignment_values: &UpdateAssignmentValues,
1718
- row_index: usize,
1719
- column_name: &str,
1720
- context: &str,
1721
- ) -> Result<Option<TransactionJson>> {
1722
- update_optional_string_value(batch, assignment_values, row_index, column_name)?
1723
- .map(|value| {
1724
- let metadata = parse_row_metadata_value(&value, context)
1725
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)?;
1726
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
1727
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)
1728
- })
1729
- .transpose()
1730
- }
1731
-
1732
- fn update_optional_bool_value(
1733
- batch: &RecordBatch,
1734
- assignment_values: &UpdateAssignmentValues,
1735
- row_index: usize,
1736
- column_name: &str,
1737
- ) -> Result<Option<bool>> {
1738
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
1739
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
1740
- InsertCell::Provided(SqlCell::Value(ScalarValue::Boolean(Some(value)))) => Ok(Some(value)),
1741
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
1742
- "UPDATE lix_directory expected boolean column '{column_name}', got {other:?}"
1743
- ))),
1744
- }
1745
- }
1746
-
1747
- fn optional_string_value(
1748
- batch: &RecordBatch,
1749
- row_index: usize,
1750
- column_name: &str,
1751
- ) -> Result<Option<String>> {
1752
- match optional_scalar_value(batch, row_index, column_name)? {
1753
- None
1754
- | Some(ScalarValue::Null)
1755
- | Some(ScalarValue::Utf8(None))
1756
- | Some(ScalarValue::Utf8View(None))
1757
- | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
1758
- Some(ScalarValue::Utf8(Some(value)))
1759
- | Some(ScalarValue::Utf8View(Some(value)))
1760
- | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
1761
- Some(other) => Err(DataFusionError::Execution(format!(
1762
- "INSERT into lix_directory expected text-compatible column '{column_name}', got {other:?}"
1763
- ))),
1764
- }
1765
- }
1766
-
1767
- fn optional_metadata_value(
1768
- batch: &RecordBatch,
1769
- row_index: usize,
1770
- column_name: &str,
1771
- context: &str,
1772
- ) -> Result<Option<TransactionJson>> {
1773
- optional_string_value(batch, row_index, column_name)?
1774
- .map(|value| {
1775
- let metadata = parse_row_metadata_value(&value, context)
1776
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)?;
1777
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
1778
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)
1779
- })
1780
- .transpose()
1781
- }
1782
-
1783
- fn optional_bool_value(
1784
- batch: &RecordBatch,
1785
- row_index: usize,
1786
- column_name: &str,
1787
- ) -> Result<Option<bool>> {
1788
- match optional_scalar_value(batch, row_index, column_name)? {
1789
- None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
1790
- Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
1791
- Some(other) => Err(DataFusionError::Execution(format!(
1792
- "INSERT into lix_directory expected boolean column '{column_name}', got {other:?}"
1793
- ))),
1794
- }
1795
- }
1796
-
1797
- fn optional_scalar_value(
1798
- batch: &RecordBatch,
1799
- row_index: usize,
1800
- column_name: &str,
1801
- ) -> Result<Option<ScalarValue>> {
1802
- let schema = batch.schema();
1803
- let column_index = match schema.index_of(column_name) {
1804
- Ok(column_index) => column_index,
1805
- Err(_) => return Ok(None),
1806
- };
1807
- if row_index >= batch.num_rows() {
1808
- return Err(DataFusionError::Execution(format!(
1809
- "row index {row_index} out of bounds for lix_directory batch with {} rows",
1810
- batch.num_rows()
1811
- )));
1812
- }
1813
- ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
1814
- .map(Some)
1815
- .map_err(|error| {
1816
- DataFusionError::Execution(format!(
1817
- "failed to decode lix_directory column '{column_name}' at row {row_index}: {error}"
1818
- ))
1819
- })
1820
- }
1821
-
1822
- pub(super) fn lix_directory_schema() -> SchemaRef {
1823
- Arc::new(Schema::new(vec![
1824
- Field::new("id", DataType::Utf8, true),
1825
- Field::new("path", DataType::Utf8, true),
1826
- Field::new("parent_id", DataType::Utf8, true),
1827
- Field::new("name", DataType::Utf8, false),
1828
- Field::new("hidden", DataType::Boolean, true),
1829
- json_field("lixcol_entity_pk", false),
1830
- Field::new("lixcol_schema_key", DataType::Utf8, false),
1831
- Field::new("lixcol_file_id", DataType::Utf8, true),
1832
- Field::new("lixcol_global", DataType::Boolean, true),
1833
- Field::new("lixcol_change_id", DataType::Utf8, true),
1834
- Field::new("lixcol_created_at", DataType::Utf8, true),
1835
- Field::new("lixcol_updated_at", DataType::Utf8, true),
1836
- Field::new("lixcol_commit_id", DataType::Utf8, true),
1837
- Field::new("lixcol_untracked", DataType::Boolean, true),
1838
- json_field("lixcol_metadata", true),
1839
- ]))
1840
- }
1841
-
1842
- pub(super) fn lix_directory_by_branch_schema() -> SchemaRef {
1843
- let mut fields = lix_directory_schema()
1844
- .fields()
1845
- .iter()
1846
- .map(|field| field.as_ref().clone())
1847
- .collect::<Vec<_>>();
1848
- fields.push(Field::new("lixcol_branch_id", DataType::Utf8, false));
1849
- Arc::new(Schema::new(fields))
1850
- }
1851
-
1852
- fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
1853
- crate::sql2::error::datafusion_error_to_lix_error(error)
1854
- }
1855
-
1856
- fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
1857
- crate::sql2::error::lix_error_to_datafusion_error(error)
1858
- }
1859
-
1860
- #[cfg(test)]
1861
- mod tests {
1862
- use std::collections::{BTreeMap, BTreeSet};
1863
- use std::sync::Arc;
1864
-
1865
- use async_trait::async_trait;
1866
- use datafusion::arrow::array::{ArrayRef, BooleanArray, StringArray};
1867
- use datafusion::arrow::datatypes::{DataType, Field, Schema};
1868
- use datafusion::arrow::record_batch::RecordBatch;
1869
- use datafusion::execution::TaskContext;
1870
- use serde_json::json;
1871
-
1872
- use crate::binary_cas::BlobDataReader;
1873
- use crate::functions::{
1874
- FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
1875
- };
1876
- use crate::live_state::{LiveStateScanRequest, MaterializedLiveStateRow};
1877
- use crate::sql2::dml::InsertSink;
1878
- use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
1879
- use crate::transaction::types::{
1880
- TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
1881
- TransactionWriteRow,
1882
- };
1883
- use crate::LixError;
1884
-
1885
- use super::{
1886
- derive_directory_path_for, directory_path_resolvers_from_state_rows,
1887
- lix_directory_by_branch_schema, lix_directory_insert_origin, lix_directory_record_batch,
1888
- lix_directory_recursive_delete_rows_from_batch, lix_directory_write_rows_from_batch,
1889
- lix_directory_write_rows_from_batch_with_path_resolvers, BranchBinding,
1890
- DirectoryDescriptorRecord, LixDirectoryInsertSink,
1891
- };
1892
- use crate::sql2::filesystem_visibility::VisibleFilesystem;
1893
-
1894
- fn test_id_generator(ids: &'static [&'static str]) -> impl FnMut() -> String {
1895
- let mut ids = ids.iter();
1896
- move || ids.next().expect("test id should exist").to_string()
1897
- }
1898
-
1899
- fn test_functions() -> FunctionProviderHandle {
1900
- SharedFunctionProvider::new(
1901
- Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
1902
- )
1903
- }
1904
-
1905
- #[derive(Default)]
1906
- struct CapturingWriteContext {
1907
- rows: Vec<MaterializedLiveStateRow>,
1908
- writes: Vec<TransactionWrite>,
1909
- }
1910
-
1911
- #[async_trait]
1912
- impl BlobDataReader for CapturingWriteContext {
1913
- async fn load_bytes_many(
1914
- &self,
1915
- hashes: &[crate::binary_cas::BlobHash],
1916
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1917
- Ok(crate::binary_cas::BlobBytesBatch::new(vec![
1918
- None;
1919
- hashes.len()
1920
- ]))
1921
- }
1922
- }
1923
-
1924
- #[async_trait]
1925
- impl SqlWriteExecutionContext for CapturingWriteContext {
1926
- fn active_branch_id(&self) -> &str {
1927
- "branch-a"
1928
- }
1929
-
1930
- fn functions(&self) -> FunctionProviderHandle {
1931
- test_functions()
1932
- }
1933
-
1934
- fn list_visible_schemas(&self) -> Result<Vec<serde_json::Value>, LixError> {
1935
- Ok(Vec::new())
1936
- }
1937
-
1938
- async fn load_bytes_many(
1939
- &mut self,
1940
- hashes: &[crate::binary_cas::BlobHash],
1941
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
1942
- BlobDataReader::load_bytes_many(self, hashes).await
1943
- }
1944
-
1945
- async fn scan_live_state(
1946
- &mut self,
1947
- _request: &LiveStateScanRequest,
1948
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
1949
- Ok(self.rows.clone())
1950
- }
1951
-
1952
- async fn load_branch_head(&mut self, branch_id: &str) -> Result<Option<String>, LixError> {
1953
- if branch_id == "ghost-branch" {
1954
- return Ok(None);
1955
- }
1956
- Ok(Some(format!("commit-{branch_id}")))
1957
- }
1958
-
1959
- async fn stage_write(
1960
- &mut self,
1961
- write: TransactionWrite,
1962
- ) -> Result<TransactionWriteOutcome, LixError> {
1963
- self.writes.push(write);
1964
- Ok(TransactionWriteOutcome { count: 0 })
1965
- }
1966
- }
1967
-
1968
- fn live_row(
1969
- entity_pk: &str,
1970
- branch_id: &str,
1971
- snapshot_content: &str,
1972
- ) -> MaterializedLiveStateRow {
1973
- live_filesystem_row(
1974
- entity_pk,
1975
- super::DIRECTORY_SCHEMA_KEY,
1976
- None,
1977
- branch_id,
1978
- snapshot_content,
1979
- )
1980
- }
1981
-
1982
- fn live_filesystem_row(
1983
- entity_pk: &str,
1984
- schema_key: &str,
1985
- file_id: Option<&str>,
1986
- branch_id: &str,
1987
- snapshot_content: &str,
1988
- ) -> MaterializedLiveStateRow {
1989
- MaterializedLiveStateRow {
1990
- entity_pk: crate::entity_pk::EntityPk::single(entity_pk),
1991
- schema_key: schema_key.to_string(),
1992
- file_id: file_id.map(ToOwned::to_owned),
1993
- snapshot_content: Some(snapshot_content.to_string()),
1994
- metadata: Some(json!({"source": "test"}).to_string()),
1995
- deleted: false,
1996
- branch_id: branch_id.to_string(),
1997
- change_id: Some(format!("change-{entity_pk}")),
1998
- commit_id: Some(format!("commit-{entity_pk}")),
1999
- global: false,
2000
- untracked: false,
2001
- created_at: "2026-04-23T00:00:00Z".to_string(),
2002
- updated_at: "2026-04-23T01:00:00Z".to_string(),
2003
- }
2004
- }
2005
-
2006
- fn filesystem_rows() -> Vec<MaterializedLiveStateRow> {
2007
- vec![
2008
- live_filesystem_row(
2009
- "dir-docs",
2010
- "lix_directory_descriptor",
2011
- None,
2012
- "branch-a",
2013
- r#"{"id":"dir-docs","parent_id":null,"name":"docs","hidden":false}"#,
2014
- ),
2015
- live_filesystem_row(
2016
- "dir-guides",
2017
- "lix_directory_descriptor",
2018
- None,
2019
- "branch-a",
2020
- r#"{"id":"dir-guides","parent_id":"dir-docs","name":"guides","hidden":false}"#,
2021
- ),
2022
- live_filesystem_row(
2023
- "file-index",
2024
- "lix_file_descriptor",
2025
- None,
2026
- "branch-a",
2027
- r#"{"id":"file-index","directory_id":"dir-docs","name":"index.md","hidden":false}"#,
2028
- ),
2029
- live_filesystem_row(
2030
- "file-readme",
2031
- "lix_file_descriptor",
2032
- None,
2033
- "branch-a",
2034
- r#"{"id":"file-readme","directory_id":"dir-guides","name":"readme.md","hidden":false}"#,
2035
- ),
2036
- live_filesystem_row(
2037
- "file-readme",
2038
- "lix_binary_blob_ref",
2039
- Some("file-readme"),
2040
- "branch-a",
2041
- r#"{"id":"file-readme","blob_hash":"abc123","size_bytes":5}"#,
2042
- ),
2043
- ]
2044
- }
2045
-
2046
- fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
2047
- Arc::new(StringArray::from(values)) as ArrayRef
2048
- }
2049
-
2050
- fn directory_insert_batch(include_branch: bool, global: bool) -> RecordBatch {
2051
- let mut fields = vec![
2052
- Field::new("id", DataType::Utf8, false),
2053
- Field::new("parent_id", DataType::Utf8, true),
2054
- Field::new("name", DataType::Utf8, false),
2055
- Field::new("hidden", DataType::Boolean, false),
2056
- Field::new("lixcol_global", DataType::Boolean, false),
2057
- Field::new("lixcol_metadata", DataType::Utf8, true),
2058
- ];
2059
- let mut columns = vec![
2060
- string_column(vec![Some("dir-docs")]),
2061
- string_column(vec![None]),
2062
- string_column(vec![Some("docs")]),
2063
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2064
- Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
2065
- string_column(vec![Some("{\"source\":\"directory\"}")]),
2066
- ];
2067
- if include_branch {
2068
- fields.push(Field::new("lixcol_branch_id", DataType::Utf8, false));
2069
- columns.push(string_column(vec![Some("branch-a")]));
2070
- }
2071
- RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
2072
- .expect("directory insert batch should build")
2073
- }
2074
-
2075
- fn directory_path_insert_batch(path: &str) -> RecordBatch {
2076
- RecordBatch::try_new(
2077
- Arc::new(Schema::new(vec![
2078
- Field::new("id", DataType::Utf8, false),
2079
- Field::new("path", DataType::Utf8, true),
2080
- Field::new("hidden", DataType::Boolean, false),
2081
- Field::new("lixcol_branch_id", DataType::Utf8, false),
2082
- ])),
2083
- vec![
2084
- string_column(vec![Some("dir-nested")]),
2085
- string_column(vec![Some(path)]),
2086
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2087
- string_column(vec![Some("branch-a")]),
2088
- ],
2089
- )
2090
- .expect("directory path insert batch should build")
2091
- }
2092
-
2093
- fn directory_delete_batch(ids: &[&str]) -> RecordBatch {
2094
- RecordBatch::try_new(
2095
- Arc::new(Schema::new(vec![
2096
- Field::new("id", DataType::Utf8, false),
2097
- Field::new("lixcol_branch_id", DataType::Utf8, false),
2098
- ])),
2099
- vec![
2100
- string_column(ids.iter().copied().map(Some).collect::<Vec<_>>()),
2101
- string_column(vec![Some("branch-a"); ids.len()]),
2102
- ],
2103
- )
2104
- .expect("directory delete batch should build")
2105
- }
2106
-
2107
- #[test]
2108
- fn derives_nested_directory_paths() {
2109
- let root = DirectoryDescriptorRecord {
2110
- id: "dir-docs".to_string(),
2111
- parent_id: None,
2112
- name: "docs".to_string(),
2113
- hidden: false,
2114
- live: live_row(
2115
- "dir-docs",
2116
- "branch-a",
2117
- "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\",\"hidden\":false}",
2118
- ),
2119
- };
2120
- let child = DirectoryDescriptorRecord {
2121
- id: "dir-guides".to_string(),
2122
- parent_id: Some("dir-docs".to_string()),
2123
- name: "guides".to_string(),
2124
- hidden: false,
2125
- live: live_row(
2126
- "dir-guides",
2127
- "branch-a",
2128
- "{\"id\":\"dir-guides\",\"parent_id\":\"dir-docs\",\"name\":\"guides\",\"hidden\":false}",
2129
- ),
2130
- };
2131
- let mut records = BTreeMap::new();
2132
- records.insert(root.id.clone(), &root);
2133
- records.insert(child.id.clone(), &child);
2134
- let mut paths = BTreeMap::new();
2135
-
2136
- assert_eq!(
2137
- derive_directory_path_for(
2138
- "branch-a",
2139
- "dir-guides",
2140
- &records,
2141
- &mut paths,
2142
- &mut BTreeSet::new()
2143
- )
2144
- .expect("path derivation should succeed"),
2145
- Some("/docs/guides/".to_string())
2146
- );
2147
- }
2148
-
2149
- #[test]
2150
- fn record_batch_projects_directory_columns() {
2151
- let rows = vec![
2152
- live_row(
2153
- "dir-docs",
2154
- "branch-a",
2155
- "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\",\"hidden\":false}",
2156
- ),
2157
- live_row(
2158
- "dir-guides",
2159
- "branch-a",
2160
- "{\"id\":\"dir-guides\",\"parent_id\":\"dir-docs\",\"name\":\"guides\",\"hidden\":true}",
2161
- ),
2162
- ];
2163
-
2164
- let batch = lix_directory_record_batch(&lix_directory_by_branch_schema(), rows)
2165
- .expect("directory batch should build");
2166
-
2167
- assert_eq!(batch.num_rows(), 2);
2168
- assert_eq!(
2169
- batch
2170
- .column_by_name("path")
2171
- .expect("path column")
2172
- .as_any()
2173
- .downcast_ref::<StringArray>()
2174
- .expect("path is string")
2175
- .value(1),
2176
- "/docs/guides/"
2177
- );
2178
- assert_eq!(
2179
- batch
2180
- .column_by_name("lixcol_branch_id")
2181
- .expect("branch column")
2182
- .as_any()
2183
- .downcast_ref::<StringArray>()
2184
- .expect("branch is string")
2185
- .value(1),
2186
- "branch-a"
2187
- );
2188
- }
2189
-
2190
- #[test]
2191
- fn decodes_directory_insert_into_lix_state_write_row() {
2192
- let rows = lix_directory_write_rows_from_batch(&directory_insert_batch(true, false), None)
2193
- .expect("directory batch should decode");
2194
-
2195
- assert_eq!(
2196
- rows,
2197
- vec![TransactionWriteRow {
2198
- entity_pk: Some(crate::entity_pk::EntityPk::single("dir-docs")),
2199
- schema_key: super::DIRECTORY_SCHEMA_KEY.to_string(),
2200
- file_id: None,
2201
- snapshot: Some(TransactionJson::from_value_for_test(
2202
- json!({"hidden":false,"id":"dir-docs","name":"docs","parent_id":null})
2203
- )),
2204
- metadata: Some(TransactionJson::from_value_for_test(
2205
- json!({"source": "directory"})
2206
- )),
2207
- origin: Some(lix_directory_insert_origin("lix_directory", "dir-docs")),
2208
- created_at: None,
2209
- updated_at: None,
2210
- global: false,
2211
- change_id: None,
2212
- commit_id: None,
2213
- untracked: false,
2214
- branch_id: "branch-a".to_string(),
2215
- }]
2216
- );
2217
- }
2218
-
2219
- #[test]
2220
- fn active_directory_insert_defaults_branch_id() {
2221
- let rows = lix_directory_write_rows_from_batch(
2222
- &directory_insert_batch(false, false),
2223
- Some("branch-active"),
2224
- )
2225
- .expect("active directory batch should decode");
2226
-
2227
- assert_eq!(rows[0].branch_id, "branch-active");
2228
- }
2229
-
2230
- #[test]
2231
- fn by_branch_directory_insert_requires_branch_id_for_non_global_rows() {
2232
- let error =
2233
- lix_directory_write_rows_from_batch(&directory_insert_batch(false, false), None)
2234
- .expect_err("by-branch insert should require branch id");
2235
-
2236
- assert!(
2237
- error.to_string().contains("requires lixcol_branch_id"),
2238
- "unexpected error: {error}"
2239
- );
2240
- }
2241
-
2242
- #[test]
2243
- fn directory_insert_rejects_global_with_non_global_branch_id() {
2244
- let error = lix_directory_write_rows_from_batch(&directory_insert_batch(true, true), None)
2245
- .expect_err("global directory write should reject conflicting branch id");
2246
-
2247
- assert!(
2248
- error
2249
- .to_string()
2250
- .contains("cannot set lixcol_global=true with non-global lixcol_branch_id"),
2251
- "unexpected error: {error}"
2252
- );
2253
- }
2254
-
2255
- #[test]
2256
- fn directory_path_insert_reuses_existing_parent_descriptor() {
2257
- let existing_rows = vec![live_row(
2258
- "dir-docs",
2259
- "branch-a",
2260
- "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\",\"hidden\":false}",
2261
- )];
2262
- let mut resolvers = directory_path_resolvers_from_state_rows(existing_rows)
2263
- .expect("existing directory rows should seed paths");
2264
-
2265
- let rows = lix_directory_write_rows_from_batch_with_path_resolvers(
2266
- &directory_path_insert_batch("/docs/nested/"),
2267
- None,
2268
- "lix_directory",
2269
- &mut resolvers,
2270
- &mut test_id_generator(&["should-not-be-used"]),
2271
- )
2272
- .expect("directory path batch should decode");
2273
-
2274
- assert_eq!(rows.len(), 1);
2275
- let snapshot = rows[0].snapshot.as_ref().unwrap();
2276
- assert_eq!(snapshot["id"], "dir-nested");
2277
- assert_eq!(snapshot["parent_id"], "dir-docs");
2278
- assert_eq!(snapshot["name"], "nested");
2279
- }
2280
-
2281
- #[test]
2282
- fn recursive_directory_delete_deletes_nested_dirs_files_and_blob_refs() {
2283
- let visible_filesystem = VisibleFilesystem::from_live_rows(filesystem_rows())
2284
- .expect("visible filesystem should build");
2285
- let mut visible_filesystems = BTreeMap::new();
2286
- visible_filesystems.insert("branch-a".to_string(), visible_filesystem);
2287
-
2288
- let (rows, count) = lix_directory_recursive_delete_rows_from_batch(
2289
- &directory_delete_batch(&["dir-docs"]),
2290
- None,
2291
- &visible_filesystems,
2292
- )
2293
- .expect("recursive directory delete should plan");
2294
-
2295
- assert_eq!(count, 4);
2296
- assert_eq!(
2297
- rows.iter()
2298
- .map(|row| {
2299
- (
2300
- row.schema_key.as_str(),
2301
- row.entity_pk
2302
- .as_ref()
2303
- .expect("planned delete row should carry entity_pk")
2304
- .as_single_string_owned()
2305
- .expect("planned delete row should project entity_pk"),
2306
- )
2307
- })
2308
- .collect::<Vec<_>>(),
2309
- vec![
2310
- ("lix_file_descriptor", "file-readme".to_string()),
2311
- ("lix_binary_blob_ref", "file-readme".to_string()),
2312
- ("lix_directory_descriptor", "dir-guides".to_string()),
2313
- ("lix_file_descriptor", "file-index".to_string()),
2314
- ("lix_directory_descriptor", "dir-docs".to_string()),
2315
- ]
2316
- );
2317
- assert!(rows.iter().all(|row| row.snapshot.is_none()));
2318
- }
2319
-
2320
- #[test]
2321
- fn recursive_directory_delete_dedupes_overlapping_parent_and_child() {
2322
- let visible_filesystem = VisibleFilesystem::from_live_rows(filesystem_rows())
2323
- .expect("visible filesystem should build");
2324
- let mut visible_filesystems = BTreeMap::new();
2325
- visible_filesystems.insert("branch-a".to_string(), visible_filesystem);
2326
-
2327
- let (rows, count) = lix_directory_recursive_delete_rows_from_batch(
2328
- &directory_delete_batch(&["dir-docs", "dir-guides"]),
2329
- None,
2330
- &visible_filesystems,
2331
- )
2332
- .expect("recursive directory delete should plan");
2333
-
2334
- assert_eq!(count, 4);
2335
- let identities = rows
2336
- .iter()
2337
- .map(|row| {
2338
- (
2339
- row.schema_key.clone(),
2340
- row.entity_pk.clone(),
2341
- row.file_id.clone(),
2342
- row.branch_id.clone(),
2343
- )
2344
- })
2345
- .collect::<std::collections::BTreeSet<_>>();
2346
- assert_eq!(identities.len(), rows.len());
2347
- assert_eq!(rows.len(), 5);
2348
- }
2349
-
2350
- #[tokio::test]
2351
- async fn directory_insert_sink_stages_decoded_lix_state_rows() {
2352
- let mut write_context = CapturingWriteContext::default();
2353
- let write_ctx = SqlWriteContext::new(&mut write_context);
2354
- let batch = directory_insert_batch(true, false);
2355
- let sink =
2356
- LixDirectoryInsertSink::new(write_ctx, test_functions(), BranchBinding::explicit());
2357
- let count = sink
2358
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
2359
- .await
2360
- .expect("directory sink should stage write");
2361
-
2362
- assert_eq!(count, 1);
2363
- assert_eq!(
2364
- write_context.writes.as_slice(),
2365
- &[TransactionWrite::Rows {
2366
- mode: TransactionWriteMode::Insert,
2367
- rows: vec![TransactionWriteRow {
2368
- entity_pk: Some(crate::entity_pk::EntityPk::single("dir-docs")),
2369
- schema_key: super::DIRECTORY_SCHEMA_KEY.to_string(),
2370
- file_id: None,
2371
- snapshot: Some(TransactionJson::from_value_for_test(
2372
- json!({"hidden":false,"id":"dir-docs","name":"docs","parent_id":null})
2373
- )),
2374
- metadata: Some(TransactionJson::from_value_for_test(
2375
- json!({"source": "directory"})
2376
- )),
2377
- origin: Some(lix_directory_insert_origin(
2378
- "lix_directory_by_branch",
2379
- "dir-docs"
2380
- )),
2381
- created_at: None,
2382
- updated_at: None,
2383
- global: false,
2384
- change_id: None,
2385
- commit_id: None,
2386
- untracked: false,
2387
- branch_id: "branch-a".to_string(),
2388
- }]
2389
- }]
2390
- );
2391
- }
2392
-
2393
- #[tokio::test]
2394
- async fn directory_insert_sink_seeds_path_resolver_from_live_state() {
2395
- let mut write_context = CapturingWriteContext {
2396
- rows: vec![live_row(
2397
- "dir-docs",
2398
- "branch-a",
2399
- "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\",\"hidden\":false}",
2400
- )],
2401
- writes: Vec::new(),
2402
- };
2403
- let write_ctx = SqlWriteContext::new(&mut write_context);
2404
- let batch = directory_path_insert_batch("/docs/nested/");
2405
- let sink =
2406
- LixDirectoryInsertSink::new(write_ctx, test_functions(), BranchBinding::explicit());
2407
- let count = sink
2408
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
2409
- .await
2410
- .expect("directory sink should stage path write");
2411
-
2412
- assert_eq!(count, 1);
2413
- let [TransactionWrite::Rows { rows, .. }] = write_context.writes.as_slice() else {
2414
- panic!("expected one directory staged write");
2415
- };
2416
- assert_eq!(rows.len(), 1);
2417
- let snapshot = rows[0].snapshot.as_ref().unwrap();
2418
- assert_eq!(snapshot["id"], "dir-nested");
2419
- assert_eq!(snapshot["parent_id"], "dir-docs");
2420
- assert_eq!(snapshot["name"], "nested");
2421
- }
2422
- }