@lix-js/sdk 0.6.0-preview.5 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. package/README.md +76 -4
  2. package/dist/errors.d.ts +7 -0
  3. package/dist/errors.js +19 -0
  4. package/dist/index.d.ts +4 -5
  5. package/dist/index.js +3 -3
  6. package/dist/native.d.ts +1 -0
  7. package/dist/native.js +47 -0
  8. package/dist/open-lix.d.ts +38 -207
  9. package/dist/open-lix.js +59 -284
  10. package/dist/result.d.ts +18 -0
  11. package/dist/result.js +48 -0
  12. package/dist/types.d.ts +114 -1
  13. package/dist/value.d.ts +28 -0
  14. package/dist/value.js +245 -0
  15. package/package.json +38 -71
  16. package/SKILL.md +0 -507
  17. package/dist/builtin-schemas.d.ts +0 -1
  18. package/dist/builtin-schemas.js +0 -1
  19. package/dist/engine-wasm/index.d.ts +0 -87
  20. package/dist/engine-wasm/index.js +0 -339
  21. package/dist/engine-wasm/wasm/lix_engine.d.ts +0 -79
  22. package/dist/engine-wasm/wasm/lix_engine.js +0 -833
  23. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  24. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +0 -27
  25. package/dist/generated/builtin-schemas.d.ts +0 -427
  26. package/dist/generated/builtin-schemas.js +0 -643
  27. package/dist/sqlite/index.d.ts +0 -12
  28. package/dist/sqlite/index.js +0 -359
  29. package/dist-engine-src/README.md +0 -18
  30. package/dist-engine-src/src/backend/capabilities.rs +0 -67
  31. package/dist-engine-src/src/backend/conformance/baseline.rs +0 -1127
  32. package/dist-engine-src/src/backend/conformance/factory.rs +0 -93
  33. package/dist-engine-src/src/backend/conformance/failure_tests.rs +0 -608
  34. package/dist-engine-src/src/backend/conformance/fixtures.rs +0 -26
  35. package/dist-engine-src/src/backend/conformance/mod.rs +0 -75
  36. package/dist-engine-src/src/backend/conformance/model.rs +0 -28
  37. package/dist-engine-src/src/backend/conformance/model_based.rs +0 -257
  38. package/dist-engine-src/src/backend/conformance/persistence.rs +0 -204
  39. package/dist-engine-src/src/backend/conformance/projection.rs +0 -21
  40. package/dist-engine-src/src/backend/conformance/pushdown.rs +0 -24
  41. package/dist-engine-src/src/backend/conformance/runner.rs +0 -90
  42. package/dist-engine-src/src/backend/conformance/scan.rs +0 -24
  43. package/dist-engine-src/src/backend/conformance/write.rs +0 -16
  44. package/dist-engine-src/src/backend/error.rs +0 -94
  45. package/dist-engine-src/src/backend/in_memory.rs +0 -670
  46. package/dist-engine-src/src/backend/mod.rs +0 -39
  47. package/dist-engine-src/src/backend/predicate.rs +0 -80
  48. package/dist-engine-src/src/backend/traits.rs +0 -260
  49. package/dist-engine-src/src/backend/types.rs +0 -239
  50. package/dist-engine-src/src/binary_cas/chunking.rs +0 -31
  51. package/dist-engine-src/src/binary_cas/codec.rs +0 -346
  52. package/dist-engine-src/src/binary_cas/context.rs +0 -139
  53. package/dist-engine-src/src/binary_cas/kv.rs +0 -1038
  54. package/dist-engine-src/src/binary_cas/mod.rs +0 -11
  55. package/dist-engine-src/src/binary_cas/types.rs +0 -121
  56. package/dist-engine-src/src/branch/context.rs +0 -40
  57. package/dist-engine-src/src/branch/lifecycle.rs +0 -221
  58. package/dist-engine-src/src/branch/mod.rs +0 -13
  59. package/dist-engine-src/src/branch/refs.rs +0 -321
  60. package/dist-engine-src/src/branch/stage_rows.rs +0 -67
  61. package/dist-engine-src/src/branch/types.rs +0 -21
  62. package/dist-engine-src/src/catalog/context.rs +0 -412
  63. package/dist-engine-src/src/catalog/mod.rs +0 -10
  64. package/dist-engine-src/src/catalog/schema.rs +0 -4
  65. package/dist-engine-src/src/catalog/snapshot.rs +0 -1114
  66. package/dist-engine-src/src/cel/context.rs +0 -86
  67. package/dist-engine-src/src/cel/error.rs +0 -19
  68. package/dist-engine-src/src/cel/mod.rs +0 -8
  69. package/dist-engine-src/src/cel/provider.rs +0 -9
  70. package/dist-engine-src/src/cel/runtime.rs +0 -167
  71. package/dist-engine-src/src/cel/value.rs +0 -50
  72. package/dist-engine-src/src/changelog/bench_support.rs +0 -785
  73. package/dist-engine-src/src/changelog/change.rs +0 -1
  74. package/dist-engine-src/src/changelog/codec.rs +0 -497
  75. package/dist-engine-src/src/changelog/commit.rs +0 -1
  76. package/dist-engine-src/src/changelog/context.rs +0 -1614
  77. package/dist-engine-src/src/changelog/mod.rs +0 -29
  78. package/dist-engine-src/src/changelog/store.rs +0 -163
  79. package/dist-engine-src/src/changelog/test_support.rs +0 -54
  80. package/dist-engine-src/src/changelog/types.rs +0 -213
  81. package/dist-engine-src/src/commit_graph/context.rs +0 -944
  82. package/dist-engine-src/src/commit_graph/mod.rs +0 -9
  83. package/dist-engine-src/src/commit_graph/types.rs +0 -89
  84. package/dist-engine-src/src/commit_graph/walker.rs +0 -786
  85. package/dist-engine-src/src/common/error.rs +0 -347
  86. package/dist-engine-src/src/common/fingerprint.rs +0 -3
  87. package/dist-engine-src/src/common/fs_path.rs +0 -1336
  88. package/dist-engine-src/src/common/identity.rs +0 -145
  89. package/dist-engine-src/src/common/json_pointer.rs +0 -67
  90. package/dist-engine-src/src/common/metadata.rs +0 -40
  91. package/dist-engine-src/src/common/mod.rs +0 -23
  92. package/dist-engine-src/src/common/types.rs +0 -105
  93. package/dist-engine-src/src/common/wire.rs +0 -222
  94. package/dist-engine-src/src/domain.rs +0 -320
  95. package/dist-engine-src/src/engine.rs +0 -203
  96. package/dist-engine-src/src/entity_pk.rs +0 -402
  97. package/dist-engine-src/src/functions/context.rs +0 -296
  98. package/dist-engine-src/src/functions/deterministic.rs +0 -113
  99. package/dist-engine-src/src/functions/mod.rs +0 -18
  100. package/dist-engine-src/src/functions/provider.rs +0 -130
  101. package/dist-engine-src/src/functions/state.rs +0 -335
  102. package/dist-engine-src/src/functions/types.rs +0 -37
  103. package/dist-engine-src/src/init.rs +0 -692
  104. package/dist-engine-src/src/json_store/compression.rs +0 -77
  105. package/dist-engine-src/src/json_store/context.rs +0 -172
  106. package/dist-engine-src/src/json_store/encoded.rs +0 -15
  107. package/dist-engine-src/src/json_store/mod.rs +0 -38
  108. package/dist-engine-src/src/json_store/store.rs +0 -494
  109. package/dist-engine-src/src/json_store/types.rs +0 -212
  110. package/dist-engine-src/src/lib.rs +0 -92
  111. package/dist-engine-src/src/live_state/context.rs +0 -1883
  112. package/dist-engine-src/src/live_state/mod.rs +0 -21
  113. package/dist-engine-src/src/live_state/overlay.rs +0 -75
  114. package/dist-engine-src/src/live_state/reader.rs +0 -23
  115. package/dist-engine-src/src/live_state/types.rs +0 -231
  116. package/dist-engine-src/src/live_state/visibility.rs +0 -666
  117. package/dist-engine-src/src/plugin/archive.rs +0 -438
  118. package/dist-engine-src/src/plugin/component.rs +0 -183
  119. package/dist-engine-src/src/plugin/install.rs +0 -619
  120. package/dist-engine-src/src/plugin/manifest.rs +0 -516
  121. package/dist-engine-src/src/plugin/materializer.rs +0 -202
  122. package/dist-engine-src/src/plugin/mod.rs +0 -33
  123. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -119
  124. package/dist-engine-src/src/plugin/storage.rs +0 -74
  125. package/dist-engine-src/src/schema/annotations/defaults.rs +0 -275
  126. package/dist-engine-src/src/schema/annotations/mod.rs +0 -1
  127. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -21
  128. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -29
  129. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -29
  130. package/dist-engine-src/src/schema/builtin/lix_branch_descriptor.json +0 -34
  131. package/dist-engine-src/src/schema/builtin/lix_branch_ref.json +0 -48
  132. package/dist-engine-src/src/schema/builtin/lix_change.json +0 -63
  133. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -45
  134. package/dist-engine-src/src/schema/builtin/lix_commit.json +0 -24
  135. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +0 -53
  136. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -52
  137. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -52
  138. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -40
  139. package/dist-engine-src/src/schema/builtin/lix_label.json +0 -29
  140. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +0 -74
  141. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +0 -25
  142. package/dist-engine-src/src/schema/builtin/mod.rs +0 -220
  143. package/dist-engine-src/src/schema/compatibility.rs +0 -787
  144. package/dist-engine-src/src/schema/definition.json +0 -187
  145. package/dist-engine-src/src/schema/definition.rs +0 -742
  146. package/dist-engine-src/src/schema/key.rs +0 -138
  147. package/dist-engine-src/src/schema/mod.rs +0 -20
  148. package/dist-engine-src/src/schema/seed.rs +0 -14
  149. package/dist-engine-src/src/schema/tests.rs +0 -780
  150. package/dist-engine-src/src/session/context.rs +0 -1059
  151. package/dist-engine-src/src/session/create_branch.rs +0 -94
  152. package/dist-engine-src/src/session/execute.rs +0 -681
  153. package/dist-engine-src/src/session/merge/analysis.rs +0 -108
  154. package/dist-engine-src/src/session/merge/branch.rs +0 -417
  155. package/dist-engine-src/src/session/merge/conflicts.rs +0 -63
  156. package/dist-engine-src/src/session/merge/mod.rs +0 -10
  157. package/dist-engine-src/src/session/merge/stats.rs +0 -61
  158. package/dist-engine-src/src/session/mod.rs +0 -30
  159. package/dist-engine-src/src/session/switch_branch.rs +0 -113
  160. package/dist-engine-src/src/session/transaction.rs +0 -557
  161. package/dist-engine-src/src/sql2/bind/classify.rs +0 -102
  162. package/dist-engine-src/src/sql2/bind/error.rs +0 -5
  163. package/dist-engine-src/src/sql2/bind/expr.rs +0 -29
  164. package/dist-engine-src/src/sql2/bind/mod.rs +0 -12
  165. package/dist-engine-src/src/sql2/bind/public_udf.rs +0 -306
  166. package/dist-engine-src/src/sql2/bind/read.rs +0 -65
  167. package/dist-engine-src/src/sql2/bind/statement.rs +0 -2236
  168. package/dist-engine-src/src/sql2/bind/table.rs +0 -273
  169. package/dist-engine-src/src/sql2/bind/write.rs +0 -86
  170. package/dist-engine-src/src/sql2/branch_scope.rs +0 -436
  171. package/dist-engine-src/src/sql2/catalog/capability.rs +0 -20
  172. package/dist-engine-src/src/sql2/catalog/entity_surface.rs +0 -296
  173. package/dist-engine-src/src/sql2/catalog/mod.rs +0 -15
  174. package/dist-engine-src/src/sql2/catalog/registry.rs +0 -556
  175. package/dist-engine-src/src/sql2/catalog/schema.rs +0 -88
  176. package/dist-engine-src/src/sql2/catalog/surface.rs +0 -41
  177. package/dist-engine-src/src/sql2/change_materialization.rs +0 -122
  178. package/dist-engine-src/src/sql2/context.rs +0 -317
  179. package/dist-engine-src/src/sql2/dml.rs +0 -148
  180. package/dist-engine-src/src/sql2/error.rs +0 -215
  181. package/dist-engine-src/src/sql2/exec/bound_public_write.rs +0 -1593
  182. package/dist-engine-src/src/sql2/exec/datafusion.rs +0 -5266
  183. package/dist-engine-src/src/sql2/exec/fast_write.rs +0 -82
  184. package/dist-engine-src/src/sql2/exec/mod.rs +0 -24
  185. package/dist-engine-src/src/sql2/exec/write.rs +0 -661
  186. package/dist-engine-src/src/sql2/filesystem_planner.rs +0 -1485
  187. package/dist-engine-src/src/sql2/filesystem_predicates.rs +0 -159
  188. package/dist-engine-src/src/sql2/filesystem_visibility.rs +0 -383
  189. package/dist-engine-src/src/sql2/history_projection.rs +0 -56
  190. package/dist-engine-src/src/sql2/history_route.rs +0 -661
  191. package/dist-engine-src/src/sql2/mod.rs +0 -52
  192. package/dist-engine-src/src/sql2/optimize/datafusion.rs +0 -1
  193. package/dist-engine-src/src/sql2/optimize/mod.rs +0 -2
  194. package/dist-engine-src/src/sql2/optimize/simple_write.rs +0 -116
  195. package/dist-engine-src/src/sql2/parse/mod.rs +0 -69
  196. package/dist-engine-src/src/sql2/parse/normalize.rs +0 -1
  197. package/dist-engine-src/src/sql2/plan/branch_scope.rs +0 -24
  198. package/dist-engine-src/src/sql2/plan/mod.rs +0 -5
  199. package/dist-engine-src/src/sql2/plan/predicate.rs +0 -22
  200. package/dist-engine-src/src/sql2/plan/write.rs +0 -147
  201. package/dist-engine-src/src/sql2/predicate_typecheck.rs +0 -504
  202. package/dist-engine-src/src/sql2/providers/branch.rs +0 -1206
  203. package/dist-engine-src/src/sql2/providers/change.rs +0 -445
  204. package/dist-engine-src/src/sql2/providers/directory.rs +0 -2422
  205. package/dist-engine-src/src/sql2/providers/directory_history.rs +0 -645
  206. package/dist-engine-src/src/sql2/providers/entity.rs +0 -1484
  207. package/dist-engine-src/src/sql2/providers/entity_history.rs +0 -452
  208. package/dist-engine-src/src/sql2/providers/file.rs +0 -3686
  209. package/dist-engine-src/src/sql2/providers/file_history.rs +0 -924
  210. package/dist-engine-src/src/sql2/providers/history.rs +0 -426
  211. package/dist-engine-src/src/sql2/providers/lix_state.rs +0 -2542
  212. package/dist-engine-src/src/sql2/providers/mod.rs +0 -508
  213. package/dist-engine-src/src/sql2/read_only.rs +0 -63
  214. package/dist-engine-src/src/sql2/record_batch.rs +0 -17
  215. package/dist-engine-src/src/sql2/result_metadata.rs +0 -29
  216. package/dist-engine-src/src/sql2/runtime.rs +0 -60
  217. package/dist-engine-src/src/sql2/session.rs +0 -83
  218. package/dist-engine-src/src/sql2/storage/constraints.rs +0 -1
  219. package/dist-engine-src/src/sql2/storage/mod.rs +0 -1
  220. package/dist-engine-src/src/sql2/test_support/differential.rs +0 -712
  221. package/dist-engine-src/src/sql2/test_support/generators.rs +0 -354
  222. package/dist-engine-src/src/sql2/test_support/mod.rs +0 -2
  223. package/dist-engine-src/src/sql2/udfs/common.rs +0 -295
  224. package/dist-engine-src/src/sql2/udfs/lix_active_branch_commit_id.rs +0 -53
  225. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +0 -47
  226. package/dist-engine-src/src/sql2/udfs/lix_json.rs +0 -100
  227. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +0 -99
  228. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +0 -99
  229. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +0 -82
  230. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +0 -85
  231. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +0 -76
  232. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +0 -76
  233. package/dist-engine-src/src/sql2/udfs/mod.rs +0 -86
  234. package/dist-engine-src/src/sql2/write_normalization.rs +0 -368
  235. package/dist-engine-src/src/storage/conformance.rs +0 -399
  236. package/dist-engine-src/src/storage/context.rs +0 -620
  237. package/dist-engine-src/src/storage/mod.rs +0 -52
  238. package/dist-engine-src/src/storage/point.rs +0 -440
  239. package/dist-engine-src/src/storage/read_scope.rs +0 -67
  240. package/dist-engine-src/src/storage/reader.rs +0 -867
  241. package/dist-engine-src/src/storage/scan.rs +0 -784
  242. package/dist-engine-src/src/storage/spaces.rs +0 -236
  243. package/dist-engine-src/src/storage/stats.rs +0 -80
  244. package/dist-engine-src/src/storage/write_set.rs +0 -962
  245. package/dist-engine-src/src/storage_bench.rs +0 -171
  246. package/dist-engine-src/src/test_support.rs +0 -450
  247. package/dist-engine-src/src/tracked_state/bench_support.rs +0 -394
  248. package/dist-engine-src/src/tracked_state/codec.rs +0 -1183
  249. package/dist-engine-src/src/tracked_state/commit_root_rebuild.rs +0 -358
  250. package/dist-engine-src/src/tracked_state/context.rs +0 -2801
  251. package/dist-engine-src/src/tracked_state/diff.rs +0 -2140
  252. package/dist-engine-src/src/tracked_state/merge.rs +0 -478
  253. package/dist-engine-src/src/tracked_state/mod.rs +0 -35
  254. package/dist-engine-src/src/tracked_state/row_materialization.rs +0 -275
  255. package/dist-engine-src/src/tracked_state/storage.rs +0 -427
  256. package/dist-engine-src/src/tracked_state/tree.rs +0 -3063
  257. package/dist-engine-src/src/tracked_state/types.rs +0 -238
  258. package/dist-engine-src/src/transaction/bench_support.rs +0 -407
  259. package/dist-engine-src/src/transaction/commit.rs +0 -1592
  260. package/dist-engine-src/src/transaction/context.rs +0 -1653
  261. package/dist-engine-src/src/transaction/mod.rs +0 -24
  262. package/dist-engine-src/src/transaction/normalization.rs +0 -877
  263. package/dist-engine-src/src/transaction/prep.rs +0 -37
  264. package/dist-engine-src/src/transaction/schema_resolver.rs +0 -163
  265. package/dist-engine-src/src/transaction/staging.rs +0 -1525
  266. package/dist-engine-src/src/transaction/types.rs +0 -403
  267. package/dist-engine-src/src/transaction/validation.rs +0 -5766
  268. package/dist-engine-src/src/untracked_state/codec.rs +0 -615
  269. package/dist-engine-src/src/untracked_state/context.rs +0 -98
  270. package/dist-engine-src/src/untracked_state/materialization.rs +0 -63
  271. package/dist-engine-src/src/untracked_state/mod.rs +0 -15
  272. package/dist-engine-src/src/untracked_state/storage.rs +0 -898
  273. package/dist-engine-src/src/untracked_state/types.rs +0 -146
  274. package/dist-engine-src/src/wasm/mod.rs +0 -60
@@ -1,3686 +0,0 @@
1
- use std::any::Any;
2
- use std::collections::{BTreeMap, BTreeSet};
3
- use std::sync::Arc;
4
-
5
- use async_trait::async_trait;
6
- use datafusion::arrow::array::{
7
- ArrayRef, BinaryArray, BooleanArray, RecordBatchOptions, StringArray, UInt64Array,
8
- };
9
- use datafusion::arrow::compute::{and, filter_record_batch};
10
- use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
11
- use datafusion::arrow::record_batch::RecordBatch;
12
- use datafusion::catalog::{Session, TableProvider};
13
- use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue, SchemaExt};
14
- use datafusion::datasource::TableType;
15
- use datafusion::execution::TaskContext;
16
- use datafusion::logical_expr::dml::InsertOp;
17
- use datafusion::logical_expr::expr::InList;
18
- use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
19
- use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
20
- use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
21
- use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
22
- use datafusion::physical_plan::{
23
- DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
24
- };
25
- use datafusion::prelude::SessionContext;
26
- use futures_util::{stream, TryStreamExt};
27
- use serde::Deserialize;
28
-
29
- use crate::binary_cas::{BlobDataReader, BlobHash};
30
- use crate::branch::BranchRefReader;
31
- use crate::entity_pk::EntityPk;
32
- use crate::functions::FunctionProviderHandle;
33
- use crate::live_state::MaterializedLiveStateRow;
34
- use crate::live_state::{
35
- LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
36
- };
37
- use crate::sql2::branch_scope::{
38
- explicit_branch_ids_from_dml_filters, resolve_provider_branch_ids, resolve_write_branch_scope,
39
- BranchBinding,
40
- };
41
- use crate::sql2::dml::{InsertExec, InsertSink};
42
- use crate::sql2::filesystem_predicates::{
43
- canonicalize_filesystem_path_filters, FilesystemPathKind,
44
- };
45
- use crate::sql2::predicate_typecheck::{
46
- canonicalize_json_identity_text_filters, validate_json_predicate_filters,
47
- };
48
- use crate::sql2::write_normalization::{
49
- is_binary_type, lix_file_data_type_error, lix_file_data_type_error_with_value,
50
- logical_expr_is_binary_or_null, reject_non_binary_casts_for_insert_column,
51
- scalar_is_binary_or_null, InsertCell, InsertColumnIntents, SqlCell, UpdateAssignmentValues,
52
- UpdateCell,
53
- };
54
- use crate::transaction::types::{TransactionJson, TransactionWriteRow};
55
- use crate::{parse_row_metadata_value, serialize_row_metadata, LixError};
56
-
57
- const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";
58
- const BLOB_REF_SCHEMA_KEY: &str = "lix_binary_blob_ref";
59
- const DIRECTORY_DESCRIPTOR_SCHEMA_KEY: &str = "lix_directory_descriptor";
60
-
61
- use crate::sql2::filesystem_planner::{
62
- blob_ref_row, directory_path_resolvers_from_state_rows, file_descriptor_row,
63
- file_descriptor_write_row, filesystem_storage_scope_key, plan_file_delete,
64
- plan_file_path_update, BlobRefRowInput, DirectoryPathResolver, FileDeleteInput,
65
- FileDescriptorRowInput, FileDescriptorWriteIntent, FilePathWriteInput, FilesystemDeletePlan,
66
- FilesystemRowContext,
67
- };
68
- use crate::sql2::result_metadata::json_field;
69
- use crate::sql2::session::SqlWriteSessionOptions;
70
- use crate::sql2::{
71
- SqlWriteContext, WriteAccess, WriteContextBranchRefReader, WriteContextLiveStateReader,
72
- };
73
- use crate::transaction::types::{
74
- LogicalPrimaryKey, TransactionFileData, TransactionWrite, TransactionWriteMode,
75
- TransactionWriteOperation, TransactionWriteOrigin,
76
- };
77
-
78
- pub(super) async fn register_lix_file_active_provider(
79
- session: &SessionContext,
80
- surface_name: &str,
81
- active_branch_id: &str,
82
- live_state: Arc<dyn LiveStateReader>,
83
- branch_ref: Arc<dyn BranchRefReader>,
84
- blob_reader: Arc<dyn BlobDataReader>,
85
- functions: FunctionProviderHandle,
86
- ) -> Result<(), LixError> {
87
- session
88
- .register_table(
89
- surface_name,
90
- Arc::new(LixFileProvider::active_branch(
91
- active_branch_id,
92
- live_state,
93
- branch_ref,
94
- blob_reader,
95
- functions,
96
- )),
97
- )
98
- .map_err(datafusion_error_to_lix_error)?;
99
- Ok(())
100
- }
101
-
102
- pub(super) async fn register_lix_file_by_branch_provider(
103
- session: &SessionContext,
104
- surface_name: &str,
105
- live_state: Arc<dyn LiveStateReader>,
106
- branch_ref: Arc<dyn BranchRefReader>,
107
- blob_reader: Arc<dyn BlobDataReader>,
108
- functions: FunctionProviderHandle,
109
- ) -> Result<(), LixError> {
110
- session
111
- .register_table(
112
- surface_name,
113
- Arc::new(LixFileProvider::by_branch(
114
- live_state,
115
- branch_ref,
116
- blob_reader,
117
- functions,
118
- )),
119
- )
120
- .map_err(datafusion_error_to_lix_error)?;
121
- Ok(())
122
- }
123
-
124
- pub(super) async fn register_by_branch_write_provider(
125
- session: &SessionContext,
126
- surface_name: &str,
127
- write_ctx: SqlWriteContext,
128
- options: SqlWriteSessionOptions,
129
- ) -> Result<(), LixError> {
130
- session
131
- .register_table(
132
- surface_name,
133
- Arc::new(LixFileProvider::by_branch_with_write(write_ctx, options)),
134
- )
135
- .map_err(datafusion_error_to_lix_error)?;
136
- Ok(())
137
- }
138
-
139
- pub(super) async fn register_active_write_provider(
140
- session: &SessionContext,
141
- surface_name: &str,
142
- write_ctx: SqlWriteContext,
143
- options: SqlWriteSessionOptions,
144
- ) -> Result<(), LixError> {
145
- session
146
- .register_table(
147
- surface_name,
148
- Arc::new(LixFileProvider::active_branch_with_write(
149
- write_ctx, options,
150
- )),
151
- )
152
- .map_err(datafusion_error_to_lix_error)?;
153
- Ok(())
154
- }
155
-
156
- pub(crate) struct LixFileProvider {
157
- schema: SchemaRef,
158
- live_state: Arc<dyn LiveStateReader>,
159
- branch_ref: Arc<dyn BranchRefReader>,
160
- blob_reader: Arc<dyn BlobDataReader>,
161
- write_access: WriteAccess,
162
- functions: FunctionProviderHandle,
163
- branch_binding: BranchBinding,
164
- options: SqlWriteSessionOptions,
165
- }
166
-
167
- impl std::fmt::Debug for LixFileProvider {
168
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
169
- f.debug_struct("LixFileProvider").finish()
170
- }
171
- }
172
-
173
- impl LixFileProvider {
174
- pub(crate) fn active_branch(
175
- active_branch_id: impl Into<String>,
176
- live_state: Arc<dyn LiveStateReader>,
177
- branch_ref: Arc<dyn BranchRefReader>,
178
- blob_reader: Arc<dyn BlobDataReader>,
179
- functions: FunctionProviderHandle,
180
- ) -> Self {
181
- Self {
182
- schema: lix_file_schema(),
183
- live_state,
184
- branch_ref,
185
- blob_reader,
186
- write_access: WriteAccess::read_only(),
187
- functions,
188
- branch_binding: BranchBinding::active(active_branch_id),
189
- options: SqlWriteSessionOptions::default(),
190
- }
191
- }
192
-
193
- pub(crate) fn active_branch_with_write(
194
- write_ctx: SqlWriteContext,
195
- options: SqlWriteSessionOptions,
196
- ) -> Self {
197
- let active_branch_id = write_ctx.active_branch_id();
198
- let functions = write_ctx.functions();
199
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
200
- let branch_ref = Arc::new(WriteContextBranchRefReader::new(write_ctx.clone()));
201
- let blob_reader = write_ctx.blob_reader();
202
- Self {
203
- schema: lix_file_schema(),
204
- live_state,
205
- branch_ref,
206
- blob_reader,
207
- write_access: WriteAccess::write(write_ctx),
208
- functions,
209
- branch_binding: BranchBinding::active(active_branch_id),
210
- options,
211
- }
212
- }
213
-
214
- pub(crate) fn by_branch(
215
- live_state: Arc<dyn LiveStateReader>,
216
- branch_ref: Arc<dyn BranchRefReader>,
217
- blob_reader: Arc<dyn BlobDataReader>,
218
- functions: FunctionProviderHandle,
219
- ) -> Self {
220
- Self {
221
- schema: lix_file_by_branch_schema(),
222
- live_state,
223
- branch_ref,
224
- blob_reader,
225
- write_access: WriteAccess::read_only(),
226
- functions,
227
- branch_binding: BranchBinding::explicit(),
228
- options: SqlWriteSessionOptions::default(),
229
- }
230
- }
231
-
232
- pub(crate) fn by_branch_with_write(
233
- write_ctx: SqlWriteContext,
234
- options: SqlWriteSessionOptions,
235
- ) -> Self {
236
- let functions = write_ctx.functions();
237
- let live_state = Arc::new(WriteContextLiveStateReader::new(write_ctx.clone()));
238
- let branch_ref = Arc::new(WriteContextBranchRefReader::new(write_ctx.clone()));
239
- let blob_reader = write_ctx.blob_reader();
240
- Self {
241
- schema: lix_file_by_branch_schema(),
242
- live_state,
243
- branch_ref,
244
- blob_reader,
245
- write_access: WriteAccess::write(write_ctx),
246
- functions,
247
- branch_binding: BranchBinding::explicit(),
248
- options,
249
- }
250
- }
251
- }
252
-
253
- #[async_trait]
254
- impl TableProvider for LixFileProvider {
255
- fn as_any(&self) -> &dyn Any {
256
- self
257
- }
258
-
259
- fn schema(&self) -> SchemaRef {
260
- Arc::clone(&self.schema)
261
- }
262
-
263
- fn table_type(&self) -> TableType {
264
- TableType::Base
265
- }
266
-
267
- fn supports_filters_pushdown(
268
- &self,
269
- filters: &[&Expr],
270
- ) -> Result<Vec<TableProviderFilterPushDown>> {
271
- let analyzer = LixFileIdFilterAnalyzer;
272
- Ok(filters
273
- .iter()
274
- .map(|filter| {
275
- if ExactStringColumnFilterAnalyzer::new("lixcol_branch_id").supports(filter)
276
- || analyzer.supports(filter)
277
- || contains_column(filter, "path")
278
- {
279
- TableProviderFilterPushDown::Exact
280
- } else {
281
- TableProviderFilterPushDown::Unsupported
282
- }
283
- })
284
- .collect())
285
- }
286
-
287
- async fn scan(
288
- &self,
289
- _state: &dyn Session,
290
- projection: Option<&Vec<usize>>,
291
- filters: &[Expr],
292
- limit: Option<usize>,
293
- ) -> Result<Arc<dyn ExecutionPlan>> {
294
- let projected_schema = projected_schema(&self.schema, projection)?;
295
- let scan_limit = if filters.is_empty() { limit } else { None };
296
- let mut request = lix_file_scan_request(
297
- self.branch_binding.active_branch_id(),
298
- Some(projected_schema.as_ref()),
299
- scan_limit,
300
- );
301
- request.filter.branch_ids = resolve_provider_branch_ids(
302
- self.branch_ref.as_ref(),
303
- &self.branch_binding,
304
- request.filter.branch_ids,
305
- )
306
- .await
307
- .map_err(lix_error_to_datafusion_error)?;
308
- let filters = canonicalize_filesystem_path_filters(filters, FilesystemPathKind::File)?;
309
- let target_file_ids = file_id_constraint_from_filters(&filters)?;
310
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
311
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
312
- let physical_filters = filters
313
- .iter()
314
- .map(|expr| create_physical_expr(expr, &df_schema, _state.execution_props()))
315
- .collect::<Result<Vec<_>>>()?;
316
- Ok(Arc::new(LixFileScanExec::new(
317
- Arc::clone(&self.live_state),
318
- Arc::clone(&self.blob_reader),
319
- Arc::clone(&self.schema),
320
- projected_schema,
321
- projection.cloned(),
322
- request,
323
- target_file_ids,
324
- physical_filters,
325
- limit,
326
- )))
327
- }
328
-
329
- async fn insert_into(
330
- &self,
331
- _state: &dyn Session,
332
- input: Arc<dyn ExecutionPlan>,
333
- insert_op: InsertOp,
334
- ) -> Result<Arc<dyn ExecutionPlan>> {
335
- if insert_op != InsertOp::Append {
336
- return not_impl_err!("{insert_op} not implemented for lix_file yet");
337
- }
338
- let write_ctx = self.write_access.require_write("INSERT into lix_file")?;
339
- self.schema
340
- .logically_equivalent_names_and_types(&input.schema())?;
341
- let insert_intents = InsertColumnIntents::from_input(&input);
342
- let include_data_writes = self.schema.field_with_name("data").is_ok()
343
- && insert_intents.includes_column("data")
344
- && !self.options.omitted_insert_columns.contains("data");
345
- if include_data_writes {
346
- reject_non_binary_casts_for_insert_column(&input, "data", "INSERT into lix_file")?;
347
- }
348
- let sink = LixFileInsertSink::new(
349
- write_ctx,
350
- self.functions.clone(),
351
- self.branch_binding.clone(),
352
- include_data_writes,
353
- );
354
- Ok(Arc::new(InsertExec::new(input, Arc::new(sink))))
355
- }
356
-
357
- async fn delete_from(
358
- &self,
359
- state: &dyn Session,
360
- filters: Vec<Expr>,
361
- ) -> Result<Arc<dyn ExecutionPlan>> {
362
- let write_ctx = self.write_access.require_write("DELETE FROM lix_file")?;
363
- let filters = canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::File)?;
364
- let filters = canonicalize_json_identity_text_filters(self.schema.as_ref(), &filters)?;
365
- let target_file_ids = file_id_constraint_from_filters(&filters)?;
366
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
367
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
368
- let physical_filters = filters
369
- .iter()
370
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
371
- .collect::<Result<Vec<_>>>()?;
372
- let mut request = lix_file_scan_request(self.branch_binding.active_branch_id(), None, None);
373
- request.filter.branch_ids = explicit_branch_ids_from_dml_filters(&filters);
374
- request.filter.branch_ids = resolve_provider_branch_ids(
375
- self.branch_ref.as_ref(),
376
- &self.branch_binding,
377
- request.filter.branch_ids,
378
- )
379
- .await
380
- .map_err(lix_error_to_datafusion_error)?;
381
- Ok(Arc::new(LixFileDeleteExec::new(
382
- Arc::clone(&self.blob_reader),
383
- write_ctx,
384
- Arc::clone(&self.schema),
385
- self.branch_binding.clone(),
386
- request,
387
- target_file_ids,
388
- physical_filters,
389
- )))
390
- }
391
-
392
- async fn update(
393
- &self,
394
- state: &dyn Session,
395
- assignments: Vec<(String, Expr)>,
396
- filters: Vec<Expr>,
397
- ) -> Result<Arc<dyn ExecutionPlan>> {
398
- let write_ctx = self.write_access.require_write("UPDATE lix_file")?;
399
- validate_lix_file_update_assignments(&self.schema, &assignments)?;
400
- let filters = canonicalize_filesystem_path_filters(&filters, FilesystemPathKind::File)?;
401
- let filters = canonicalize_json_identity_text_filters(self.schema.as_ref(), &filters)?;
402
- let target_file_ids = file_id_constraint_from_filters(&filters)?;
403
- let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
404
- validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
405
- let physical_assignments = assignments
406
- .iter()
407
- .map(|(column_name, expr)| {
408
- Ok((
409
- column_name.clone(),
410
- create_physical_expr(expr, &df_schema, state.execution_props())?,
411
- ))
412
- })
413
- .collect::<Result<Vec<_>>>()?;
414
- let physical_filters = filters
415
- .iter()
416
- .map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
417
- .collect::<Result<Vec<_>>>()?;
418
- let mut request = lix_file_scan_request(self.branch_binding.active_branch_id(), None, None);
419
- request.filter.branch_ids = explicit_branch_ids_from_dml_filters(&filters);
420
- request.filter.branch_ids = resolve_provider_branch_ids(
421
- self.branch_ref.as_ref(),
422
- &self.branch_binding,
423
- request.filter.branch_ids,
424
- )
425
- .await
426
- .map_err(lix_error_to_datafusion_error)?;
427
- Ok(Arc::new(LixFileUpdateExec::new(
428
- Arc::clone(&self.blob_reader),
429
- write_ctx,
430
- Arc::clone(&self.schema),
431
- self.branch_binding.clone(),
432
- self.functions.clone(),
433
- request,
434
- target_file_ids,
435
- physical_assignments,
436
- physical_filters,
437
- )))
438
- }
439
- }
440
-
441
- #[allow(dead_code)]
442
- struct LixFileInsertSink {
443
- write_ctx: SqlWriteContext,
444
- functions: FunctionProviderHandle,
445
- branch_binding: BranchBinding,
446
- surface_name: &'static str,
447
- include_data_writes: bool,
448
- }
449
-
450
- impl std::fmt::Debug for LixFileInsertSink {
451
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
452
- f.debug_struct("LixFileInsertSink").finish()
453
- }
454
- }
455
-
456
- impl LixFileInsertSink {
457
- fn new(
458
- write_ctx: SqlWriteContext,
459
- functions: FunctionProviderHandle,
460
- branch_binding: BranchBinding,
461
- include_data_writes: bool,
462
- ) -> Self {
463
- let surface_name = lix_file_surface_name(&branch_binding);
464
- Self {
465
- write_ctx,
466
- functions,
467
- branch_binding,
468
- surface_name,
469
- include_data_writes,
470
- }
471
- }
472
- }
473
-
474
- impl DisplayAs for LixFileInsertSink {
475
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
476
- match t {
477
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
478
- write!(f, "LixFileInsertSink")
479
- }
480
- DisplayFormatType::TreeRender => write!(f, "LixFileInsertSink"),
481
- }
482
- }
483
- }
484
-
485
- #[async_trait]
486
- impl InsertSink for LixFileInsertSink {
487
- async fn write_batches(
488
- &self,
489
- batches: Vec<RecordBatch>,
490
- _context: &Arc<TaskContext>,
491
- ) -> Result<u64> {
492
- let mut staged = LixFileStagedBatch::default();
493
- let mut path_resolvers = None;
494
- for batch in batches {
495
- if path_resolvers.is_none() {
496
- path_resolvers = Some(
497
- file_path_resolvers_from_live_state(
498
- Arc::new(WriteContextLiveStateReader::new(self.write_ctx.clone())),
499
- self.branch_binding.active_branch_id(),
500
- )
501
- .await
502
- .map_err(lix_error_to_datafusion_error)?,
503
- );
504
- }
505
- if record_batch_has_non_null_column(&batch, "path")? {
506
- staged.extend(lix_file_insert_stage_from_batch_with_path_resolvers(
507
- &batch,
508
- self.branch_binding.active_branch_id(),
509
- self.surface_name,
510
- path_resolvers
511
- .as_mut()
512
- .expect("path resolver should be initialized"),
513
- &mut || self.functions.call_uuid_v7(),
514
- self.include_data_writes,
515
- )?);
516
- } else {
517
- staged.extend(
518
- lix_file_insert_stage_from_batch_with_id_generator_and_path_resolvers(
519
- &batch,
520
- self.branch_binding.active_branch_id(),
521
- self.surface_name,
522
- path_resolvers
523
- .as_mut()
524
- .expect("path resolver should be initialized"),
525
- &mut || self.functions.call_uuid_v7(),
526
- self.include_data_writes,
527
- )?,
528
- );
529
- }
530
- }
531
-
532
- if !staged.state_rows.is_empty() || !staged.file_data_writes.is_empty() {
533
- let intent = if staged.file_data_writes.is_empty() {
534
- TransactionWrite::Rows {
535
- mode: TransactionWriteMode::Insert,
536
- rows: staged.state_rows,
537
- }
538
- } else {
539
- TransactionWrite::RowsWithFileData {
540
- mode: TransactionWriteMode::Insert,
541
- rows: staged.state_rows,
542
- file_data: staged.file_data_writes,
543
- count: staged.count,
544
- }
545
- };
546
- self.write_ctx
547
- .stage_write(intent)
548
- .await
549
- .map_err(lix_error_to_datafusion_error)?;
550
- }
551
-
552
- Ok(staged.count)
553
- }
554
- }
555
-
556
- fn lix_file_surface_name(branch_binding: &BranchBinding) -> &'static str {
557
- match branch_binding {
558
- BranchBinding::Active { .. } => "lix_file",
559
- BranchBinding::Explicit => "lix_file_by_branch",
560
- }
561
- }
562
-
563
- #[allow(dead_code)]
564
- struct LixFileDeleteExec {
565
- blob_reader: Arc<dyn BlobDataReader>,
566
- write_ctx: SqlWriteContext,
567
- table_schema: SchemaRef,
568
- branch_binding: BranchBinding,
569
- request: LiveStateScanRequest,
570
- target_file_ids: FileIdConstraint,
571
- filters: Vec<Arc<dyn PhysicalExpr>>,
572
- result_schema: SchemaRef,
573
- properties: Arc<PlanProperties>,
574
- }
575
-
576
- impl std::fmt::Debug for LixFileDeleteExec {
577
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
578
- f.debug_struct("LixFileDeleteExec").finish()
579
- }
580
- }
581
-
582
- impl LixFileDeleteExec {
583
- fn new(
584
- blob_reader: Arc<dyn BlobDataReader>,
585
- write_ctx: SqlWriteContext,
586
- table_schema: SchemaRef,
587
- branch_binding: BranchBinding,
588
- request: LiveStateScanRequest,
589
- target_file_ids: FileIdConstraint,
590
- filters: Vec<Arc<dyn PhysicalExpr>>,
591
- ) -> Self {
592
- let result_schema = dml_count_schema();
593
- let properties = PlanProperties::new(
594
- EquivalenceProperties::new(Arc::clone(&result_schema)),
595
- Partitioning::UnknownPartitioning(1),
596
- EmissionType::Final,
597
- Boundedness::Bounded,
598
- );
599
- Self {
600
- blob_reader,
601
- write_ctx,
602
- table_schema,
603
- branch_binding,
604
- request,
605
- target_file_ids,
606
- filters,
607
- result_schema,
608
- properties: Arc::new(properties),
609
- }
610
- }
611
- }
612
-
613
- impl DisplayAs for LixFileDeleteExec {
614
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
615
- match t {
616
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
617
- write!(f, "LixFileDeleteExec(filters={})", self.filters.len())
618
- }
619
- DisplayFormatType::TreeRender => write!(f, "LixFileDeleteExec"),
620
- }
621
- }
622
- }
623
-
624
- impl ExecutionPlan for LixFileDeleteExec {
625
- fn name(&self) -> &str {
626
- "LixFileDeleteExec"
627
- }
628
-
629
- fn as_any(&self) -> &dyn Any {
630
- self
631
- }
632
-
633
- fn properties(&self) -> &Arc<PlanProperties> {
634
- &self.properties
635
- }
636
-
637
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
638
- Vec::new()
639
- }
640
-
641
- fn with_new_children(
642
- self: Arc<Self>,
643
- children: Vec<Arc<dyn ExecutionPlan>>,
644
- ) -> Result<Arc<dyn ExecutionPlan>> {
645
- if !children.is_empty() {
646
- return Err(DataFusionError::Execution(
647
- "LixFileDeleteExec does not accept children".to_string(),
648
- ));
649
- }
650
- Ok(self)
651
- }
652
-
653
- fn execute(
654
- &self,
655
- partition: usize,
656
- _context: Arc<TaskContext>,
657
- ) -> Result<SendableRecordBatchStream> {
658
- if partition != 0 {
659
- return Err(DataFusionError::Execution(format!(
660
- "LixFileDeleteExec only exposes one partition, got {partition}"
661
- )));
662
- }
663
-
664
- let blob_reader = Arc::clone(&self.blob_reader);
665
- let write_ctx = self.write_ctx.clone();
666
- let table_schema = Arc::clone(&self.table_schema);
667
- let branch_binding = self.branch_binding.clone();
668
- let request = self.request.clone();
669
- let target_file_ids = self.target_file_ids.clone();
670
- let filters = self.filters.clone();
671
- let result_schema = Arc::clone(&self.result_schema);
672
- let stream_schema = Arc::clone(&result_schema);
673
-
674
- let stream = stream::once(async move {
675
- let rows = scan_lix_file_live_rows(
676
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
677
- &request,
678
- &target_file_ids,
679
- )
680
- .await
681
- .map_err(lix_error_to_datafusion_error)?;
682
- let blob_ref_file_ids =
683
- blob_ref_file_ids_from_live_rows(&rows).map_err(lix_error_to_datafusion_error)?;
684
- let source_batch = lix_file_record_batch(&table_schema, &blob_reader, rows)
685
- .await
686
- .map_err(lix_error_to_datafusion_error)?;
687
- let matched_batch = filter_lix_file_batch(source_batch, &filters)?;
688
- let staged = lix_file_delete_stage_from_batch(
689
- &matched_batch,
690
- branch_binding.active_branch_id(),
691
- &blob_ref_file_ids,
692
- )?;
693
- let count = staged.count;
694
-
695
- if count > 0 {
696
- write_ctx
697
- .stage_write(TransactionWrite::Rows {
698
- mode: TransactionWriteMode::Replace,
699
- rows: staged.state_rows,
700
- })
701
- .await
702
- .map_err(lix_error_to_datafusion_error)?;
703
- }
704
-
705
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
706
- dml_count_batch(Arc::clone(&stream_schema), count)?,
707
- )]))
708
- })
709
- .try_flatten();
710
-
711
- Ok(Box::pin(RecordBatchStreamAdapter::new(
712
- result_schema,
713
- stream,
714
- )))
715
- }
716
- }
717
-
718
- #[allow(dead_code)]
719
- struct LixFileUpdateExec {
720
- blob_reader: Arc<dyn BlobDataReader>,
721
- write_ctx: SqlWriteContext,
722
- table_schema: SchemaRef,
723
- branch_binding: BranchBinding,
724
- functions: FunctionProviderHandle,
725
- request: LiveStateScanRequest,
726
- target_file_ids: FileIdConstraint,
727
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
728
- filters: Vec<Arc<dyn PhysicalExpr>>,
729
- result_schema: SchemaRef,
730
- properties: Arc<PlanProperties>,
731
- }
732
-
733
- impl std::fmt::Debug for LixFileUpdateExec {
734
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
735
- f.debug_struct("LixFileUpdateExec").finish()
736
- }
737
- }
738
-
739
- impl LixFileUpdateExec {
740
- fn new(
741
- blob_reader: Arc<dyn BlobDataReader>,
742
- write_ctx: SqlWriteContext,
743
- table_schema: SchemaRef,
744
- branch_binding: BranchBinding,
745
- functions: FunctionProviderHandle,
746
- request: LiveStateScanRequest,
747
- target_file_ids: FileIdConstraint,
748
- assignments: Vec<(String, Arc<dyn PhysicalExpr>)>,
749
- filters: Vec<Arc<dyn PhysicalExpr>>,
750
- ) -> Self {
751
- let result_schema = dml_count_schema();
752
- let properties = PlanProperties::new(
753
- EquivalenceProperties::new(Arc::clone(&result_schema)),
754
- Partitioning::UnknownPartitioning(1),
755
- EmissionType::Final,
756
- Boundedness::Bounded,
757
- );
758
- Self {
759
- blob_reader,
760
- write_ctx,
761
- table_schema,
762
- branch_binding,
763
- functions,
764
- request,
765
- target_file_ids,
766
- assignments,
767
- filters,
768
- result_schema,
769
- properties: Arc::new(properties),
770
- }
771
- }
772
- }
773
-
774
- impl DisplayAs for LixFileUpdateExec {
775
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
776
- match t {
777
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
778
- write!(
779
- f,
780
- "LixFileUpdateExec(assignments={}, filters={})",
781
- self.assignments.len(),
782
- self.filters.len()
783
- )
784
- }
785
- DisplayFormatType::TreeRender => write!(f, "LixFileUpdateExec"),
786
- }
787
- }
788
- }
789
-
790
- impl ExecutionPlan for LixFileUpdateExec {
791
- fn name(&self) -> &str {
792
- "LixFileUpdateExec"
793
- }
794
-
795
- fn as_any(&self) -> &dyn Any {
796
- self
797
- }
798
-
799
- fn properties(&self) -> &Arc<PlanProperties> {
800
- &self.properties
801
- }
802
-
803
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
804
- Vec::new()
805
- }
806
-
807
- fn with_new_children(
808
- self: Arc<Self>,
809
- children: Vec<Arc<dyn ExecutionPlan>>,
810
- ) -> Result<Arc<dyn ExecutionPlan>> {
811
- if !children.is_empty() {
812
- return Err(DataFusionError::Execution(
813
- "LixFileUpdateExec does not accept children".to_string(),
814
- ));
815
- }
816
- Ok(self)
817
- }
818
-
819
- fn execute(
820
- &self,
821
- partition: usize,
822
- _context: Arc<TaskContext>,
823
- ) -> Result<SendableRecordBatchStream> {
824
- if partition != 0 {
825
- return Err(DataFusionError::Execution(format!(
826
- "LixFileUpdateExec only exposes one partition, got {partition}"
827
- )));
828
- }
829
-
830
- let blob_reader = Arc::clone(&self.blob_reader);
831
- let write_ctx = self.write_ctx.clone();
832
- let table_schema = Arc::clone(&self.table_schema);
833
- let branch_binding = self.branch_binding.clone();
834
- let functions = self.functions.clone();
835
- let request = self.request.clone();
836
- let target_file_ids = self.target_file_ids.clone();
837
- let assignments = self.assignments.clone();
838
- let filters = self.filters.clone();
839
- let result_schema = Arc::clone(&self.result_schema);
840
- let stream_schema = Arc::clone(&result_schema);
841
-
842
- let stream = stream::once(async move {
843
- let rows = scan_lix_file_live_rows(
844
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
845
- &request,
846
- &target_file_ids,
847
- )
848
- .await
849
- .map_err(lix_error_to_datafusion_error)?;
850
- let source_batch = lix_file_record_batch(&table_schema, &blob_reader, rows)
851
- .await
852
- .map_err(lix_error_to_datafusion_error)?;
853
- let matched_batch = filter_lix_file_batch(source_batch, &filters)?;
854
- let assignment_values = UpdateAssignmentValues::evaluate(&matched_batch, &assignments)?;
855
- let update_columns = LixFileUpdateColumns::from_assignments(&assignments);
856
- let mut path_resolvers = None;
857
- if update_columns.path || update_columns.descriptor {
858
- path_resolvers = Some(
859
- file_path_resolvers_from_live_state(
860
- Arc::new(WriteContextLiveStateReader::new(write_ctx.clone())),
861
- branch_binding.active_branch_id(),
862
- )
863
- .await
864
- .map_err(lix_error_to_datafusion_error)?,
865
- );
866
- }
867
- let staged = lix_file_update_stage_from_batch(
868
- &matched_batch,
869
- &assignment_values,
870
- branch_binding.active_branch_id(),
871
- update_columns,
872
- path_resolvers.as_mut(),
873
- &mut || functions.call_uuid_v7(),
874
- )?;
875
- let count = staged.count;
876
-
877
- if count > 0 {
878
- let intent = if staged.file_data_writes.is_empty() {
879
- TransactionWrite::Rows {
880
- mode: TransactionWriteMode::Replace,
881
- rows: staged.state_rows,
882
- }
883
- } else {
884
- TransactionWrite::RowsWithFileData {
885
- mode: TransactionWriteMode::Replace,
886
- rows: staged.state_rows,
887
- file_data: staged.file_data_writes,
888
- count,
889
- }
890
- };
891
- write_ctx
892
- .stage_write(intent)
893
- .await
894
- .map_err(lix_error_to_datafusion_error)?;
895
- }
896
-
897
- Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
898
- dml_count_batch(Arc::clone(&stream_schema), count)?,
899
- )]))
900
- })
901
- .try_flatten();
902
-
903
- Ok(Box::pin(RecordBatchStreamAdapter::new(
904
- result_schema,
905
- stream,
906
- )))
907
- }
908
- }
909
-
910
- struct LixFileScanExec {
911
- live_state: Arc<dyn LiveStateReader>,
912
- blob_reader: Arc<dyn BlobDataReader>,
913
- batch_schema: SchemaRef,
914
- output_schema: SchemaRef,
915
- projection: Option<Vec<usize>>,
916
- request: LiveStateScanRequest,
917
- target_file_ids: FileIdConstraint,
918
- filters: Vec<Arc<dyn PhysicalExpr>>,
919
- limit: Option<usize>,
920
- properties: Arc<PlanProperties>,
921
- }
922
-
923
- impl std::fmt::Debug for LixFileScanExec {
924
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
925
- f.debug_struct("LixFileScanExec").finish()
926
- }
927
- }
928
-
929
- impl LixFileScanExec {
930
- fn new(
931
- live_state: Arc<dyn LiveStateReader>,
932
- blob_reader: Arc<dyn BlobDataReader>,
933
- batch_schema: SchemaRef,
934
- output_schema: SchemaRef,
935
- projection: Option<Vec<usize>>,
936
- request: LiveStateScanRequest,
937
- target_file_ids: FileIdConstraint,
938
- filters: Vec<Arc<dyn PhysicalExpr>>,
939
- limit: Option<usize>,
940
- ) -> Self {
941
- let properties = PlanProperties::new(
942
- EquivalenceProperties::new(output_schema.clone()),
943
- Partitioning::UnknownPartitioning(1),
944
- EmissionType::Incremental,
945
- Boundedness::Bounded,
946
- );
947
- Self {
948
- live_state,
949
- blob_reader,
950
- batch_schema,
951
- output_schema,
952
- projection,
953
- request,
954
- target_file_ids,
955
- filters,
956
- limit,
957
- properties: Arc::new(properties),
958
- }
959
- }
960
- }
961
-
962
- impl DisplayAs for LixFileScanExec {
963
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
964
- match t {
965
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
966
- write!(f, "LixFileScanExec(limit={:?})", self.limit)
967
- }
968
- DisplayFormatType::TreeRender => write!(f, "LixFileScanExec"),
969
- }
970
- }
971
- }
972
-
973
- impl ExecutionPlan for LixFileScanExec {
974
- fn name(&self) -> &str {
975
- "LixFileScanExec"
976
- }
977
-
978
- fn as_any(&self) -> &dyn Any {
979
- self
980
- }
981
-
982
- fn properties(&self) -> &Arc<PlanProperties> {
983
- &self.properties
984
- }
985
-
986
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
987
- Vec::new()
988
- }
989
-
990
- fn with_new_children(
991
- self: Arc<Self>,
992
- children: Vec<Arc<dyn ExecutionPlan>>,
993
- ) -> Result<Arc<dyn ExecutionPlan>> {
994
- if !children.is_empty() {
995
- return Err(DataFusionError::Execution(
996
- "LixFileScanExec does not accept children".to_string(),
997
- ));
998
- }
999
- Ok(self)
1000
- }
1001
-
1002
- fn execute(
1003
- &self,
1004
- partition: usize,
1005
- _context: Arc<TaskContext>,
1006
- ) -> Result<SendableRecordBatchStream> {
1007
- if partition != 0 {
1008
- return Err(DataFusionError::Execution(format!(
1009
- "LixFileScanExec only supports partition 0, got {partition}"
1010
- )));
1011
- }
1012
-
1013
- let live_state = Arc::clone(&self.live_state);
1014
- let blob_reader = Arc::clone(&self.blob_reader);
1015
- let request = self.request.clone();
1016
- let target_file_ids = self.target_file_ids.clone();
1017
- let filters = self.filters.clone();
1018
- let limit = self.limit;
1019
- let output_schema = Arc::clone(&self.output_schema);
1020
- let batch_schema = Arc::clone(&self.batch_schema);
1021
- let projection = self.projection.clone();
1022
- let fut = async move {
1023
- let rows = scan_lix_file_live_rows(live_state, &request, &target_file_ids)
1024
- .await
1025
- .map_err(|error| {
1026
- DataFusionError::Execution(format!("sql2 lix_file scan failed: {error}"))
1027
- })?;
1028
- let batch = lix_file_record_batch(&batch_schema, &blob_reader, rows)
1029
- .await
1030
- .map_err(|error| {
1031
- DataFusionError::Execution(format!("sql2 lix_file batch build failed: {error}"))
1032
- })?;
1033
- let filtered = filter_lix_file_batch(batch, &filters)?;
1034
- let projected = match projection {
1035
- Some(indices) => filtered.project(&indices).map_err(DataFusionError::from),
1036
- None => Ok(filtered),
1037
- }?;
1038
- match limit {
1039
- Some(limit) => Ok(projected.slice(0, limit.min(projected.num_rows()))),
1040
- None => Ok(projected),
1041
- }
1042
- };
1043
-
1044
- Ok(Box::pin(RecordBatchStreamAdapter::new(
1045
- output_schema,
1046
- stream::once(fut).map_ok(|batch| batch),
1047
- )))
1048
- }
1049
- }
1050
-
1051
- #[derive(Debug, Clone)]
1052
- struct FileDescriptorRecord {
1053
- id: String,
1054
- directory_id: Option<String>,
1055
- name: String,
1056
- hidden: bool,
1057
- live: MaterializedLiveStateRow,
1058
- }
1059
-
1060
- #[derive(Debug, Clone)]
1061
- struct BlobRefRecord {
1062
- blob_hash: String,
1063
- }
1064
-
1065
- #[derive(Debug, Clone)]
1066
- struct DirectoryDescriptorRecord {
1067
- id: String,
1068
- parent_id: Option<String>,
1069
- name: String,
1070
- branch_id: String,
1071
- }
1072
-
1073
- #[derive(Debug, Deserialize)]
1074
- struct FileDescriptorSnapshot {
1075
- id: String,
1076
- directory_id: Option<String>,
1077
- name: String,
1078
- hidden: bool,
1079
- }
1080
-
1081
- #[derive(Debug, Deserialize)]
1082
- struct BlobRefSnapshot {
1083
- id: String,
1084
- blob_hash: String,
1085
- }
1086
-
1087
- #[derive(Debug, Deserialize)]
1088
- struct DirectoryDescriptorSnapshot {
1089
- id: String,
1090
- parent_id: Option<String>,
1091
- name: String,
1092
- }
1093
-
1094
- #[derive(Debug, Default)]
1095
- struct LixFileStagedBatch {
1096
- state_rows: Vec<TransactionWriteRow>,
1097
- file_data_writes: Vec<TransactionFileData>,
1098
- count: u64,
1099
- }
1100
-
1101
- impl LixFileStagedBatch {
1102
- fn extend(&mut self, other: LixFileStagedBatch) {
1103
- self.state_rows.extend(other.state_rows);
1104
- self.file_data_writes.extend(other.file_data_writes);
1105
- self.count += other.count;
1106
- }
1107
-
1108
- fn extend_filesystem_plan(
1109
- &mut self,
1110
- plan: crate::sql2::filesystem_planner::FilesystemWritePlan,
1111
- ) {
1112
- self.state_rows.extend(plan.rows);
1113
- self.file_data_writes.extend(plan.file_data);
1114
- self.count += plan.count;
1115
- }
1116
-
1117
- fn extend_filesystem_delete_plan(&mut self, plan: FilesystemDeletePlan) {
1118
- self.state_rows.extend(plan.rows);
1119
- self.count += plan.count;
1120
- }
1121
- }
1122
-
1123
- #[cfg(test)]
1124
- fn lix_file_write_rows_from_batch(
1125
- batch: &RecordBatch,
1126
- branch_binding: Option<&str>,
1127
- ) -> Result<Vec<TransactionWriteRow>> {
1128
- Ok(lix_file_insert_stage_from_batch(batch, branch_binding)?.state_rows)
1129
- }
1130
-
1131
- fn lix_file_delete_stage_from_batch(
1132
- batch: &RecordBatch,
1133
- branch_binding: Option<&str>,
1134
- blob_ref_file_ids: &BTreeSet<String>,
1135
- ) -> Result<LixFileStagedBatch> {
1136
- let mut staged = LixFileStagedBatch::default();
1137
- for row_index in 0..batch.num_rows() {
1138
- let file_id = required_string_value(batch, row_index, "id")?;
1139
- let context = file_row_context_from_batch(batch, row_index, branch_binding)?;
1140
- staged.extend_filesystem_delete_plan(plan_file_delete(FileDeleteInput {
1141
- file_id: file_id.clone(),
1142
- has_blob_ref: blob_ref_file_ids.contains(&file_id),
1143
- context,
1144
- }));
1145
- }
1146
- Ok(staged)
1147
- }
1148
-
1149
- fn blob_ref_file_ids_from_live_rows(
1150
- rows: &[MaterializedLiveStateRow],
1151
- ) -> std::result::Result<BTreeSet<String>, LixError> {
1152
- let mut file_ids = BTreeSet::new();
1153
- for row in rows {
1154
- if row.schema_key != BLOB_REF_SCHEMA_KEY {
1155
- continue;
1156
- }
1157
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1158
- continue;
1159
- };
1160
- let snapshot: BlobRefSnapshot =
1161
- serde_json::from_str(snapshot_content).map_err(|error| {
1162
- LixError::new(
1163
- "LIX_ERROR_UNKNOWN",
1164
- format!("invalid lix_binary_blob_ref snapshot JSON: {error}"),
1165
- )
1166
- })?;
1167
- file_ids.insert(snapshot.id);
1168
- }
1169
- Ok(file_ids)
1170
- }
1171
-
1172
- #[cfg(test)]
1173
- fn lix_file_insert_stage_from_batch(
1174
- batch: &RecordBatch,
1175
- branch_binding: Option<&str>,
1176
- ) -> Result<LixFileStagedBatch> {
1177
- lix_file_stage_from_batch_with_options(batch, branch_binding, "lix_file", true, true, true)
1178
- }
1179
-
1180
- fn lix_file_insert_stage_from_batch_with_id_generator_and_path_resolvers(
1181
- batch: &RecordBatch,
1182
- branch_binding: Option<&str>,
1183
- surface_name: &str,
1184
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
1185
- generate_id: &mut dyn FnMut() -> String,
1186
- include_data_writes: bool,
1187
- ) -> Result<LixFileStagedBatch> {
1188
- lix_file_stage_from_batch_with_options_and_path_resolvers(
1189
- batch,
1190
- branch_binding,
1191
- surface_name,
1192
- true,
1193
- true,
1194
- include_data_writes,
1195
- Some(path_resolvers),
1196
- Some(generate_id),
1197
- )
1198
- }
1199
-
1200
- fn lix_file_insert_stage_from_batch_with_path_resolvers(
1201
- batch: &RecordBatch,
1202
- branch_binding: Option<&str>,
1203
- surface_name: &str,
1204
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
1205
- generate_directory_id: &mut dyn FnMut() -> String,
1206
- include_data_writes: bool,
1207
- ) -> Result<LixFileStagedBatch> {
1208
- lix_file_stage_from_batch_with_options_and_path_resolvers(
1209
- batch,
1210
- branch_binding,
1211
- surface_name,
1212
- true,
1213
- true,
1214
- include_data_writes,
1215
- Some(path_resolvers),
1216
- Some(generate_directory_id),
1217
- )
1218
- }
1219
-
1220
- fn lix_file_existing_update_stage_from_batch(
1221
- batch: &RecordBatch,
1222
- assignment_values: &UpdateAssignmentValues,
1223
- branch_binding: Option<&str>,
1224
- include_descriptor_writes: bool,
1225
- include_data_writes: bool,
1226
- path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1227
- ) -> Result<LixFileStagedBatch> {
1228
- let mut staged = LixFileStagedBatch::default();
1229
- let mut path_resolvers = path_resolvers;
1230
-
1231
- for row_index in 0..batch.num_rows() {
1232
- let id = required_string_value(batch, row_index, "id")?;
1233
- let hidden = update_optional_bool_value(batch, assignment_values, row_index, "hidden")?
1234
- .unwrap_or(false);
1235
- let context =
1236
- file_row_context_from_update(batch, assignment_values, row_index, branch_binding)?;
1237
-
1238
- if include_descriptor_writes {
1239
- let directory_id =
1240
- update_optional_string_value(batch, assignment_values, row_index, "directory_id")?;
1241
- let name = update_required_string_value(batch, assignment_values, row_index, "name")?;
1242
- if let Some(path_resolvers) = path_resolvers.as_deref_mut() {
1243
- let resolver = path_resolvers
1244
- .entry(file_path_resolver_key(&context))
1245
- .or_insert_with(DirectoryPathResolver::default);
1246
- resolver
1247
- .reserve_file(directory_id.clone(), name.clone(), id.clone())
1248
- .map_err(lix_error_to_datafusion_error)?;
1249
- }
1250
- staged
1251
- .state_rows
1252
- .push(file_descriptor_row(FileDescriptorRowInput {
1253
- id: id.clone(),
1254
- directory_id,
1255
- name,
1256
- hidden,
1257
- context: context.clone(),
1258
- }));
1259
- }
1260
-
1261
- if include_data_writes {
1262
- let data = update_required_binary_value(batch, assignment_values, row_index, "data")?;
1263
- stage_lix_file_data_write(&mut staged, id, data, context, None)?;
1264
- }
1265
-
1266
- staged.count = staged
1267
- .count
1268
- .checked_add(1)
1269
- .ok_or_else(|| DataFusionError::Execution("lix_file row count overflow".into()))?;
1270
- }
1271
-
1272
- Ok(staged)
1273
- }
1274
-
1275
- #[derive(Debug, Clone, Copy)]
1276
- struct LixFileUpdateColumns {
1277
- path: bool,
1278
- data: bool,
1279
- descriptor: bool,
1280
- }
1281
-
1282
- impl LixFileUpdateColumns {
1283
- fn from_assignments(assignments: &[(String, Arc<dyn PhysicalExpr>)]) -> Self {
1284
- let path = assignments
1285
- .iter()
1286
- .any(|(column_name, _)| column_name == "path");
1287
- let data = assignments
1288
- .iter()
1289
- .any(|(column_name, _)| column_name == "data");
1290
- let descriptor = assignments
1291
- .iter()
1292
- .any(|(column_name, _)| column_name != "path" && column_name != "data");
1293
- Self {
1294
- path,
1295
- data,
1296
- descriptor,
1297
- }
1298
- }
1299
- }
1300
-
1301
- fn lix_file_update_stage_from_batch(
1302
- batch: &RecordBatch,
1303
- assignment_values: &UpdateAssignmentValues,
1304
- branch_binding: Option<&str>,
1305
- update_columns: LixFileUpdateColumns,
1306
- path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1307
- generate_directory_id: &mut dyn FnMut() -> String,
1308
- ) -> Result<LixFileStagedBatch> {
1309
- if update_columns.path || update_columns.descriptor {
1310
- let Some(path_resolvers) = path_resolvers else {
1311
- return Err(DataFusionError::Execution(
1312
- "UPDATE lix_file requires filesystem path resolver".to_string(),
1313
- ));
1314
- };
1315
- return if update_columns.path {
1316
- lix_file_path_update_stage_from_batch(
1317
- batch,
1318
- assignment_values,
1319
- branch_binding,
1320
- update_columns,
1321
- path_resolvers,
1322
- generate_directory_id,
1323
- )
1324
- } else {
1325
- lix_file_existing_update_stage_from_batch(
1326
- batch,
1327
- assignment_values,
1328
- branch_binding,
1329
- update_columns.descriptor,
1330
- update_columns.data,
1331
- Some(path_resolvers),
1332
- )
1333
- };
1334
- }
1335
-
1336
- lix_file_existing_update_stage_from_batch(
1337
- batch,
1338
- assignment_values,
1339
- branch_binding,
1340
- update_columns.descriptor,
1341
- update_columns.data,
1342
- None,
1343
- )
1344
- }
1345
-
1346
- fn lix_file_path_update_stage_from_batch(
1347
- batch: &RecordBatch,
1348
- assignment_values: &UpdateAssignmentValues,
1349
- branch_binding: Option<&str>,
1350
- update_columns: LixFileUpdateColumns,
1351
- path_resolvers: &mut BTreeMap<String, DirectoryPathResolver>,
1352
- generate_directory_id: &mut dyn FnMut() -> String,
1353
- ) -> Result<LixFileStagedBatch> {
1354
- let mut staged = LixFileStagedBatch::default();
1355
-
1356
- for row_index in 0..batch.num_rows() {
1357
- let id = required_string_value(batch, row_index, "id")?;
1358
- let path = update_required_string_value(batch, assignment_values, row_index, "path")?;
1359
- let hidden = update_optional_bool_value(batch, assignment_values, row_index, "hidden")?
1360
- .unwrap_or(false);
1361
- let context =
1362
- file_row_context_from_update(batch, assignment_values, row_index, branch_binding)?;
1363
- let assigned_data = if update_columns.data {
1364
- Some(update_required_binary_value(
1365
- batch,
1366
- assignment_values,
1367
- row_index,
1368
- "data",
1369
- )?)
1370
- } else {
1371
- None
1372
- };
1373
-
1374
- let resolver = path_resolvers
1375
- .entry(file_path_resolver_key(&context))
1376
- .or_insert_with(DirectoryPathResolver::default);
1377
- let plan = plan_file_path_update(
1378
- resolver,
1379
- id.clone(),
1380
- path,
1381
- hidden,
1382
- None,
1383
- context.clone(),
1384
- generate_directory_id,
1385
- )
1386
- .map_err(lix_error_to_datafusion_error)?;
1387
- staged.extend_filesystem_plan(plan);
1388
-
1389
- if let Some(data) = assigned_data {
1390
- stage_lix_file_data_write(&mut staged, id, data, context, None)?;
1391
- }
1392
- }
1393
-
1394
- Ok(staged)
1395
- }
1396
-
1397
- #[cfg(test)]
1398
- fn lix_file_stage_from_batch_with_options(
1399
- batch: &RecordBatch,
1400
- branch_binding: Option<&str>,
1401
- surface_name: &str,
1402
- reject_read_only_fields: bool,
1403
- include_descriptor_writes: bool,
1404
- include_data_writes: bool,
1405
- ) -> Result<LixFileStagedBatch> {
1406
- lix_file_stage_from_batch_with_options_and_path_resolvers(
1407
- batch,
1408
- branch_binding,
1409
- surface_name,
1410
- reject_read_only_fields,
1411
- include_descriptor_writes,
1412
- include_data_writes,
1413
- None,
1414
- None,
1415
- )
1416
- }
1417
-
1418
- fn lix_file_stage_from_batch_with_options_and_path_resolvers(
1419
- batch: &RecordBatch,
1420
- branch_binding: Option<&str>,
1421
- surface_name: &str,
1422
- reject_read_only_fields: bool,
1423
- include_descriptor_writes: bool,
1424
- include_data_writes: bool,
1425
- mut path_resolvers: Option<&mut BTreeMap<String, DirectoryPathResolver>>,
1426
- mut generate_directory_id: Option<&mut dyn FnMut() -> String>,
1427
- ) -> Result<LixFileStagedBatch> {
1428
- let mut staged = LixFileStagedBatch::default();
1429
-
1430
- for row_index in 0..batch.num_rows() {
1431
- if reject_read_only_fields {
1432
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_entity_pk")?;
1433
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_schema_key")?;
1434
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_change_id")?;
1435
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_created_at")?;
1436
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_updated_at")?;
1437
- reject_read_only_lix_file_insert_field(batch, row_index, "lixcol_commit_id")?;
1438
- }
1439
-
1440
- let path = optional_string_value(batch, row_index, "path")?;
1441
- let id = optional_string_value(batch, row_index, "id")?;
1442
- let hidden = optional_bool_value(batch, row_index, "hidden")?;
1443
- let context = file_row_context_from_batch(batch, row_index, branch_binding)?;
1444
- let data = if include_data_writes {
1445
- insert_optional_binary_value(batch, row_index, "data")?
1446
- } else {
1447
- None
1448
- };
1449
-
1450
- if let Some(path) = path {
1451
- reject_read_only_lix_file_insert_field(batch, row_index, "directory_id")?;
1452
- reject_read_only_lix_file_insert_field(batch, row_index, "name")?;
1453
-
1454
- let Some(path_resolvers) = path_resolvers.as_deref_mut() else {
1455
- return Err(DataFusionError::Execution(
1456
- "INSERT into lix_file with path requires directory path resolver".to_string(),
1457
- ));
1458
- };
1459
- let resolver = path_resolvers
1460
- .entry(file_path_resolver_key(&context))
1461
- .or_insert_with(DirectoryPathResolver::default);
1462
- let Some(generate_directory_id) = generate_directory_id.as_deref_mut() else {
1463
- return Err(DataFusionError::Execution(
1464
- "INSERT into lix_file with path requires directory id generator".to_string(),
1465
- ));
1466
- };
1467
- let file_id = id.unwrap_or_else(|| generate_directory_id());
1468
- let mut plan = crate::sql2::filesystem_planner::plan_file_path_write(
1469
- resolver,
1470
- FilePathWriteInput {
1471
- id: Some(file_id.clone()),
1472
- path,
1473
- data,
1474
- hidden,
1475
- context,
1476
- },
1477
- generate_directory_id,
1478
- )
1479
- .map_err(lix_error_to_datafusion_error)?;
1480
- attach_lix_file_insert_origin(&mut plan.rows, surface_name, &file_id);
1481
- staged.extend_filesystem_plan(plan);
1482
- continue;
1483
- }
1484
-
1485
- let directory_id = optional_string_value(batch, row_index, "directory_id")?;
1486
- let name = required_string_value(batch, row_index, "name")?;
1487
-
1488
- let id = if data.is_some() {
1489
- match id {
1490
- Some(id) => Some(id),
1491
- None => {
1492
- let Some(generate_id) = generate_directory_id.as_deref_mut() else {
1493
- return Err(DataFusionError::Execution(
1494
- "INSERT into lix_file with data requires id generator".to_string(),
1495
- ));
1496
- };
1497
- Some(generate_id())
1498
- }
1499
- }
1500
- } else {
1501
- id
1502
- };
1503
-
1504
- if include_descriptor_writes {
1505
- if let Some(path_resolvers) = path_resolvers.as_deref_mut() {
1506
- if let Some(file_id) = id.as_ref() {
1507
- let resolver = path_resolvers
1508
- .entry(file_path_resolver_key(&context))
1509
- .or_insert_with(DirectoryPathResolver::default);
1510
- resolver
1511
- .reserve_file(directory_id.clone(), name.clone(), file_id.clone())
1512
- .map_err(lix_error_to_datafusion_error)?;
1513
- }
1514
- }
1515
- let mut row = file_descriptor_write_row(FileDescriptorWriteIntent {
1516
- id: id.clone(),
1517
- directory_id: directory_id.clone(),
1518
- name: name.clone(),
1519
- hidden,
1520
- context: context.clone(),
1521
- });
1522
- if let Some(file_id) = id.as_ref() {
1523
- row.origin = Some(lix_file_insert_origin(surface_name, file_id));
1524
- }
1525
- staged.state_rows.push(row);
1526
- }
1527
-
1528
- if let (Some(id), Some(data)) = (id, data) {
1529
- let origin = Some(lix_file_insert_origin(surface_name, &id));
1530
- stage_lix_file_data_write(&mut staged, id, data, context, origin)?;
1531
- }
1532
- staged.count = staged
1533
- .count
1534
- .checked_add(1)
1535
- .ok_or_else(|| DataFusionError::Execution("lix_file row count overflow".into()))?;
1536
- }
1537
-
1538
- Ok(staged)
1539
- }
1540
-
1541
- fn stage_lix_file_data_write(
1542
- staged: &mut LixFileStagedBatch,
1543
- file_id: String,
1544
- data: Vec<u8>,
1545
- context: FilesystemRowContext,
1546
- origin: Option<TransactionWriteOrigin>,
1547
- ) -> Result<()> {
1548
- let mut row = blob_ref_row(BlobRefRowInput {
1549
- file_id: file_id.clone(),
1550
- data: data.clone(),
1551
- context: FilesystemRowContext {
1552
- file_id: None,
1553
- metadata: None,
1554
- ..context.clone()
1555
- },
1556
- })
1557
- .map_err(lix_error_to_datafusion_error)?;
1558
- row.origin = origin;
1559
- staged.state_rows.push(row);
1560
- staged.file_data_writes.push(TransactionFileData {
1561
- file_id,
1562
- branch_id: context.branch_id,
1563
- untracked: context.untracked,
1564
- data,
1565
- });
1566
- Ok(())
1567
- }
1568
-
1569
- fn attach_lix_file_insert_origin(
1570
- rows: &mut [TransactionWriteRow],
1571
- surface_name: &str,
1572
- file_id: &str,
1573
- ) {
1574
- let origin = lix_file_insert_origin(surface_name, file_id);
1575
- for row in rows {
1576
- if row.schema_key == FILE_DESCRIPTOR_SCHEMA_KEY || row.schema_key == BLOB_REF_SCHEMA_KEY {
1577
- row.origin = Some(origin.clone());
1578
- }
1579
- }
1580
- }
1581
-
1582
- fn lix_file_insert_origin(surface_name: &str, file_id: &str) -> TransactionWriteOrigin {
1583
- TransactionWriteOrigin {
1584
- surface: surface_name.to_string(),
1585
- operation: TransactionWriteOperation::Insert,
1586
- primary_key: Some(LogicalPrimaryKey {
1587
- columns: vec!["id".to_string()],
1588
- values: vec![file_id.to_string()],
1589
- }),
1590
- }
1591
- }
1592
-
1593
- fn file_row_context_from_batch(
1594
- batch: &RecordBatch,
1595
- row_index: usize,
1596
- branch_binding: Option<&str>,
1597
- ) -> Result<FilesystemRowContext> {
1598
- let explicit_branch_id = optional_string_value(batch, row_index, "lixcol_branch_id")?;
1599
- let scope = resolve_write_branch_scope(
1600
- optional_bool_value(batch, row_index, "lixcol_global")?,
1601
- explicit_branch_id,
1602
- branch_binding,
1603
- "INSERT into lix_file_by_branch",
1604
- "lix_file",
1605
- )?;
1606
-
1607
- Ok(FilesystemRowContext {
1608
- branch_id: scope.branch_id,
1609
- global: scope.global,
1610
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1611
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1612
- metadata: optional_metadata_value(batch, row_index, "lixcol_metadata", "lix_file")?,
1613
- })
1614
- }
1615
-
1616
- fn file_row_context_from_update(
1617
- batch: &RecordBatch,
1618
- assignment_values: &UpdateAssignmentValues,
1619
- row_index: usize,
1620
- branch_binding: Option<&str>,
1621
- ) -> Result<FilesystemRowContext> {
1622
- let explicit_branch_id = optional_string_value(batch, row_index, "lixcol_branch_id")?;
1623
- let scope = resolve_write_branch_scope(
1624
- optional_bool_value(batch, row_index, "lixcol_global")?,
1625
- explicit_branch_id,
1626
- branch_binding,
1627
- "UPDATE into lix_file_by_branch",
1628
- "lix_file",
1629
- )?;
1630
-
1631
- Ok(FilesystemRowContext {
1632
- branch_id: scope.branch_id,
1633
- global: scope.global,
1634
- untracked: optional_bool_value(batch, row_index, "lixcol_untracked")?.unwrap_or(false),
1635
- file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
1636
- metadata: update_optional_metadata_value(
1637
- batch,
1638
- assignment_values,
1639
- row_index,
1640
- "lixcol_metadata",
1641
- "lix_file",
1642
- )?,
1643
- })
1644
- }
1645
-
1646
- fn file_path_resolver_key(context: &FilesystemRowContext) -> String {
1647
- filesystem_storage_scope_key(
1648
- &context.branch_id,
1649
- context.global,
1650
- context.untracked,
1651
- context.file_id.as_deref(),
1652
- )
1653
- }
1654
-
1655
- async fn file_path_resolvers_from_live_state(
1656
- live_state: Arc<dyn LiveStateReader>,
1657
- branch_binding: Option<&str>,
1658
- ) -> std::result::Result<BTreeMap<String, DirectoryPathResolver>, LixError> {
1659
- let rows = live_state
1660
- .scan_rows(&LiveStateScanRequest {
1661
- filter: LiveStateFilter {
1662
- schema_keys: vec![
1663
- DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
1664
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
1665
- ],
1666
- branch_ids: branch_binding
1667
- .map(|branch_id| vec![branch_id.to_string()])
1668
- .unwrap_or_default(),
1669
- ..Default::default()
1670
- },
1671
- ..Default::default()
1672
- })
1673
- .await?;
1674
- let mut resolvers = directory_path_resolvers_from_state_rows(rows)?;
1675
- if let Some(branch_id) = branch_binding {
1676
- let key = filesystem_storage_scope_key(branch_id, false, false, None);
1677
- resolvers
1678
- .entry(key)
1679
- .or_insert_with(DirectoryPathResolver::default);
1680
- }
1681
- Ok(resolvers)
1682
- }
1683
-
1684
- async fn lix_file_record_batch(
1685
- schema: &SchemaRef,
1686
- blob_reader: &Arc<dyn BlobDataReader>,
1687
- rows: Vec<MaterializedLiveStateRow>,
1688
- ) -> Result<RecordBatch, LixError> {
1689
- let projected_columns = schema
1690
- .fields()
1691
- .iter()
1692
- .map(|field| field.name().as_str())
1693
- .collect::<Vec<_>>();
1694
- let needs_data = projected_columns
1695
- .iter()
1696
- .any(|column_name| *column_name == "data");
1697
-
1698
- let mut file_rows = BTreeMap::<(String, String), FileDescriptorRecord>::new();
1699
- let mut blob_rows = BTreeMap::<(String, String), BlobRefRecord>::new();
1700
- let mut directory_rows = Vec::<DirectoryDescriptorRecord>::new();
1701
-
1702
- for row in rows {
1703
- match row.schema_key.as_str() {
1704
- FILE_DESCRIPTOR_SCHEMA_KEY => {
1705
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1706
- continue;
1707
- };
1708
- let snapshot: FileDescriptorSnapshot = serde_json::from_str(snapshot_content)
1709
- .map_err(|error| {
1710
- LixError::new(
1711
- "LIX_ERROR_UNKNOWN",
1712
- format!("invalid lix_file_descriptor snapshot JSON: {error}"),
1713
- )
1714
- })?;
1715
- file_rows.insert(
1716
- (row.branch_id.clone(), snapshot.id.clone()),
1717
- FileDescriptorRecord {
1718
- id: snapshot.id,
1719
- directory_id: snapshot.directory_id,
1720
- name: snapshot.name,
1721
- hidden: snapshot.hidden,
1722
- live: row,
1723
- },
1724
- );
1725
- }
1726
- BLOB_REF_SCHEMA_KEY => {
1727
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1728
- continue;
1729
- };
1730
- let snapshot: BlobRefSnapshot =
1731
- serde_json::from_str(snapshot_content).map_err(|error| {
1732
- LixError::new(
1733
- "LIX_ERROR_UNKNOWN",
1734
- format!("invalid lix_binary_blob_ref snapshot JSON: {error}"),
1735
- )
1736
- })?;
1737
- blob_rows.insert(
1738
- (row.branch_id.clone(), snapshot.id.clone()),
1739
- BlobRefRecord {
1740
- blob_hash: snapshot.blob_hash,
1741
- },
1742
- );
1743
- }
1744
- DIRECTORY_DESCRIPTOR_SCHEMA_KEY => {
1745
- let Some(snapshot_content) = row.snapshot_content.as_deref() else {
1746
- continue;
1747
- };
1748
- let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(snapshot_content)
1749
- .map_err(|error| {
1750
- LixError::new(
1751
- "LIX_ERROR_UNKNOWN",
1752
- format!("invalid lix_directory_descriptor snapshot JSON: {error}"),
1753
- )
1754
- })?;
1755
- directory_rows.push(DirectoryDescriptorRecord {
1756
- id: snapshot.id,
1757
- parent_id: snapshot.parent_id,
1758
- name: snapshot.name,
1759
- branch_id: row.branch_id,
1760
- });
1761
- }
1762
- _ => {}
1763
- }
1764
- }
1765
-
1766
- let directory_paths = derive_directory_paths(&directory_rows)?;
1767
- let mut ids = Vec::new();
1768
- let mut paths = Vec::new();
1769
- let mut directory_ids = Vec::new();
1770
- let mut names = Vec::new();
1771
- let mut hiddens = Vec::new();
1772
- let mut data_values = Vec::new();
1773
- let mut entity_pks = Vec::new();
1774
- let mut schema_keys = Vec::new();
1775
- let mut file_ids = Vec::new();
1776
- let mut globals = Vec::new();
1777
- let mut change_ids = Vec::new();
1778
- let mut created_ats = Vec::new();
1779
- let mut updated_ats = Vec::new();
1780
- let mut commit_ids = Vec::new();
1781
- let mut untracked_values = Vec::new();
1782
- let mut metadata_values = Vec::new();
1783
- let mut branch_ids = Vec::new();
1784
-
1785
- for ((branch_id, _), file) in file_rows {
1786
- let directory_path = match file.directory_id.as_ref() {
1787
- Some(directory_id) => {
1788
- let key = (branch_id.clone(), directory_id.clone());
1789
- let Some(path) = directory_paths.get(&key).cloned() else {
1790
- return Err(LixError::new(
1791
- LixError::CODE_FOREIGN_KEY,
1792
- format!(
1793
- "lix_file_descriptor '{}' references missing directory_id '{}' in branch '{}'",
1794
- file.id, directory_id, branch_id
1795
- ),
1796
- ));
1797
- };
1798
- Some(path)
1799
- }
1800
- None => None,
1801
- };
1802
- let path = match directory_path {
1803
- Some(directory_path) => format!("{directory_path}{}", file.name),
1804
- None => format!("/{}", file.name),
1805
- };
1806
- let data = if needs_data {
1807
- match blob_rows.get(&(branch_id.clone(), file.id.clone())) {
1808
- Some(blob_ref) => load_single_blob_bytes(blob_reader, &blob_ref.blob_hash).await?,
1809
- None => None,
1810
- }
1811
- } else {
1812
- None
1813
- };
1814
-
1815
- ids.push(Some(file.id));
1816
- paths.push(Some(path));
1817
- directory_ids.push(file.directory_id);
1818
- names.push(Some(file.name));
1819
- hiddens.push(Some(file.hidden));
1820
- data_values.push(data);
1821
- entity_pks.push(Some(file.live.entity_pk.as_json_array_text()?));
1822
- schema_keys.push(Some(file.live.schema_key));
1823
- file_ids.push(file.live.file_id);
1824
- globals.push(Some(file.live.global));
1825
- change_ids.push(file.live.change_id);
1826
- created_ats.push(file.live.created_at);
1827
- updated_ats.push(file.live.updated_at);
1828
- commit_ids.push(file.live.commit_id);
1829
- untracked_values.push(Some(file.live.untracked));
1830
- metadata_values.push(file.live.metadata.as_ref().map(serialize_row_metadata));
1831
- branch_ids.push(Some(branch_id));
1832
- }
1833
-
1834
- let mut columns = Vec::<ArrayRef>::with_capacity(schema.fields().len());
1835
- for field in schema.fields() {
1836
- let array: ArrayRef = match field.name().as_str() {
1837
- "id" => Arc::new(StringArray::from(ids.clone())),
1838
- "path" => Arc::new(StringArray::from(paths.clone())),
1839
- "directory_id" => Arc::new(StringArray::from(directory_ids.clone())),
1840
- "name" => Arc::new(StringArray::from(names.clone())),
1841
- "hidden" => Arc::new(BooleanArray::from(hiddens.clone())),
1842
- "data" => Arc::new(BinaryArray::from(
1843
- data_values
1844
- .iter()
1845
- .map(|value| value.as_deref())
1846
- .collect::<Vec<_>>(),
1847
- )),
1848
- "lixcol_entity_pk" => Arc::new(StringArray::from(entity_pks.clone())),
1849
- "lixcol_schema_key" => Arc::new(StringArray::from(schema_keys.clone())),
1850
- "lixcol_file_id" => Arc::new(StringArray::from(file_ids.clone())),
1851
- "lixcol_global" => Arc::new(BooleanArray::from(globals.clone())),
1852
- "lixcol_change_id" => Arc::new(StringArray::from(change_ids.clone())),
1853
- "lixcol_created_at" => Arc::new(StringArray::from(created_ats.clone())),
1854
- "lixcol_updated_at" => Arc::new(StringArray::from(updated_ats.clone())),
1855
- "lixcol_commit_id" => Arc::new(StringArray::from(commit_ids.clone())),
1856
- "lixcol_untracked" => Arc::new(BooleanArray::from(untracked_values.clone())),
1857
- "lixcol_metadata" => Arc::new(StringArray::from(metadata_values.clone())),
1858
- "lixcol_branch_id" => Arc::new(StringArray::from(branch_ids.clone())),
1859
- other => {
1860
- return Err(LixError::new(
1861
- "LIX_ERROR_UNKNOWN",
1862
- format!("sql2 lix_file provider does not support projected column '{other}'"),
1863
- ))
1864
- }
1865
- };
1866
- columns.push(array);
1867
- }
1868
-
1869
- let options = RecordBatchOptions::new().with_row_count(Some(ids.len()));
1870
- RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
1871
- LixError::new(
1872
- "LIX_ERROR_UNKNOWN",
1873
- format!("sql2 failed to build lix_file record batch: {error}"),
1874
- )
1875
- })
1876
- }
1877
-
1878
- async fn load_single_blob_bytes(
1879
- blob_reader: &Arc<dyn BlobDataReader>,
1880
- blob_hash: &str,
1881
- ) -> Result<Option<Vec<u8>>, LixError> {
1882
- let hash = BlobHash::from_hex(blob_hash)?;
1883
- Ok(blob_reader
1884
- .load_bytes_many(&[hash])
1885
- .await?
1886
- .into_vec()
1887
- .into_iter()
1888
- .next()
1889
- .flatten())
1890
- }
1891
-
1892
- fn derive_directory_paths(
1893
- rows: &[DirectoryDescriptorRecord],
1894
- ) -> Result<BTreeMap<(String, String), String>, LixError> {
1895
- let mut by_branch = BTreeMap::<String, BTreeMap<String, &DirectoryDescriptorRecord>>::new();
1896
- for row in rows {
1897
- by_branch
1898
- .entry(row.branch_id.clone())
1899
- .or_default()
1900
- .insert(row.id.clone(), row);
1901
- }
1902
-
1903
- let mut paths = BTreeMap::<(String, String), String>::new();
1904
- for (branch_id, records) in by_branch {
1905
- for directory_id in records.keys() {
1906
- derive_directory_path_for(
1907
- &branch_id,
1908
- directory_id,
1909
- &records,
1910
- &mut paths,
1911
- &mut BTreeSet::new(),
1912
- )?;
1913
- }
1914
- }
1915
- Ok(paths)
1916
- }
1917
-
1918
- fn derive_directory_path_for(
1919
- branch_id: &str,
1920
- directory_id: &str,
1921
- records: &BTreeMap<String, &DirectoryDescriptorRecord>,
1922
- paths: &mut BTreeMap<(String, String), String>,
1923
- visiting: &mut BTreeSet<String>,
1924
- ) -> Result<Option<String>, LixError> {
1925
- if let Some(path) = paths.get(&(branch_id.to_string(), directory_id.to_string())) {
1926
- return Ok(Some(path.clone()));
1927
- }
1928
- if !visiting.insert(directory_id.to_string()) {
1929
- return Err(directory_parent_cycle_error(branch_id, directory_id));
1930
- }
1931
- let Some(row) = records.get(directory_id) else {
1932
- visiting.remove(directory_id);
1933
- return Ok(None);
1934
- };
1935
- let path = match row.parent_id.as_deref() {
1936
- Some(parent_id) => {
1937
- let Some(parent_path) =
1938
- derive_directory_path_for(branch_id, parent_id, records, paths, visiting)?
1939
- else {
1940
- visiting.remove(directory_id);
1941
- return Ok(None);
1942
- };
1943
- format!("{parent_path}{}/", row.name)
1944
- }
1945
- None => format!("/{}/", row.name),
1946
- };
1947
- visiting.remove(directory_id);
1948
- paths.insert(
1949
- (branch_id.to_string(), directory_id.to_string()),
1950
- path.clone(),
1951
- );
1952
- Ok(Some(path))
1953
- }
1954
-
1955
- fn directory_parent_cycle_error(branch_id: &str, directory_id: &str) -> LixError {
1956
- LixError::new(
1957
- LixError::CODE_CONSTRAINT_VIOLATION,
1958
- format!(
1959
- "lix_directory_descriptor parent_id cycle in branch '{branch_id}' while resolving directory '{directory_id}'"
1960
- ),
1961
- )
1962
- }
1963
-
1964
- fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
1965
- let fields = match projection {
1966
- Some(indices) => indices
1967
- .iter()
1968
- .map(|index| base_schema.field(*index).as_ref().clone())
1969
- .collect::<Vec<_>>(),
1970
- None => base_schema
1971
- .fields()
1972
- .iter()
1973
- .map(|field| field.as_ref().clone())
1974
- .collect::<Vec<_>>(),
1975
- };
1976
- Ok(Arc::new(Schema::new(fields)))
1977
- }
1978
-
1979
- fn lix_file_scan_request(
1980
- branch_binding: Option<&str>,
1981
- projected_schema: Option<&Schema>,
1982
- limit: Option<usize>,
1983
- ) -> LiveStateScanRequest {
1984
- LiveStateScanRequest {
1985
- filter: LiveStateFilter {
1986
- schema_keys: vec![
1987
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
1988
- BLOB_REF_SCHEMA_KEY.to_string(),
1989
- DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
1990
- ],
1991
- branch_ids: branch_binding
1992
- .map(|branch_id| vec![branch_id.to_string()])
1993
- .unwrap_or_default(),
1994
- ..LiveStateFilter::default()
1995
- },
1996
- projection: lix_file_live_state_projection(projected_schema),
1997
- limit,
1998
- }
1999
- }
2000
-
2001
- fn lix_file_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
2002
- let Some(schema) = projected_schema else {
2003
- return LiveStateProjection::default();
2004
- };
2005
- let mut columns = Vec::new();
2006
- let needs_snapshot = schema.fields().iter().any(|field| {
2007
- matches!(
2008
- field.name().as_str(),
2009
- "path" | "directory_id" | "name" | "hidden" | "data"
2010
- )
2011
- });
2012
- if needs_snapshot {
2013
- columns.push("snapshot_content".to_string());
2014
- }
2015
- if schema
2016
- .fields()
2017
- .iter()
2018
- .any(|field| field.name() == "lixcol_metadata")
2019
- {
2020
- columns.push("metadata".to_string());
2021
- }
2022
- LiveStateProjection { columns }
2023
- }
2024
-
2025
- async fn scan_lix_file_live_rows(
2026
- live_state: Arc<dyn LiveStateReader>,
2027
- request: &LiveStateScanRequest,
2028
- target_file_ids: &FileIdConstraint,
2029
- ) -> std::result::Result<Vec<MaterializedLiveStateRow>, LixError> {
2030
- let target_file_ids = match target_file_ids {
2031
- FileIdConstraint::All => return live_state.scan_rows(request).await,
2032
- FileIdConstraint::None => return Ok(Vec::new()),
2033
- FileIdConstraint::Ids(target_file_ids) => target_file_ids,
2034
- };
2035
-
2036
- let mut file_request = request.clone();
2037
- file_request.filter.schema_keys = vec![
2038
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
2039
- BLOB_REF_SCHEMA_KEY.to_string(),
2040
- ];
2041
- file_request.filter.entity_pks = target_file_ids
2042
- .iter()
2043
- .map(|file_id| EntityPk::single(file_id.clone()))
2044
- .collect();
2045
-
2046
- let mut rows = live_state.scan_rows(&file_request).await?;
2047
-
2048
- let mut directory_request = request.clone();
2049
- directory_request.filter.schema_keys = vec![DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string()];
2050
- directory_request.filter.entity_pks.clear();
2051
- directory_request.limit = None;
2052
- rows.extend(live_state.scan_rows(&directory_request).await?);
2053
-
2054
- Ok(rows)
2055
- }
2056
-
2057
- #[derive(Debug, Clone, PartialEq, Eq)]
2058
- enum FileIdConstraint {
2059
- All,
2060
- None,
2061
- Ids(BTreeSet<String>),
2062
- }
2063
-
2064
- impl FileIdConstraint {
2065
- fn from_ids(ids: Vec<String>) -> Self {
2066
- let ids = ids.into_iter().collect::<BTreeSet<_>>();
2067
- if ids.is_empty() {
2068
- Self::None
2069
- } else {
2070
- Self::Ids(ids)
2071
- }
2072
- }
2073
-
2074
- fn intersect(self, other: Self) -> Self {
2075
- match (self, other) {
2076
- (Self::None, _) | (_, Self::None) => Self::None,
2077
- (Self::All, constraint) | (constraint, Self::All) => constraint,
2078
- (Self::Ids(left), Self::Ids(right)) => {
2079
- let ids = left.intersection(&right).cloned().collect::<BTreeSet<_>>();
2080
- if ids.is_empty() {
2081
- Self::None
2082
- } else {
2083
- Self::Ids(ids)
2084
- }
2085
- }
2086
- }
2087
- }
2088
-
2089
- fn union(self, other: Self) -> Self {
2090
- match (self, other) {
2091
- (Self::All, _) | (_, Self::All) => Self::All,
2092
- (Self::None, constraint) | (constraint, Self::None) => constraint,
2093
- (Self::Ids(mut left), Self::Ids(right)) => {
2094
- left.extend(right);
2095
- Self::Ids(left)
2096
- }
2097
- }
2098
- }
2099
- }
2100
-
2101
- fn file_id_constraint_from_filters(filters: &[Expr]) -> Result<FileIdConstraint> {
2102
- let analyzer = LixFileIdFilterAnalyzer;
2103
- let mut constraint = FileIdConstraint::All;
2104
- for filter in filters {
2105
- if let Some(filter_constraint) = analyzer.analyze(filter)? {
2106
- constraint = constraint.intersect(filter_constraint);
2107
- }
2108
- }
2109
- Ok(constraint)
2110
- }
2111
-
2112
- struct LixFileIdFilterAnalyzer;
2113
-
2114
- impl LixFileIdFilterAnalyzer {
2115
- fn supports(&self, expr: &Expr) -> bool {
2116
- self.analyze(expr)
2117
- .is_ok_and(|constraint| constraint.is_some())
2118
- }
2119
-
2120
- fn analyze(&self, expr: &Expr) -> Result<Option<FileIdConstraint>> {
2121
- ExactStringColumnFilterAnalyzer::new("id").analyze(expr)
2122
- }
2123
- }
2124
-
2125
- struct ExactStringColumnFilterAnalyzer {
2126
- column_name: &'static str,
2127
- }
2128
-
2129
- impl ExactStringColumnFilterAnalyzer {
2130
- fn new(column_name: &'static str) -> Self {
2131
- Self { column_name }
2132
- }
2133
-
2134
- fn supports(&self, expr: &Expr) -> bool {
2135
- self.analyze(expr)
2136
- .is_ok_and(|constraint| constraint.is_some())
2137
- }
2138
-
2139
- fn analyze(&self, expr: &Expr) -> Result<Option<FileIdConstraint>> {
2140
- match expr {
2141
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
2142
- let Some(left) = self.analyze(&binary_expr.left)? else {
2143
- return Ok(None);
2144
- };
2145
- let Some(right) = self.analyze(&binary_expr.right)? else {
2146
- return Ok(None);
2147
- };
2148
- Ok(Some(left.intersect(right)))
2149
- }
2150
- Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
2151
- let Some(left) = self.analyze(&binary_expr.left)? else {
2152
- return Ok(None);
2153
- };
2154
- let Some(right) = self.analyze(&binary_expr.right)? else {
2155
- return Ok(None);
2156
- };
2157
- Ok(Some(left.union(right)))
2158
- }
2159
- Expr::BinaryExpr(binary_expr) => Ok(self
2160
- .value_from_binary_filter(binary_expr)
2161
- .map(|value| FileIdConstraint::Ids(BTreeSet::from([value])))),
2162
- Expr::InList(in_list) => Ok(self
2163
- .values_from_in_list_filter(in_list)
2164
- .map(FileIdConstraint::from_ids)),
2165
- _ => Ok(None),
2166
- }
2167
- }
2168
-
2169
- fn value_from_binary_filter(&self, binary_expr: &BinaryExpr) -> Option<String> {
2170
- if binary_expr.op != Operator::Eq {
2171
- return None;
2172
- }
2173
- self.value_from_column_literal_filter(&binary_expr.left, &binary_expr.right)
2174
- .or_else(|| {
2175
- self.value_from_column_literal_filter(&binary_expr.right, &binary_expr.left)
2176
- })
2177
- }
2178
-
2179
- fn values_from_in_list_filter(&self, in_list: &InList) -> Option<Vec<String>> {
2180
- if in_list.negated {
2181
- return None;
2182
- }
2183
- let Expr::Column(column) = in_list.expr.as_ref() else {
2184
- return None;
2185
- };
2186
- if column.name != self.column_name {
2187
- return None;
2188
- }
2189
- let values = in_list
2190
- .list
2191
- .iter()
2192
- .map(string_expr_literal)
2193
- .collect::<Option<Vec<_>>>()?;
2194
- Some(values)
2195
- }
2196
-
2197
- fn value_from_column_literal_filter(
2198
- &self,
2199
- column_expr: &Expr,
2200
- literal_expr: &Expr,
2201
- ) -> Option<String> {
2202
- let Expr::Column(column) = column_expr else {
2203
- return None;
2204
- };
2205
- if column.name != self.column_name {
2206
- return None;
2207
- }
2208
- string_expr_literal(literal_expr)
2209
- }
2210
- }
2211
-
2212
- fn string_expr_literal(expr: &Expr) -> Option<String> {
2213
- let Expr::Literal(literal, _) = expr else {
2214
- return None;
2215
- };
2216
- match literal {
2217
- ScalarValue::Utf8(Some(value))
2218
- | ScalarValue::Utf8View(Some(value))
2219
- | ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
2220
- _ => None,
2221
- }
2222
- }
2223
-
2224
- fn contains_column(expr: &Expr, column_name: &str) -> bool {
2225
- match expr {
2226
- Expr::Column(column) => column.name == column_name,
2227
- Expr::BinaryExpr(binary_expr) => {
2228
- contains_column(&binary_expr.left, column_name)
2229
- || contains_column(&binary_expr.right, column_name)
2230
- }
2231
- Expr::InList(in_list) => {
2232
- contains_column(&in_list.expr, column_name)
2233
- || in_list
2234
- .list
2235
- .iter()
2236
- .any(|expr| contains_column(expr, column_name))
2237
- }
2238
- Expr::Between(between) => {
2239
- contains_column(&between.expr, column_name)
2240
- || contains_column(&between.low, column_name)
2241
- || contains_column(&between.high, column_name)
2242
- }
2243
- Expr::Not(expr) | Expr::IsNull(expr) | Expr::IsNotNull(expr) => {
2244
- contains_column(expr, column_name)
2245
- }
2246
- Expr::Negative(expr) => contains_column(expr, column_name),
2247
- _ => false,
2248
- }
2249
- }
2250
-
2251
- fn validate_lix_file_update_assignments(
2252
- schema: &SchemaRef,
2253
- assignments: &[(String, Expr)],
2254
- ) -> Result<()> {
2255
- for (column_name, expr) in assignments {
2256
- schema.field_with_name(column_name).map_err(|_| {
2257
- DataFusionError::Plan(format!(
2258
- "UPDATE lix_file failed: column '{column_name}' does not exist"
2259
- ))
2260
- })?;
2261
- if !matches!(
2262
- column_name.as_str(),
2263
- "path" | "directory_id" | "name" | "hidden" | "data" | "lixcol_metadata"
2264
- ) {
2265
- return Err(DataFusionError::Execution(format!(
2266
- "UPDATE lix_file cannot stage read-only column '{column_name}'"
2267
- )));
2268
- }
2269
- if column_name == "data" {
2270
- reject_non_binary_lix_file_data_assignment(expr)?;
2271
- }
2272
- }
2273
- Ok(())
2274
- }
2275
-
2276
- fn reject_non_binary_lix_file_data_assignment(expr: &Expr) -> Result<()> {
2277
- match expr {
2278
- Expr::Literal(value, _) => {
2279
- if !scalar_is_binary_or_null(value) {
2280
- return Err(non_binary_lix_file_data_assignment_error());
2281
- }
2282
- }
2283
- Expr::Cast(cast) if is_binary_type(&cast.data_type) => {
2284
- if !logical_expr_is_binary_or_null(&cast.expr) {
2285
- return Err(non_binary_lix_file_data_assignment_error());
2286
- }
2287
- }
2288
- _ => {}
2289
- }
2290
-
2291
- Ok(())
2292
- }
2293
-
2294
- fn non_binary_lix_file_data_assignment_error() -> DataFusionError {
2295
- lix_file_data_type_error(
2296
- "UPDATE lix_file",
2297
- "data",
2298
- "use X'...' or a binary parameter for file contents",
2299
- )
2300
- }
2301
-
2302
- fn filter_lix_file_batch(
2303
- batch: RecordBatch,
2304
- filters: &[Arc<dyn PhysicalExpr>],
2305
- ) -> Result<RecordBatch> {
2306
- let Some(mask) = evaluate_lix_file_filters(&batch, filters)? else {
2307
- return Ok(batch);
2308
- };
2309
- Ok(filter_record_batch(&batch, &mask)?)
2310
- }
2311
-
2312
- fn evaluate_lix_file_filters(
2313
- batch: &RecordBatch,
2314
- filters: &[Arc<dyn PhysicalExpr>],
2315
- ) -> Result<Option<BooleanArray>> {
2316
- if filters.is_empty() {
2317
- return Ok(None);
2318
- }
2319
-
2320
- let mut combined_mask: Option<BooleanArray> = None;
2321
- for filter in filters {
2322
- let result = filter.evaluate(batch)?;
2323
- let array = result.into_array(batch.num_rows())?;
2324
- let bool_array = array
2325
- .as_any()
2326
- .downcast_ref::<BooleanArray>()
2327
- .ok_or_else(|| {
2328
- DataFusionError::Execution("lix_file filter was not boolean".to_string())
2329
- })?;
2330
- let normalized = bool_array
2331
- .iter()
2332
- .map(|value| Some(value == Some(true)))
2333
- .collect::<BooleanArray>();
2334
- combined_mask = Some(match combined_mask {
2335
- Some(existing) => and(&existing, &normalized)?,
2336
- None => normalized,
2337
- });
2338
- }
2339
- Ok(combined_mask)
2340
- }
2341
-
2342
- fn dml_count_schema() -> SchemaRef {
2343
- Arc::new(Schema::new(vec![Field::new(
2344
- "count",
2345
- DataType::UInt64,
2346
- false,
2347
- )]))
2348
- }
2349
-
2350
- fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
2351
- RecordBatch::try_new(
2352
- schema,
2353
- vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
2354
- )
2355
- .map_err(DataFusionError::from)
2356
- }
2357
-
2358
- fn record_batch_has_non_null_column(batch: &RecordBatch, column_name: &str) -> Result<bool> {
2359
- for row_index in 0..batch.num_rows() {
2360
- if optional_scalar_value(batch, row_index, column_name)?
2361
- .is_some_and(|value| !value.is_null())
2362
- {
2363
- return Ok(true);
2364
- }
2365
- }
2366
- Ok(false)
2367
- }
2368
-
2369
- fn reject_read_only_lix_file_insert_field(
2370
- batch: &RecordBatch,
2371
- row_index: usize,
2372
- column_name: &str,
2373
- ) -> Result<()> {
2374
- if optional_scalar_value(batch, row_index, column_name)?.is_some_and(|value| !value.is_null()) {
2375
- return Err(DataFusionError::Execution(format!(
2376
- "INSERT into lix_file cannot stage read-only column '{column_name}'"
2377
- )));
2378
- }
2379
- Ok(())
2380
- }
2381
-
2382
- fn required_string_value(
2383
- batch: &RecordBatch,
2384
- row_index: usize,
2385
- column_name: &str,
2386
- ) -> Result<String> {
2387
- optional_string_value(batch, row_index, column_name)?.ok_or_else(|| {
2388
- DataFusionError::Execution(format!(
2389
- "INSERT into lix_file requires non-null text column '{column_name}'"
2390
- ))
2391
- })
2392
- }
2393
-
2394
- fn update_required_string_value(
2395
- batch: &RecordBatch,
2396
- assignment_values: &UpdateAssignmentValues,
2397
- row_index: usize,
2398
- column_name: &str,
2399
- ) -> Result<String> {
2400
- update_optional_string_value(batch, assignment_values, row_index, column_name)?.ok_or_else(
2401
- || {
2402
- DataFusionError::Execution(format!(
2403
- "UPDATE lix_file requires non-null text column '{column_name}'"
2404
- ))
2405
- },
2406
- )
2407
- }
2408
-
2409
- fn update_optional_string_value(
2410
- batch: &RecordBatch,
2411
- assignment_values: &UpdateAssignmentValues,
2412
- row_index: usize,
2413
- column_name: &str,
2414
- ) -> Result<Option<String>> {
2415
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
2416
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
2417
- InsertCell::Provided(SqlCell::Value(
2418
- ScalarValue::Utf8(Some(value))
2419
- | ScalarValue::Utf8View(Some(value))
2420
- | ScalarValue::LargeUtf8(Some(value)),
2421
- )) => Ok(Some(value)),
2422
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
2423
- "UPDATE lix_file expected text-compatible column '{column_name}', got {other:?}"
2424
- ))),
2425
- }
2426
- }
2427
-
2428
- fn update_optional_metadata_value(
2429
- batch: &RecordBatch,
2430
- assignment_values: &UpdateAssignmentValues,
2431
- row_index: usize,
2432
- column_name: &str,
2433
- context: &str,
2434
- ) -> Result<Option<TransactionJson>> {
2435
- update_optional_string_value(batch, assignment_values, row_index, column_name)?
2436
- .map(|value| {
2437
- let metadata = parse_row_metadata_value(&value, context)
2438
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)?;
2439
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
2440
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)
2441
- })
2442
- .transpose()
2443
- }
2444
-
2445
- fn update_optional_bool_value(
2446
- batch: &RecordBatch,
2447
- assignment_values: &UpdateAssignmentValues,
2448
- row_index: usize,
2449
- column_name: &str,
2450
- ) -> Result<Option<bool>> {
2451
- match assignment_values.assigned_or_existing_cell(batch, row_index, column_name)? {
2452
- InsertCell::Omitted | InsertCell::Provided(SqlCell::Null) => Ok(None),
2453
- InsertCell::Provided(SqlCell::Value(ScalarValue::Boolean(Some(value)))) => Ok(Some(value)),
2454
- InsertCell::Provided(SqlCell::Value(other)) => Err(DataFusionError::Execution(format!(
2455
- "UPDATE lix_file expected boolean column '{column_name}', got {other:?}"
2456
- ))),
2457
- }
2458
- }
2459
-
2460
- fn update_required_binary_value(
2461
- _batch: &RecordBatch,
2462
- assignment_values: &UpdateAssignmentValues,
2463
- row_index: usize,
2464
- column_name: &str,
2465
- ) -> Result<Vec<u8>> {
2466
- match assignment_values.assigned_cell(row_index, column_name)? {
2467
- UpdateCell::Unassigned | UpdateCell::Assigned(SqlCell::Null) => {
2468
- Err(lix_file_data_type_error(
2469
- "UPDATE lix_file",
2470
- column_name,
2471
- "use X'' for an empty file or omit data to leave contents unchanged",
2472
- ))
2473
- }
2474
- UpdateCell::Assigned(SqlCell::Value(ScalarValue::Binary(Some(value))))
2475
- | UpdateCell::Assigned(SqlCell::Value(ScalarValue::LargeBinary(Some(value)))) => Ok(value),
2476
- UpdateCell::Assigned(SqlCell::Value(ScalarValue::FixedSizeBinary(_, Some(value)))) => {
2477
- Ok(value)
2478
- }
2479
- UpdateCell::Assigned(SqlCell::Value(other)) => Err(lix_file_data_type_error_with_value(
2480
- "UPDATE lix_file",
2481
- column_name,
2482
- &other,
2483
- "use X'...' or a binary parameter for file contents",
2484
- )),
2485
- }
2486
- }
2487
-
2488
- fn optional_string_value(
2489
- batch: &RecordBatch,
2490
- row_index: usize,
2491
- column_name: &str,
2492
- ) -> Result<Option<String>> {
2493
- match optional_scalar_value(batch, row_index, column_name)? {
2494
- None
2495
- | Some(ScalarValue::Null)
2496
- | Some(ScalarValue::Utf8(None))
2497
- | Some(ScalarValue::Utf8View(None))
2498
- | Some(ScalarValue::LargeUtf8(None)) => Ok(None),
2499
- Some(ScalarValue::Utf8(Some(value)))
2500
- | Some(ScalarValue::Utf8View(Some(value)))
2501
- | Some(ScalarValue::LargeUtf8(Some(value))) => Ok(Some(value)),
2502
- Some(other) => Err(DataFusionError::Execution(format!(
2503
- "INSERT into lix_file expected text-compatible column '{column_name}', got {other:?}"
2504
- ))),
2505
- }
2506
- }
2507
-
2508
- fn optional_metadata_value(
2509
- batch: &RecordBatch,
2510
- row_index: usize,
2511
- column_name: &str,
2512
- context: &str,
2513
- ) -> Result<Option<TransactionJson>> {
2514
- optional_string_value(batch, row_index, column_name)?
2515
- .map(|value| {
2516
- let metadata = parse_row_metadata_value(&value, context)
2517
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)?;
2518
- TransactionJson::from_value(metadata, &format!("{context} metadata"))
2519
- .map_err(crate::sql2::error::lix_error_to_datafusion_error)
2520
- })
2521
- .transpose()
2522
- }
2523
-
2524
- fn optional_bool_value(
2525
- batch: &RecordBatch,
2526
- row_index: usize,
2527
- column_name: &str,
2528
- ) -> Result<Option<bool>> {
2529
- match optional_scalar_value(batch, row_index, column_name)? {
2530
- None | Some(ScalarValue::Null) | Some(ScalarValue::Boolean(None)) => Ok(None),
2531
- Some(ScalarValue::Boolean(Some(value))) => Ok(Some(value)),
2532
- Some(other) => Err(DataFusionError::Execution(format!(
2533
- "INSERT into lix_file expected boolean column '{column_name}', got {other:?}"
2534
- ))),
2535
- }
2536
- }
2537
-
2538
- fn insert_optional_binary_value(
2539
- batch: &RecordBatch,
2540
- row_index: usize,
2541
- column_name: &str,
2542
- ) -> Result<Option<Vec<u8>>> {
2543
- match optional_scalar_value(batch, row_index, column_name)? {
2544
- None => Ok(None),
2545
- Some(ScalarValue::Null)
2546
- | Some(ScalarValue::Binary(None))
2547
- | Some(ScalarValue::LargeBinary(None))
2548
- | Some(ScalarValue::FixedSizeBinary(_, None)) => Err(lix_file_data_type_error(
2549
- "INSERT into lix_file",
2550
- column_name,
2551
- "use X'' for an empty file or omit data to create a descriptor without contents",
2552
- )),
2553
- Some(ScalarValue::Binary(Some(value))) | Some(ScalarValue::LargeBinary(Some(value))) => {
2554
- Ok(Some(value))
2555
- }
2556
- Some(ScalarValue::FixedSizeBinary(_, Some(value))) => Ok(Some(value)),
2557
- Some(other) => Err(lix_file_data_type_error_with_value(
2558
- "INSERT into lix_file",
2559
- column_name,
2560
- &other,
2561
- "use X'...' or a binary parameter for file contents",
2562
- )),
2563
- }
2564
- }
2565
-
2566
- fn optional_scalar_value(
2567
- batch: &RecordBatch,
2568
- row_index: usize,
2569
- column_name: &str,
2570
- ) -> Result<Option<ScalarValue>> {
2571
- let schema = batch.schema();
2572
- let column_index = match schema.index_of(column_name) {
2573
- Ok(column_index) => column_index,
2574
- Err(_) => return Ok(None),
2575
- };
2576
- if row_index >= batch.num_rows() {
2577
- return Err(DataFusionError::Execution(format!(
2578
- "row index {row_index} out of bounds for lix_file batch with {} rows",
2579
- batch.num_rows()
2580
- )));
2581
- }
2582
- ScalarValue::try_from_array(batch.column(column_index).as_ref(), row_index)
2583
- .map(Some)
2584
- .map_err(|error| {
2585
- DataFusionError::Execution(format!(
2586
- "failed to decode lix_file column '{column_name}' at row {row_index}: {error}"
2587
- ))
2588
- })
2589
- }
2590
-
2591
- pub(super) fn lix_file_schema() -> SchemaRef {
2592
- Arc::new(Schema::new(vec![
2593
- Field::new("id", DataType::Utf8, true),
2594
- Field::new("path", DataType::Utf8, false),
2595
- Field::new("directory_id", DataType::Utf8, true),
2596
- Field::new("name", DataType::Utf8, false),
2597
- Field::new("hidden", DataType::Boolean, true),
2598
- Field::new("data", DataType::Binary, true),
2599
- json_field("lixcol_entity_pk", false),
2600
- Field::new("lixcol_schema_key", DataType::Utf8, false),
2601
- Field::new("lixcol_file_id", DataType::Utf8, true),
2602
- Field::new("lixcol_global", DataType::Boolean, true),
2603
- Field::new("lixcol_change_id", DataType::Utf8, true),
2604
- Field::new("lixcol_created_at", DataType::Utf8, true),
2605
- Field::new("lixcol_updated_at", DataType::Utf8, true),
2606
- Field::new("lixcol_commit_id", DataType::Utf8, true),
2607
- Field::new("lixcol_untracked", DataType::Boolean, true),
2608
- json_field("lixcol_metadata", true),
2609
- ]))
2610
- }
2611
-
2612
- pub(super) fn lix_file_by_branch_schema() -> SchemaRef {
2613
- let mut fields = lix_file_schema()
2614
- .fields()
2615
- .iter()
2616
- .map(|field| field.as_ref().clone())
2617
- .collect::<Vec<_>>();
2618
- fields.push(Field::new("lixcol_branch_id", DataType::Utf8, false));
2619
- Arc::new(Schema::new(fields))
2620
- }
2621
-
2622
- fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
2623
- crate::sql2::error::datafusion_error_to_lix_error(error)
2624
- }
2625
-
2626
- fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
2627
- crate::sql2::error::lix_error_to_datafusion_error(error)
2628
- }
2629
-
2630
- #[cfg(test)]
2631
- mod tests {
2632
- use std::collections::{BTreeMap, BTreeSet};
2633
- use std::sync::Arc;
2634
-
2635
- use async_trait::async_trait;
2636
- use datafusion::arrow::array::{ArrayRef, BinaryArray, BooleanArray, StringArray};
2637
- use datafusion::arrow::datatypes::{DataType, Field, Schema};
2638
- use datafusion::arrow::record_batch::RecordBatch;
2639
- use datafusion::common::{Column, ScalarValue};
2640
- use datafusion::execution::TaskContext;
2641
- use datafusion::logical_expr::expr::InList;
2642
- use datafusion::logical_expr::lit;
2643
- use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
2644
- use serde_json::Value as JsonValue;
2645
-
2646
- use crate::binary_cas::BlobDataReader;
2647
- use crate::functions::{
2648
- FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
2649
- };
2650
- use crate::live_state::MaterializedLiveStateRow;
2651
- use crate::live_state::{LiveStateReader, LiveStateRowRequest, LiveStateScanRequest};
2652
- use crate::sql2::dml::InsertSink;
2653
- use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
2654
- use crate::transaction::types::{
2655
- TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
2656
- };
2657
- use crate::LixError;
2658
-
2659
- use super::{
2660
- derive_directory_path_for, lix_file_delete_stage_from_batch,
2661
- lix_file_insert_stage_from_batch, lix_file_insert_stage_from_batch_with_path_resolvers,
2662
- lix_file_write_rows_from_batch, BranchBinding, DirectoryDescriptorRecord,
2663
- LixFileInsertSink,
2664
- };
2665
-
2666
- fn test_id_generator(ids: &'static [&'static str]) -> impl FnMut() -> String {
2667
- let mut ids = ids.iter();
2668
- move || ids.next().expect("test id should exist").to_string()
2669
- }
2670
-
2671
- fn test_functions() -> FunctionProviderHandle {
2672
- SharedFunctionProvider::new(
2673
- Box::new(SystemFunctionProvider) as Box<dyn FunctionProvider + Send>
2674
- )
2675
- }
2676
-
2677
- fn string_literal(value: &str) -> Expr {
2678
- Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
2679
- }
2680
-
2681
- fn column(name: &str) -> Expr {
2682
- Expr::Column(Column::from_name(name))
2683
- }
2684
-
2685
- fn eq_filter(column_name: &str, value: &str) -> Expr {
2686
- Expr::BinaryExpr(BinaryExpr::new(
2687
- Box::new(column(column_name)),
2688
- Operator::Eq,
2689
- Box::new(string_literal(value)),
2690
- ))
2691
- }
2692
-
2693
- #[test]
2694
- fn file_id_filters_support_string_id_predicates() {
2695
- let analyzer = super::LixFileIdFilterAnalyzer;
2696
- let constraint = analyzer
2697
- .analyze(&Expr::InList(InList::new(
2698
- Box::new(column("id")),
2699
- vec![string_literal("file-b"), string_literal("file-a")],
2700
- false,
2701
- )))
2702
- .unwrap()
2703
- .unwrap();
2704
-
2705
- assert_eq!(
2706
- constraint,
2707
- super::FileIdConstraint::Ids(BTreeSet::from([
2708
- "file-a".to_string(),
2709
- "file-b".to_string()
2710
- ]))
2711
- );
2712
- assert!(analyzer.supports(&eq_filter("id", "file-a")));
2713
- assert!(analyzer.supports(&Expr::BinaryExpr(BinaryExpr::new(
2714
- Box::new(string_literal("file-a")),
2715
- Operator::Eq,
2716
- Box::new(column("id")),
2717
- ))));
2718
- }
2719
-
2720
- #[test]
2721
- fn file_id_filters_intersect_and_union_boolean_predicates() {
2722
- let analyzer = super::LixFileIdFilterAnalyzer;
2723
- let left = Expr::InList(InList::new(
2724
- Box::new(column("id")),
2725
- vec![string_literal("file-a"), string_literal("file-b")],
2726
- false,
2727
- ));
2728
- let right = Expr::InList(InList::new(
2729
- Box::new(column("id")),
2730
- vec![string_literal("file-b"), string_literal("file-c")],
2731
- false,
2732
- ));
2733
-
2734
- let and_constraint = analyzer
2735
- .analyze(&Expr::BinaryExpr(BinaryExpr::new(
2736
- Box::new(left.clone()),
2737
- Operator::And,
2738
- Box::new(right.clone()),
2739
- )))
2740
- .unwrap()
2741
- .unwrap();
2742
- assert_eq!(
2743
- and_constraint,
2744
- super::FileIdConstraint::Ids(BTreeSet::from(["file-b".to_string()]))
2745
- );
2746
-
2747
- let or_constraint = analyzer
2748
- .analyze(&Expr::BinaryExpr(BinaryExpr::new(
2749
- Box::new(left),
2750
- Operator::Or,
2751
- Box::new(right),
2752
- )))
2753
- .unwrap()
2754
- .unwrap();
2755
- assert_eq!(
2756
- or_constraint,
2757
- super::FileIdConstraint::Ids(BTreeSet::from([
2758
- "file-a".to_string(),
2759
- "file-b".to_string(),
2760
- "file-c".to_string()
2761
- ]))
2762
- );
2763
- }
2764
-
2765
- #[test]
2766
- fn file_id_filters_detect_contradictions() {
2767
- let filters = vec![Expr::BinaryExpr(BinaryExpr::new(
2768
- Box::new(eq_filter("id", "file-a")),
2769
- Operator::And,
2770
- Box::new(eq_filter("id", "file-b")),
2771
- ))];
2772
-
2773
- assert_eq!(
2774
- super::file_id_constraint_from_filters(&filters).unwrap(),
2775
- super::FileIdConstraint::None
2776
- );
2777
- }
2778
-
2779
- #[test]
2780
- fn file_id_filters_ignore_non_id_and_negated_predicates() {
2781
- let analyzer = super::LixFileIdFilterAnalyzer;
2782
-
2783
- assert!(!analyzer.supports(&eq_filter("name", "readme.md")));
2784
- assert!(!analyzer.supports(&Expr::InList(InList::new(
2785
- Box::new(column("id")),
2786
- vec![string_literal("file-a")],
2787
- true,
2788
- ))));
2789
- }
2790
-
2791
- fn lix_file_update_stage_from_batch_for_test(
2792
- batch: &RecordBatch,
2793
- branch_binding: Option<&str>,
2794
- update_columns: super::LixFileUpdateColumns,
2795
- path_resolvers: Option<&mut BTreeMap<String, super::DirectoryPathResolver>>,
2796
- generate_directory_id: &mut dyn FnMut() -> String,
2797
- ) -> datafusion::common::Result<super::LixFileStagedBatch> {
2798
- let mut columns = Vec::new();
2799
- if update_columns.path {
2800
- columns.extend(["path", "hidden"]);
2801
- }
2802
- if update_columns.data {
2803
- columns.push("data");
2804
- }
2805
- if update_columns.descriptor {
2806
- columns.extend(["directory_id", "name", "hidden"]);
2807
- }
2808
- let assignment_values = super::UpdateAssignmentValues::from_batch_columns(batch, &columns);
2809
- super::lix_file_update_stage_from_batch(
2810
- batch,
2811
- &assignment_values,
2812
- branch_binding,
2813
- update_columns,
2814
- path_resolvers,
2815
- generate_directory_id,
2816
- )
2817
- }
2818
-
2819
- #[derive(Default)]
2820
- struct CapturingWriteContext {
2821
- rows: Vec<MaterializedLiveStateRow>,
2822
- writes: Vec<TransactionWrite>,
2823
- }
2824
-
2825
- #[async_trait]
2826
- impl BlobDataReader for CapturingWriteContext {
2827
- async fn load_bytes_many(
2828
- &self,
2829
- hashes: &[crate::binary_cas::BlobHash],
2830
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2831
- Ok(crate::binary_cas::BlobBytesBatch::new(vec![
2832
- None;
2833
- hashes.len()
2834
- ]))
2835
- }
2836
- }
2837
-
2838
- #[async_trait]
2839
- impl SqlWriteExecutionContext for CapturingWriteContext {
2840
- fn active_branch_id(&self) -> &str {
2841
- "branch-b"
2842
- }
2843
-
2844
- fn functions(&self) -> FunctionProviderHandle {
2845
- test_functions()
2846
- }
2847
-
2848
- fn list_visible_schemas(&self) -> Result<Vec<JsonValue>, LixError> {
2849
- Ok(Vec::new())
2850
- }
2851
-
2852
- async fn load_bytes_many(
2853
- &mut self,
2854
- hashes: &[crate::binary_cas::BlobHash],
2855
- ) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
2856
- BlobDataReader::load_bytes_many(self, hashes).await
2857
- }
2858
-
2859
- async fn scan_live_state(
2860
- &mut self,
2861
- _request: &LiveStateScanRequest,
2862
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2863
- Ok(self.rows.clone())
2864
- }
2865
-
2866
- async fn load_branch_head(&mut self, branch_id: &str) -> Result<Option<String>, LixError> {
2867
- if branch_id == "ghost-branch" {
2868
- return Ok(None);
2869
- }
2870
- Ok(Some(format!("commit-{branch_id}")))
2871
- }
2872
-
2873
- async fn stage_write(
2874
- &mut self,
2875
- write: TransactionWrite,
2876
- ) -> Result<TransactionWriteOutcome, LixError> {
2877
- self.writes.push(write);
2878
- Ok(TransactionWriteOutcome { count: 0 })
2879
- }
2880
- }
2881
-
2882
- #[derive(Default)]
2883
- struct RowsLiveStateReader {
2884
- rows: Vec<MaterializedLiveStateRow>,
2885
- }
2886
-
2887
- #[async_trait]
2888
- impl LiveStateReader for RowsLiveStateReader {
2889
- async fn scan_rows(
2890
- &self,
2891
- _request: &LiveStateScanRequest,
2892
- ) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
2893
- Ok(self.rows.clone())
2894
- }
2895
-
2896
- async fn load_row(
2897
- &self,
2898
- _request: &LiveStateRowRequest,
2899
- ) -> Result<Option<MaterializedLiveStateRow>, LixError> {
2900
- Ok(None)
2901
- }
2902
- }
2903
-
2904
- fn live_directory_row(
2905
- entity_pk: &str,
2906
- branch_id: &str,
2907
- snapshot_content: &str,
2908
- ) -> MaterializedLiveStateRow {
2909
- MaterializedLiveStateRow {
2910
- entity_pk: crate::entity_pk::EntityPk::single(entity_pk),
2911
- schema_key: super::DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
2912
- file_id: None,
2913
- snapshot_content: Some(snapshot_content.to_string()),
2914
- metadata: None,
2915
- deleted: false,
2916
- branch_id: branch_id.to_string(),
2917
- change_id: Some(format!("change-{entity_pk}")),
2918
- commit_id: Some(format!("commit-{entity_pk}")),
2919
- global: false,
2920
- untracked: false,
2921
- created_at: "2026-04-23T00:00:00Z".to_string(),
2922
- updated_at: "2026-04-23T01:00:00Z".to_string(),
2923
- }
2924
- }
2925
-
2926
- fn live_file_row(
2927
- entity_pk: &str,
2928
- branch_id: &str,
2929
- snapshot_content: &str,
2930
- ) -> MaterializedLiveStateRow {
2931
- MaterializedLiveStateRow {
2932
- entity_pk: crate::entity_pk::EntityPk::single(entity_pk),
2933
- schema_key: super::FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
2934
- file_id: None,
2935
- snapshot_content: Some(snapshot_content.to_string()),
2936
- metadata: None,
2937
- deleted: false,
2938
- branch_id: branch_id.to_string(),
2939
- change_id: Some(format!("change-{entity_pk}")),
2940
- commit_id: Some(format!("commit-{entity_pk}")),
2941
- global: false,
2942
- untracked: false,
2943
- created_at: "2026-04-23T00:00:00Z".to_string(),
2944
- updated_at: "2026-04-23T01:00:00Z".to_string(),
2945
- }
2946
- }
2947
-
2948
- fn string_column(values: Vec<Option<&str>>) -> ArrayRef {
2949
- Arc::new(StringArray::from(values)) as ArrayRef
2950
- }
2951
-
2952
- fn file_insert_batch(include_branch: bool, global: bool) -> RecordBatch {
2953
- let mut fields = vec![
2954
- Field::new("id", DataType::Utf8, false),
2955
- Field::new("directory_id", DataType::Utf8, true),
2956
- Field::new("name", DataType::Utf8, false),
2957
- Field::new("hidden", DataType::Boolean, false),
2958
- Field::new("lixcol_global", DataType::Boolean, false),
2959
- Field::new("lixcol_metadata", DataType::Utf8, true),
2960
- ];
2961
- let mut columns = vec![
2962
- string_column(vec![Some("file-readme")]),
2963
- string_column(vec![Some("dir-docs")]),
2964
- string_column(vec![Some("readme.md")]),
2965
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2966
- Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
2967
- string_column(vec![Some("{\"source\":\"file\"}")]),
2968
- ];
2969
- if include_branch {
2970
- fields.push(Field::new("lixcol_branch_id", DataType::Utf8, false));
2971
- columns.push(string_column(vec![Some("branch-b")]));
2972
- }
2973
- RecordBatch::try_new(Arc::new(Schema::new(fields)), columns).expect("file insert batch")
2974
- }
2975
-
2976
- fn data_insert_batch() -> RecordBatch {
2977
- RecordBatch::try_new(
2978
- Arc::new(Schema::new(vec![
2979
- Field::new("id", DataType::Utf8, false),
2980
- Field::new("directory_id", DataType::Utf8, true),
2981
- Field::new("name", DataType::Utf8, false),
2982
- Field::new("hidden", DataType::Boolean, false),
2983
- Field::new("data", DataType::Binary, true),
2984
- Field::new("lixcol_branch_id", DataType::Utf8, false),
2985
- ])),
2986
- vec![
2987
- string_column(vec![Some("file-readme")]),
2988
- string_column(vec![Some("dir-docs")]),
2989
- string_column(vec![Some("readme.md")]),
2990
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
2991
- Arc::new(BinaryArray::from_vec(vec![b"hello"])) as ArrayRef,
2992
- string_column(vec![Some("branch-b")]),
2993
- ],
2994
- )
2995
- .expect("file data batch")
2996
- }
2997
-
2998
- fn path_data_insert_batch() -> RecordBatch {
2999
- RecordBatch::try_new(
3000
- Arc::new(Schema::new(vec![
3001
- Field::new("id", DataType::Utf8, false),
3002
- Field::new("path", DataType::Utf8, false),
3003
- Field::new("hidden", DataType::Boolean, false),
3004
- Field::new("data", DataType::Binary, true),
3005
- Field::new("lixcol_branch_id", DataType::Utf8, false),
3006
- ])),
3007
- vec![
3008
- string_column(vec![Some("file-readme")]),
3009
- string_column(vec![Some("/docs/guides/readme.md")]),
3010
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
3011
- Arc::new(BinaryArray::from_vec(vec![b"hello"])) as ArrayRef,
3012
- string_column(vec![Some("branch-b")]),
3013
- ],
3014
- )
3015
- .expect("file path data batch")
3016
- }
3017
-
3018
- fn path_update_batch() -> RecordBatch {
3019
- RecordBatch::try_new(
3020
- Arc::new(Schema::new(vec![
3021
- Field::new("id", DataType::Utf8, false),
3022
- Field::new("path", DataType::Utf8, false),
3023
- Field::new("hidden", DataType::Boolean, false),
3024
- Field::new("data", DataType::Binary, true),
3025
- Field::new("lixcol_branch_id", DataType::Utf8, false),
3026
- ])),
3027
- vec![
3028
- string_column(vec![Some("file-readme")]),
3029
- string_column(vec![Some("/docs/renamed.md")]),
3030
- Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
3031
- Arc::new(BinaryArray::from_vec(vec![b"hello"])) as ArrayRef,
3032
- string_column(vec![Some("branch-b")]),
3033
- ],
3034
- )
3035
- .expect("file path update batch")
3036
- }
3037
-
3038
- fn file_delete_batch() -> RecordBatch {
3039
- RecordBatch::try_new(
3040
- Arc::new(Schema::new(vec![
3041
- Field::new("id", DataType::Utf8, false),
3042
- Field::new("lixcol_branch_id", DataType::Utf8, false),
3043
- ])),
3044
- vec![
3045
- string_column(vec![Some("file-readme")]),
3046
- string_column(vec![Some("branch-b")]),
3047
- ],
3048
- )
3049
- .expect("file delete batch")
3050
- }
3051
-
3052
- #[test]
3053
- fn derives_nested_directory_paths() {
3054
- let root = DirectoryDescriptorRecord {
3055
- id: "dir-docs".to_string(),
3056
- parent_id: None,
3057
- name: "docs".to_string(),
3058
- branch_id: "branch-a".to_string(),
3059
- };
3060
- let child = DirectoryDescriptorRecord {
3061
- id: "dir-guides".to_string(),
3062
- parent_id: Some("dir-docs".to_string()),
3063
- name: "guides".to_string(),
3064
- branch_id: "branch-a".to_string(),
3065
- };
3066
- let mut records = BTreeMap::new();
3067
- records.insert(root.id.clone(), &root);
3068
- records.insert(child.id.clone(), &child);
3069
- let mut paths = BTreeMap::new();
3070
-
3071
- assert_eq!(
3072
- derive_directory_path_for(
3073
- "branch-a",
3074
- "dir-guides",
3075
- &records,
3076
- &mut paths,
3077
- &mut BTreeSet::new()
3078
- )
3079
- .expect("path derivation should succeed"),
3080
- Some("/docs/guides/".to_string())
3081
- );
3082
- }
3083
-
3084
- #[tokio::test]
3085
- async fn file_projection_rejects_unresolved_non_root_directory_id() {
3086
- let blob_reader = Arc::new(CapturingWriteContext::default()) as Arc<dyn BlobDataReader>;
3087
- let error = super::lix_file_record_batch(
3088
- &super::lix_file_schema(),
3089
- &blob_reader,
3090
- vec![live_file_row(
3091
- "file-readme",
3092
- "branch-b",
3093
- "{\"id\":\"file-readme\",\"directory_id\":\"missing-dir\",\"name\":\"readme.md\",\"hidden\":false}",
3094
- )],
3095
- )
3096
- .await
3097
- .expect_err("unresolved non-root directory_id should not project as root path");
3098
-
3099
- assert_eq!(error.code, LixError::CODE_FOREIGN_KEY);
3100
- assert!(error.message.contains("missing-dir"));
3101
- }
3102
-
3103
- #[test]
3104
- fn decodes_file_insert_into_lix_state_write_row() {
3105
- let batch = file_insert_batch(true, false);
3106
-
3107
- let rows = lix_file_write_rows_from_batch(&batch, None).expect("decode file insert");
3108
-
3109
- assert_eq!(rows.len(), 1);
3110
- assert_eq!(
3111
- rows[0].entity_pk.as_ref(),
3112
- Some(&crate::entity_pk::EntityPk::single("file-readme"))
3113
- );
3114
- assert_eq!(rows[0].schema_key, "lix_file_descriptor");
3115
- assert_eq!(rows[0].branch_id, "branch-b");
3116
- assert_eq!(
3117
- rows[0].metadata.as_ref(),
3118
- Some(&TransactionJson::from_value_for_test(
3119
- serde_json::json!({"source": "file"})
3120
- ))
3121
- );
3122
- let snapshot = rows[0].snapshot.as_ref().expect("descriptor snapshot JSON");
3123
- assert_eq!(snapshot["id"], "file-readme");
3124
- assert_eq!(snapshot["directory_id"], "dir-docs");
3125
- assert_eq!(snapshot["name"], "readme.md");
3126
- assert_eq!(snapshot["hidden"], false);
3127
- }
3128
-
3129
- #[test]
3130
- fn active_file_insert_defaults_branch_id() {
3131
- let batch = file_insert_batch(false, false);
3132
-
3133
- let rows =
3134
- lix_file_write_rows_from_batch(&batch, Some("branch-a")).expect("decode file insert");
3135
-
3136
- assert_eq!(rows.len(), 1);
3137
- assert_eq!(rows[0].branch_id, "branch-a");
3138
- }
3139
-
3140
- #[test]
3141
- fn by_branch_file_insert_requires_branch_id_for_non_global_rows() {
3142
- let batch = file_insert_batch(false, false);
3143
-
3144
- let error =
3145
- lix_file_write_rows_from_batch(&batch, None).expect_err("branch id is required");
3146
-
3147
- assert!(
3148
- error.to_string().contains("requires lixcol_branch_id"),
3149
- "unexpected error: {error}"
3150
- );
3151
- }
3152
-
3153
- #[test]
3154
- fn file_insert_rejects_global_with_non_global_branch_id() {
3155
- let error = lix_file_write_rows_from_batch(&file_insert_batch(true, true), None)
3156
- .expect_err("global file write should reject conflicting branch id");
3157
-
3158
- assert!(
3159
- error
3160
- .to_string()
3161
- .contains("cannot set lixcol_global=true with non-global lixcol_branch_id"),
3162
- "unexpected error: {error}"
3163
- );
3164
- }
3165
-
3166
- #[test]
3167
- fn file_update_accepts_path_assignment() {
3168
- super::validate_lix_file_update_assignments(
3169
- &super::lix_file_schema(),
3170
- &[("path".to_string(), lit("/docs/renamed.md"))],
3171
- )
3172
- .expect("path should be writable for update");
3173
- }
3174
-
3175
- #[test]
3176
- fn file_path_update_stages_descriptor_from_new_path() {
3177
- let mut resolvers = BTreeMap::new();
3178
- resolvers.insert(
3179
- super::filesystem_storage_scope_key("branch-b", false, false, None),
3180
- super::DirectoryPathResolver::from_existing([(
3181
- "/docs/".to_string(),
3182
- "dir-docs".to_string(),
3183
- )])
3184
- .expect("directory resolver should seed"),
3185
- );
3186
-
3187
- let staged = lix_file_update_stage_from_batch_for_test(
3188
- &path_update_batch(),
3189
- None,
3190
- super::LixFileUpdateColumns {
3191
- path: true,
3192
- data: false,
3193
- descriptor: false,
3194
- },
3195
- Some(&mut resolvers),
3196
- &mut test_id_generator(&["should-not-be-used"]),
3197
- )
3198
- .expect("decode file path update");
3199
-
3200
- assert_eq!(staged.count, 1);
3201
- assert_eq!(staged.file_data_writes.len(), 0);
3202
- assert_eq!(staged.state_rows.len(), 1);
3203
- let descriptor = staged
3204
- .state_rows
3205
- .iter()
3206
- .find(|row| row.schema_key == "lix_file_descriptor")
3207
- .expect("file descriptor row should be staged");
3208
- let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3209
- assert_eq!(snapshot["id"], "file-readme");
3210
- assert_eq!(snapshot["directory_id"], "dir-docs");
3211
- assert_eq!(snapshot["name"], "renamed.md");
3212
- assert_eq!(snapshot["hidden"], false);
3213
- }
3214
-
3215
- #[test]
3216
- fn file_path_update_preserves_existing_data_unless_data_is_assigned() {
3217
- let mut resolvers = BTreeMap::new();
3218
- resolvers.insert(
3219
- super::filesystem_storage_scope_key("branch-b", false, false, None),
3220
- super::DirectoryPathResolver::from_existing([(
3221
- "/docs/".to_string(),
3222
- "dir-docs".to_string(),
3223
- )])
3224
- .expect("directory resolver should seed"),
3225
- );
3226
-
3227
- let staged = lix_file_update_stage_from_batch_for_test(
3228
- &path_update_batch(),
3229
- None,
3230
- super::LixFileUpdateColumns {
3231
- path: true,
3232
- data: false,
3233
- descriptor: false,
3234
- },
3235
- Some(&mut resolvers),
3236
- &mut test_id_generator(&["should-not-be-used"]),
3237
- )
3238
- .expect("decode file path update");
3239
-
3240
- assert!(
3241
- staged.file_data_writes.is_empty(),
3242
- "path-only update should not rewrite file data"
3243
- );
3244
- assert!(
3245
- staged
3246
- .state_rows
3247
- .iter()
3248
- .all(|row| row.schema_key != "lix_binary_blob_ref"),
3249
- "path-only update should not rewrite the blob ref"
3250
- );
3251
- }
3252
-
3253
- #[tokio::test]
3254
- async fn file_path_update_seeds_resolver_from_visible_directory_state() {
3255
- let mut resolvers = super::file_path_resolvers_from_live_state(
3256
- Arc::new(RowsLiveStateReader {
3257
- rows: vec![live_directory_row(
3258
- "dir-docs",
3259
- "branch-b",
3260
- "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\"}",
3261
- )],
3262
- }) as Arc<dyn LiveStateReader>,
3263
- Some("branch-b"),
3264
- )
3265
- .await
3266
- .expect("directory state should seed path resolver");
3267
-
3268
- let staged = lix_file_update_stage_from_batch_for_test(
3269
- &path_update_batch(),
3270
- None,
3271
- super::LixFileUpdateColumns {
3272
- path: true,
3273
- data: false,
3274
- descriptor: false,
3275
- },
3276
- Some(&mut resolvers),
3277
- &mut test_id_generator(&["should-not-be-used"]),
3278
- )
3279
- .expect("decode file path update");
3280
-
3281
- assert_eq!(staged.count, 1);
3282
- assert_eq!(staged.state_rows.len(), 1);
3283
- assert!(staged
3284
- .state_rows
3285
- .iter()
3286
- .all(|row| row.schema_key != "lix_directory_descriptor"));
3287
-
3288
- let snapshot: JsonValue = staged.state_rows[0]
3289
- .snapshot
3290
- .as_ref()
3291
- .unwrap()
3292
- .value()
3293
- .clone();
3294
- assert_eq!(snapshot["directory_id"], "dir-docs");
3295
- assert_eq!(snapshot["name"], "renamed.md");
3296
- }
3297
-
3298
- #[tokio::test]
3299
- async fn file_path_update_stages_only_missing_parent_directories() {
3300
- let mut resolvers = super::file_path_resolvers_from_live_state(
3301
- Arc::new(RowsLiveStateReader::default()) as Arc<dyn LiveStateReader>,
3302
- Some("branch-b"),
3303
- )
3304
- .await
3305
- .expect("empty directory state should seed path resolver");
3306
-
3307
- let staged = lix_file_update_stage_from_batch_for_test(
3308
- &path_update_batch(),
3309
- None,
3310
- super::LixFileUpdateColumns {
3311
- path: true,
3312
- data: false,
3313
- descriptor: false,
3314
- },
3315
- Some(&mut resolvers),
3316
- &mut test_id_generator(&["dir-generated-docs"]),
3317
- )
3318
- .expect("decode file path update");
3319
-
3320
- assert_eq!(staged.count, 1);
3321
- assert_eq!(staged.state_rows.len(), 2);
3322
- assert_eq!(
3323
- staged
3324
- .state_rows
3325
- .iter()
3326
- .filter(|row| row.schema_key == "lix_directory_descriptor")
3327
- .count(),
3328
- 1
3329
- );
3330
-
3331
- let directory = staged
3332
- .state_rows
3333
- .iter()
3334
- .find(|row| row.schema_key == "lix_directory_descriptor")
3335
- .expect("missing /docs/ directory should be staged");
3336
- assert_eq!(
3337
- directory.entity_pk.as_ref(),
3338
- Some(&crate::entity_pk::EntityPk::single("dir-generated-docs"))
3339
- );
3340
-
3341
- let descriptor = staged
3342
- .state_rows
3343
- .iter()
3344
- .find(|row| row.schema_key == "lix_file_descriptor")
3345
- .expect("file descriptor should be staged");
3346
- let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3347
- assert_eq!(snapshot["directory_id"], "dir-generated-docs");
3348
- }
3349
-
3350
- #[test]
3351
- fn file_path_update_with_data_assignment_stages_blob_ref_and_payload() {
3352
- let mut resolvers = BTreeMap::new();
3353
- resolvers.insert(
3354
- super::filesystem_storage_scope_key("branch-b", false, false, None),
3355
- super::DirectoryPathResolver::from_existing([(
3356
- "/docs/".to_string(),
3357
- "dir-docs".to_string(),
3358
- )])
3359
- .expect("directory resolver should seed"),
3360
- );
3361
-
3362
- let staged = lix_file_update_stage_from_batch_for_test(
3363
- &path_update_batch(),
3364
- None,
3365
- super::LixFileUpdateColumns {
3366
- path: true,
3367
- data: true,
3368
- descriptor: false,
3369
- },
3370
- Some(&mut resolvers),
3371
- &mut test_id_generator(&["should-not-be-used"]),
3372
- )
3373
- .expect("decode file path and data update");
3374
-
3375
- assert_eq!(staged.count, 1);
3376
- assert_eq!(staged.file_data_writes.len(), 1);
3377
- assert_eq!(staged.file_data_writes[0].file_id, "file-readme");
3378
- assert_eq!(staged.file_data_writes[0].data, b"hello");
3379
- assert!(staged
3380
- .state_rows
3381
- .iter()
3382
- .any(|row| row.schema_key == "lix_file_descriptor"));
3383
- assert!(staged
3384
- .state_rows
3385
- .iter()
3386
- .any(|row| row.schema_key == "lix_binary_blob_ref"));
3387
- }
3388
-
3389
- #[test]
3390
- fn file_data_update_without_path_ignores_materialized_path_column() {
3391
- let staged = lix_file_update_stage_from_batch_for_test(
3392
- &path_update_batch(),
3393
- None,
3394
- super::LixFileUpdateColumns {
3395
- path: false,
3396
- data: true,
3397
- descriptor: false,
3398
- },
3399
- None,
3400
- &mut test_id_generator(&["should-not-be-used"]),
3401
- )
3402
- .expect("decode file data update");
3403
-
3404
- assert_eq!(staged.count, 1);
3405
- assert_eq!(staged.file_data_writes.len(), 1);
3406
- assert_eq!(staged.file_data_writes[0].file_id, "file-readme");
3407
- assert_eq!(staged.state_rows.len(), 1);
3408
- assert_eq!(staged.state_rows[0].schema_key, "lix_binary_blob_ref");
3409
- }
3410
-
3411
- #[test]
3412
- fn file_insert_stages_non_null_data() {
3413
- let batch = data_insert_batch();
3414
-
3415
- let staged = lix_file_insert_stage_from_batch(&batch, None).expect("decode file data");
3416
-
3417
- assert_eq!(staged.count, 1);
3418
- assert_eq!(staged.state_rows.len(), 2);
3419
- assert!(staged
3420
- .state_rows
3421
- .iter()
3422
- .any(|row| row.schema_key == "lix_file_descriptor"));
3423
- let blob_ref_row = staged
3424
- .state_rows
3425
- .iter()
3426
- .find(|row| row.schema_key == "lix_binary_blob_ref")
3427
- .expect("data insert should stage blob ref row");
3428
- assert_eq!(
3429
- blob_ref_row.entity_pk.as_ref(),
3430
- Some(&crate::entity_pk::EntityPk::single("file-readme"))
3431
- );
3432
- assert_eq!(blob_ref_row.file_id.as_deref(), Some("file-readme"));
3433
- assert_eq!(staged.file_data_writes.len(), 1);
3434
- assert_eq!(staged.file_data_writes[0].file_id, "file-readme");
3435
- assert_eq!(staged.file_data_writes[0].branch_id, "branch-b");
3436
- assert_eq!(staged.file_data_writes[0].data, b"hello");
3437
- }
3438
-
3439
- #[test]
3440
- fn file_delete_with_blob_ref_stages_descriptor_and_blob_ref_tombstones() {
3441
- let batch = file_delete_batch();
3442
- let staged = lix_file_delete_stage_from_batch(
3443
- &batch,
3444
- None,
3445
- &BTreeSet::from(["file-readme".to_string()]),
3446
- )
3447
- .expect("decode file delete");
3448
-
3449
- assert_eq!(staged.count, 1);
3450
- assert_eq!(staged.state_rows.len(), 2);
3451
- let descriptor = staged
3452
- .state_rows
3453
- .iter()
3454
- .find(|row| row.schema_key == "lix_file_descriptor")
3455
- .expect("file descriptor tombstone should be staged");
3456
- assert_eq!(
3457
- descriptor.entity_pk.as_ref(),
3458
- Some(&crate::entity_pk::EntityPk::single("file-readme"))
3459
- );
3460
- assert_eq!(descriptor.file_id, None);
3461
- assert_eq!(descriptor.snapshot, None);
3462
-
3463
- let blob_ref = staged
3464
- .state_rows
3465
- .iter()
3466
- .find(|row| row.schema_key == "lix_binary_blob_ref")
3467
- .expect("blob ref tombstone should be staged");
3468
- assert_eq!(
3469
- blob_ref.entity_pk.as_ref(),
3470
- Some(&crate::entity_pk::EntityPk::single("file-readme"))
3471
- );
3472
- assert_eq!(blob_ref.file_id.as_deref(), Some("file-readme"));
3473
- assert_eq!(blob_ref.snapshot, None);
3474
- }
3475
-
3476
- #[test]
3477
- fn file_delete_without_blob_ref_stages_only_descriptor_tombstone() {
3478
- let batch = file_delete_batch();
3479
- let staged = lix_file_delete_stage_from_batch(&batch, None, &BTreeSet::new())
3480
- .expect("decode file delete");
3481
-
3482
- assert_eq!(staged.count, 1);
3483
- assert_eq!(staged.state_rows.len(), 1);
3484
- assert_eq!(staged.state_rows[0].schema_key, "lix_file_descriptor");
3485
- assert_eq!(
3486
- staged.state_rows[0].entity_pk.as_ref(),
3487
- Some(&crate::entity_pk::EntityPk::single("file-readme"))
3488
- );
3489
- assert_eq!(staged.state_rows[0].snapshot, None);
3490
- }
3491
-
3492
- #[test]
3493
- fn file_path_insert_reuses_existing_parent_directory() {
3494
- let mut resolvers = BTreeMap::new();
3495
- resolvers.insert(
3496
- super::filesystem_storage_scope_key("branch-b", false, false, None),
3497
- super::DirectoryPathResolver::from_existing([
3498
- ("/docs/".to_string(), "dir-docs".to_string()),
3499
- ("/docs/guides/".to_string(), "dir-guides".to_string()),
3500
- ])
3501
- .expect("directory resolver should seed"),
3502
- );
3503
-
3504
- let staged = lix_file_insert_stage_from_batch_with_path_resolvers(
3505
- &path_data_insert_batch(),
3506
- None,
3507
- "lix_file",
3508
- &mut resolvers,
3509
- &mut test_id_generator(&["should-not-be-used"]),
3510
- true,
3511
- )
3512
- .expect("decode file path data");
3513
-
3514
- assert_eq!(staged.count, 1);
3515
- assert_eq!(staged.file_data_writes.len(), 1);
3516
- assert_eq!(staged.file_data_writes[0].file_id, "file-readme");
3517
- assert_eq!(staged.state_rows.len(), 2);
3518
- let descriptor = staged
3519
- .state_rows
3520
- .iter()
3521
- .find(|row| row.schema_key == "lix_file_descriptor")
3522
- .expect("file descriptor row should be staged");
3523
- let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3524
- assert_eq!(snapshot["id"], "file-readme");
3525
- assert_eq!(snapshot["directory_id"], "dir-guides");
3526
- assert_eq!(snapshot["name"], "readme.md");
3527
- }
3528
-
3529
- #[test]
3530
- fn file_path_insert_stages_missing_parent_directories_once() {
3531
- let mut resolvers = BTreeMap::new();
3532
-
3533
- let staged = lix_file_insert_stage_from_batch_with_path_resolvers(
3534
- &path_data_insert_batch(),
3535
- None,
3536
- "lix_file",
3537
- &mut resolvers,
3538
- &mut test_id_generator(&["dir-generated-docs", "dir-generated-guides"]),
3539
- true,
3540
- )
3541
- .expect("decode file path data");
3542
-
3543
- assert_eq!(staged.count, 1);
3544
- assert_eq!(staged.state_rows.len(), 4);
3545
- let directory_rows = staged
3546
- .state_rows
3547
- .iter()
3548
- .filter(|row| row.schema_key == "lix_directory_descriptor")
3549
- .collect::<Vec<_>>();
3550
- assert_eq!(directory_rows.len(), 2);
3551
-
3552
- let descriptor = staged
3553
- .state_rows
3554
- .iter()
3555
- .find(|row| row.schema_key == "lix_file_descriptor")
3556
- .expect("file descriptor row should be staged");
3557
- let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3558
- assert_eq!(snapshot["directory_id"], "dir-generated-guides");
3559
- }
3560
-
3561
- #[tokio::test]
3562
- async fn file_insert_sink_stages_decoded_lix_state_rows() {
3563
- let batch = file_insert_batch(true, false);
3564
- let mut write_context = CapturingWriteContext::default();
3565
- let write_ctx = SqlWriteContext::new(&mut write_context);
3566
- let sink = LixFileInsertSink::new(
3567
- write_ctx,
3568
- test_functions(),
3569
- BranchBinding::explicit(),
3570
- false,
3571
- );
3572
-
3573
- let count = sink
3574
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
3575
- .await
3576
- .expect("file insert sink should stage");
3577
-
3578
- assert_eq!(count, 1);
3579
- let writes = &write_context.writes;
3580
- assert_eq!(writes.len(), 1);
3581
- match &writes[0] {
3582
- TransactionWrite::Rows { mode, rows } => {
3583
- assert_eq!(*mode, TransactionWriteMode::Insert);
3584
- assert_eq!(rows.len(), 1);
3585
- assert_eq!(
3586
- rows[0].entity_pk.as_ref(),
3587
- Some(&crate::entity_pk::EntityPk::single("file-readme"))
3588
- );
3589
- assert_eq!(rows[0].schema_key, "lix_file_descriptor");
3590
- }
3591
- other => panic!("expected insert staged write, got {other:?}"),
3592
- }
3593
- }
3594
-
3595
- #[tokio::test]
3596
- async fn file_insert_sink_stages_file_data_writes() {
3597
- let batch = data_insert_batch();
3598
- let mut write_context = CapturingWriteContext::default();
3599
- let write_ctx = SqlWriteContext::new(&mut write_context);
3600
- let sink =
3601
- LixFileInsertSink::new(write_ctx, test_functions(), BranchBinding::explicit(), true);
3602
-
3603
- let count = sink
3604
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
3605
- .await
3606
- .expect("file insert sink should stage data");
3607
-
3608
- assert_eq!(count, 1);
3609
- let writes = &write_context.writes;
3610
- assert_eq!(writes.len(), 1);
3611
- match &writes[0] {
3612
- TransactionWrite::RowsWithFileData {
3613
- mode,
3614
- rows,
3615
- file_data,
3616
- count,
3617
- ..
3618
- } => {
3619
- assert_eq!(*mode, TransactionWriteMode::Insert);
3620
- assert_eq!(*count, 1);
3621
- assert_eq!(rows.len(), 2);
3622
- assert!(rows
3623
- .iter()
3624
- .any(|row| row.schema_key == "lix_file_descriptor"));
3625
- assert!(rows
3626
- .iter()
3627
- .any(|row| row.schema_key == "lix_binary_blob_ref"));
3628
- assert_eq!(file_data.len(), 1);
3629
- assert_eq!(file_data[0].file_id, "file-readme");
3630
- assert_eq!(file_data[0].data, b"hello");
3631
- }
3632
- other => panic!("expected insert with file data staged write, got {other:?}"),
3633
- }
3634
- }
3635
-
3636
- #[tokio::test]
3637
- async fn file_insert_sink_seeds_path_resolver_from_live_state() {
3638
- let batch = path_data_insert_batch();
3639
- let mut write_context = CapturingWriteContext {
3640
- rows: vec![
3641
- live_directory_row(
3642
- "dir-docs",
3643
- "branch-b",
3644
- "{\"id\":\"dir-docs\",\"parent_id\":null,\"name\":\"docs\"}",
3645
- ),
3646
- live_directory_row(
3647
- "dir-guides",
3648
- "branch-b",
3649
- "{\"id\":\"dir-guides\",\"parent_id\":\"dir-docs\",\"name\":\"guides\"}",
3650
- ),
3651
- ],
3652
- writes: Vec::new(),
3653
- };
3654
- let write_ctx = SqlWriteContext::new(&mut write_context);
3655
- let sink =
3656
- LixFileInsertSink::new(write_ctx, test_functions(), BranchBinding::explicit(), true);
3657
-
3658
- let count = sink
3659
- .write_batches(vec![batch], &Arc::new(TaskContext::default()))
3660
- .await
3661
- .expect("file insert sink should stage path data");
3662
-
3663
- assert_eq!(count, 1);
3664
- let writes = &write_context.writes;
3665
- assert_eq!(writes.len(), 1);
3666
- match &writes[0] {
3667
- TransactionWrite::RowsWithFileData {
3668
- rows,
3669
- file_data,
3670
- count,
3671
- ..
3672
- } => {
3673
- assert_eq!(*count, 1);
3674
- assert_eq!(file_data.len(), 1);
3675
- assert_eq!(file_data[0].file_id, "file-readme");
3676
- let descriptor = rows
3677
- .iter()
3678
- .find(|row| row.schema_key == "lix_file_descriptor")
3679
- .expect("file descriptor row should be staged");
3680
- let snapshot: JsonValue = descriptor.snapshot.as_ref().unwrap().value().clone();
3681
- assert_eq!(snapshot["directory_id"], "dir-guides");
3682
- }
3683
- other => panic!("expected insert with file data staged write, got {other:?}"),
3684
- }
3685
- }
3686
- }