@lix-js/sdk 0.6.0-preview.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/README.md +76 -4
  2. package/dist/errors.d.ts +7 -0
  3. package/dist/errors.js +19 -0
  4. package/dist/index.d.ts +4 -5
  5. package/dist/index.js +3 -3
  6. package/dist/native.d.ts +1 -0
  7. package/dist/native.js +47 -0
  8. package/dist/open-lix.d.ts +38 -207
  9. package/dist/open-lix.js +59 -284
  10. package/dist/result.d.ts +18 -0
  11. package/dist/result.js +48 -0
  12. package/dist/types.d.ts +114 -1
  13. package/dist/value.d.ts +28 -0
  14. package/dist/value.js +245 -0
  15. package/package.json +38 -71
  16. package/SKILL.md +0 -507
  17. package/dist/builtin-schemas.d.ts +0 -1
  18. package/dist/builtin-schemas.js +0 -1
  19. package/dist/engine-wasm/index.d.ts +0 -87
  20. package/dist/engine-wasm/index.js +0 -339
  21. package/dist/engine-wasm/wasm/lix_engine.d.ts +0 -79
  22. package/dist/engine-wasm/wasm/lix_engine.js +0 -833
  23. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  24. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +0 -27
  25. package/dist/generated/builtin-schemas.d.ts +0 -427
  26. package/dist/generated/builtin-schemas.js +0 -643
  27. package/dist/sqlite/index.d.ts +0 -12
  28. package/dist/sqlite/index.js +0 -359
  29. package/dist-engine-src/README.md +0 -18
  30. package/dist-engine-src/src/backend/capabilities.rs +0 -67
  31. package/dist-engine-src/src/backend/conformance/baseline.rs +0 -1127
  32. package/dist-engine-src/src/backend/conformance/factory.rs +0 -93
  33. package/dist-engine-src/src/backend/conformance/failure_tests.rs +0 -608
  34. package/dist-engine-src/src/backend/conformance/fixtures.rs +0 -26
  35. package/dist-engine-src/src/backend/conformance/mod.rs +0 -75
  36. package/dist-engine-src/src/backend/conformance/model.rs +0 -28
  37. package/dist-engine-src/src/backend/conformance/model_based.rs +0 -257
  38. package/dist-engine-src/src/backend/conformance/persistence.rs +0 -204
  39. package/dist-engine-src/src/backend/conformance/projection.rs +0 -21
  40. package/dist-engine-src/src/backend/conformance/pushdown.rs +0 -24
  41. package/dist-engine-src/src/backend/conformance/runner.rs +0 -90
  42. package/dist-engine-src/src/backend/conformance/scan.rs +0 -24
  43. package/dist-engine-src/src/backend/conformance/write.rs +0 -16
  44. package/dist-engine-src/src/backend/error.rs +0 -94
  45. package/dist-engine-src/src/backend/in_memory.rs +0 -670
  46. package/dist-engine-src/src/backend/mod.rs +0 -39
  47. package/dist-engine-src/src/backend/predicate.rs +0 -80
  48. package/dist-engine-src/src/backend/traits.rs +0 -260
  49. package/dist-engine-src/src/backend/types.rs +0 -239
  50. package/dist-engine-src/src/binary_cas/chunking.rs +0 -31
  51. package/dist-engine-src/src/binary_cas/codec.rs +0 -346
  52. package/dist-engine-src/src/binary_cas/context.rs +0 -139
  53. package/dist-engine-src/src/binary_cas/kv.rs +0 -1038
  54. package/dist-engine-src/src/binary_cas/mod.rs +0 -11
  55. package/dist-engine-src/src/binary_cas/types.rs +0 -121
  56. package/dist-engine-src/src/branch/context.rs +0 -40
  57. package/dist-engine-src/src/branch/lifecycle.rs +0 -221
  58. package/dist-engine-src/src/branch/mod.rs +0 -13
  59. package/dist-engine-src/src/branch/refs.rs +0 -321
  60. package/dist-engine-src/src/branch/stage_rows.rs +0 -67
  61. package/dist-engine-src/src/branch/types.rs +0 -21
  62. package/dist-engine-src/src/catalog/context.rs +0 -412
  63. package/dist-engine-src/src/catalog/mod.rs +0 -10
  64. package/dist-engine-src/src/catalog/schema.rs +0 -4
  65. package/dist-engine-src/src/catalog/snapshot.rs +0 -1114
  66. package/dist-engine-src/src/cel/context.rs +0 -86
  67. package/dist-engine-src/src/cel/error.rs +0 -19
  68. package/dist-engine-src/src/cel/mod.rs +0 -8
  69. package/dist-engine-src/src/cel/provider.rs +0 -9
  70. package/dist-engine-src/src/cel/runtime.rs +0 -167
  71. package/dist-engine-src/src/cel/value.rs +0 -50
  72. package/dist-engine-src/src/changelog/bench_support.rs +0 -785
  73. package/dist-engine-src/src/changelog/change.rs +0 -1
  74. package/dist-engine-src/src/changelog/codec.rs +0 -497
  75. package/dist-engine-src/src/changelog/commit.rs +0 -1
  76. package/dist-engine-src/src/changelog/context.rs +0 -1614
  77. package/dist-engine-src/src/changelog/mod.rs +0 -29
  78. package/dist-engine-src/src/changelog/store.rs +0 -163
  79. package/dist-engine-src/src/changelog/test_support.rs +0 -54
  80. package/dist-engine-src/src/changelog/types.rs +0 -213
  81. package/dist-engine-src/src/commit_graph/context.rs +0 -944
  82. package/dist-engine-src/src/commit_graph/mod.rs +0 -9
  83. package/dist-engine-src/src/commit_graph/types.rs +0 -89
  84. package/dist-engine-src/src/commit_graph/walker.rs +0 -786
  85. package/dist-engine-src/src/common/error.rs +0 -347
  86. package/dist-engine-src/src/common/fingerprint.rs +0 -3
  87. package/dist-engine-src/src/common/fs_path.rs +0 -1336
  88. package/dist-engine-src/src/common/identity.rs +0 -145
  89. package/dist-engine-src/src/common/json_pointer.rs +0 -67
  90. package/dist-engine-src/src/common/metadata.rs +0 -40
  91. package/dist-engine-src/src/common/mod.rs +0 -23
  92. package/dist-engine-src/src/common/types.rs +0 -105
  93. package/dist-engine-src/src/common/wire.rs +0 -222
  94. package/dist-engine-src/src/domain.rs +0 -320
  95. package/dist-engine-src/src/engine.rs +0 -203
  96. package/dist-engine-src/src/entity_pk.rs +0 -402
  97. package/dist-engine-src/src/functions/context.rs +0 -296
  98. package/dist-engine-src/src/functions/deterministic.rs +0 -113
  99. package/dist-engine-src/src/functions/mod.rs +0 -18
  100. package/dist-engine-src/src/functions/provider.rs +0 -130
  101. package/dist-engine-src/src/functions/state.rs +0 -335
  102. package/dist-engine-src/src/functions/types.rs +0 -37
  103. package/dist-engine-src/src/init.rs +0 -692
  104. package/dist-engine-src/src/json_store/compression.rs +0 -77
  105. package/dist-engine-src/src/json_store/context.rs +0 -172
  106. package/dist-engine-src/src/json_store/encoded.rs +0 -15
  107. package/dist-engine-src/src/json_store/mod.rs +0 -38
  108. package/dist-engine-src/src/json_store/store.rs +0 -494
  109. package/dist-engine-src/src/json_store/types.rs +0 -212
  110. package/dist-engine-src/src/lib.rs +0 -92
  111. package/dist-engine-src/src/live_state/context.rs +0 -1883
  112. package/dist-engine-src/src/live_state/mod.rs +0 -21
  113. package/dist-engine-src/src/live_state/overlay.rs +0 -75
  114. package/dist-engine-src/src/live_state/reader.rs +0 -23
  115. package/dist-engine-src/src/live_state/types.rs +0 -231
  116. package/dist-engine-src/src/live_state/visibility.rs +0 -666
  117. package/dist-engine-src/src/plugin/archive.rs +0 -438
  118. package/dist-engine-src/src/plugin/component.rs +0 -183
  119. package/dist-engine-src/src/plugin/install.rs +0 -619
  120. package/dist-engine-src/src/plugin/manifest.rs +0 -516
  121. package/dist-engine-src/src/plugin/materializer.rs +0 -202
  122. package/dist-engine-src/src/plugin/mod.rs +0 -33
  123. package/dist-engine-src/src/plugin/plugin_manifest.json +0 -119
  124. package/dist-engine-src/src/plugin/storage.rs +0 -74
  125. package/dist-engine-src/src/schema/annotations/defaults.rs +0 -275
  126. package/dist-engine-src/src/schema/annotations/mod.rs +0 -1
  127. package/dist-engine-src/src/schema/builtin/lix_account.json +0 -21
  128. package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -29
  129. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -29
  130. package/dist-engine-src/src/schema/builtin/lix_branch_descriptor.json +0 -34
  131. package/dist-engine-src/src/schema/builtin/lix_branch_ref.json +0 -48
  132. package/dist-engine-src/src/schema/builtin/lix_change.json +0 -63
  133. package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -45
  134. package/dist-engine-src/src/schema/builtin/lix_commit.json +0 -24
  135. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +0 -53
  136. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -52
  137. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -52
  138. package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -40
  139. package/dist-engine-src/src/schema/builtin/lix_label.json +0 -29
  140. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +0 -74
  141. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +0 -25
  142. package/dist-engine-src/src/schema/builtin/mod.rs +0 -220
  143. package/dist-engine-src/src/schema/compatibility.rs +0 -787
  144. package/dist-engine-src/src/schema/definition.json +0 -187
  145. package/dist-engine-src/src/schema/definition.rs +0 -742
  146. package/dist-engine-src/src/schema/key.rs +0 -138
  147. package/dist-engine-src/src/schema/mod.rs +0 -20
  148. package/dist-engine-src/src/schema/seed.rs +0 -14
  149. package/dist-engine-src/src/schema/tests.rs +0 -780
  150. package/dist-engine-src/src/session/context.rs +0 -1059
  151. package/dist-engine-src/src/session/create_branch.rs +0 -94
  152. package/dist-engine-src/src/session/execute.rs +0 -681
  153. package/dist-engine-src/src/session/merge/analysis.rs +0 -108
  154. package/dist-engine-src/src/session/merge/branch.rs +0 -417
  155. package/dist-engine-src/src/session/merge/conflicts.rs +0 -63
  156. package/dist-engine-src/src/session/merge/mod.rs +0 -10
  157. package/dist-engine-src/src/session/merge/stats.rs +0 -61
  158. package/dist-engine-src/src/session/mod.rs +0 -30
  159. package/dist-engine-src/src/session/switch_branch.rs +0 -113
  160. package/dist-engine-src/src/session/transaction.rs +0 -557
  161. package/dist-engine-src/src/sql2/bind/classify.rs +0 -102
  162. package/dist-engine-src/src/sql2/bind/error.rs +0 -5
  163. package/dist-engine-src/src/sql2/bind/expr.rs +0 -29
  164. package/dist-engine-src/src/sql2/bind/mod.rs +0 -12
  165. package/dist-engine-src/src/sql2/bind/public_udf.rs +0 -306
  166. package/dist-engine-src/src/sql2/bind/read.rs +0 -65
  167. package/dist-engine-src/src/sql2/bind/statement.rs +0 -2236
  168. package/dist-engine-src/src/sql2/bind/table.rs +0 -273
  169. package/dist-engine-src/src/sql2/bind/write.rs +0 -86
  170. package/dist-engine-src/src/sql2/branch_scope.rs +0 -436
  171. package/dist-engine-src/src/sql2/catalog/capability.rs +0 -20
  172. package/dist-engine-src/src/sql2/catalog/entity_surface.rs +0 -296
  173. package/dist-engine-src/src/sql2/catalog/mod.rs +0 -15
  174. package/dist-engine-src/src/sql2/catalog/registry.rs +0 -556
  175. package/dist-engine-src/src/sql2/catalog/schema.rs +0 -88
  176. package/dist-engine-src/src/sql2/catalog/surface.rs +0 -41
  177. package/dist-engine-src/src/sql2/change_materialization.rs +0 -122
  178. package/dist-engine-src/src/sql2/context.rs +0 -317
  179. package/dist-engine-src/src/sql2/dml.rs +0 -148
  180. package/dist-engine-src/src/sql2/error.rs +0 -215
  181. package/dist-engine-src/src/sql2/exec/bound_public_write.rs +0 -1593
  182. package/dist-engine-src/src/sql2/exec/datafusion.rs +0 -5266
  183. package/dist-engine-src/src/sql2/exec/fast_write.rs +0 -82
  184. package/dist-engine-src/src/sql2/exec/mod.rs +0 -24
  185. package/dist-engine-src/src/sql2/exec/write.rs +0 -661
  186. package/dist-engine-src/src/sql2/filesystem_planner.rs +0 -1485
  187. package/dist-engine-src/src/sql2/filesystem_predicates.rs +0 -159
  188. package/dist-engine-src/src/sql2/filesystem_visibility.rs +0 -383
  189. package/dist-engine-src/src/sql2/history_projection.rs +0 -56
  190. package/dist-engine-src/src/sql2/history_route.rs +0 -661
  191. package/dist-engine-src/src/sql2/mod.rs +0 -52
  192. package/dist-engine-src/src/sql2/optimize/datafusion.rs +0 -1
  193. package/dist-engine-src/src/sql2/optimize/mod.rs +0 -2
  194. package/dist-engine-src/src/sql2/optimize/simple_write.rs +0 -116
  195. package/dist-engine-src/src/sql2/parse/mod.rs +0 -69
  196. package/dist-engine-src/src/sql2/parse/normalize.rs +0 -1
  197. package/dist-engine-src/src/sql2/plan/branch_scope.rs +0 -24
  198. package/dist-engine-src/src/sql2/plan/mod.rs +0 -5
  199. package/dist-engine-src/src/sql2/plan/predicate.rs +0 -22
  200. package/dist-engine-src/src/sql2/plan/write.rs +0 -147
  201. package/dist-engine-src/src/sql2/predicate_typecheck.rs +0 -504
  202. package/dist-engine-src/src/sql2/providers/branch.rs +0 -1206
  203. package/dist-engine-src/src/sql2/providers/change.rs +0 -445
  204. package/dist-engine-src/src/sql2/providers/directory.rs +0 -2422
  205. package/dist-engine-src/src/sql2/providers/directory_history.rs +0 -645
  206. package/dist-engine-src/src/sql2/providers/entity.rs +0 -1484
  207. package/dist-engine-src/src/sql2/providers/entity_history.rs +0 -452
  208. package/dist-engine-src/src/sql2/providers/file.rs +0 -3686
  209. package/dist-engine-src/src/sql2/providers/file_history.rs +0 -924
  210. package/dist-engine-src/src/sql2/providers/history.rs +0 -426
  211. package/dist-engine-src/src/sql2/providers/lix_state.rs +0 -2542
  212. package/dist-engine-src/src/sql2/providers/mod.rs +0 -508
  213. package/dist-engine-src/src/sql2/read_only.rs +0 -63
  214. package/dist-engine-src/src/sql2/record_batch.rs +0 -17
  215. package/dist-engine-src/src/sql2/result_metadata.rs +0 -29
  216. package/dist-engine-src/src/sql2/runtime.rs +0 -60
  217. package/dist-engine-src/src/sql2/session.rs +0 -83
  218. package/dist-engine-src/src/sql2/storage/constraints.rs +0 -1
  219. package/dist-engine-src/src/sql2/storage/mod.rs +0 -1
  220. package/dist-engine-src/src/sql2/test_support/differential.rs +0 -712
  221. package/dist-engine-src/src/sql2/test_support/generators.rs +0 -354
  222. package/dist-engine-src/src/sql2/test_support/mod.rs +0 -2
  223. package/dist-engine-src/src/sql2/udfs/common.rs +0 -295
  224. package/dist-engine-src/src/sql2/udfs/lix_active_branch_commit_id.rs +0 -53
  225. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +0 -47
  226. package/dist-engine-src/src/sql2/udfs/lix_json.rs +0 -100
  227. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +0 -99
  228. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +0 -99
  229. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +0 -82
  230. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +0 -85
  231. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +0 -76
  232. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +0 -76
  233. package/dist-engine-src/src/sql2/udfs/mod.rs +0 -86
  234. package/dist-engine-src/src/sql2/write_normalization.rs +0 -368
  235. package/dist-engine-src/src/storage/conformance.rs +0 -399
  236. package/dist-engine-src/src/storage/context.rs +0 -620
  237. package/dist-engine-src/src/storage/mod.rs +0 -52
  238. package/dist-engine-src/src/storage/point.rs +0 -440
  239. package/dist-engine-src/src/storage/read_scope.rs +0 -67
  240. package/dist-engine-src/src/storage/reader.rs +0 -867
  241. package/dist-engine-src/src/storage/scan.rs +0 -784
  242. package/dist-engine-src/src/storage/spaces.rs +0 -236
  243. package/dist-engine-src/src/storage/stats.rs +0 -80
  244. package/dist-engine-src/src/storage/write_set.rs +0 -962
  245. package/dist-engine-src/src/storage_bench.rs +0 -171
  246. package/dist-engine-src/src/test_support.rs +0 -450
  247. package/dist-engine-src/src/tracked_state/bench_support.rs +0 -394
  248. package/dist-engine-src/src/tracked_state/codec.rs +0 -1183
  249. package/dist-engine-src/src/tracked_state/commit_root_rebuild.rs +0 -358
  250. package/dist-engine-src/src/tracked_state/context.rs +0 -2801
  251. package/dist-engine-src/src/tracked_state/diff.rs +0 -2140
  252. package/dist-engine-src/src/tracked_state/merge.rs +0 -478
  253. package/dist-engine-src/src/tracked_state/mod.rs +0 -35
  254. package/dist-engine-src/src/tracked_state/row_materialization.rs +0 -275
  255. package/dist-engine-src/src/tracked_state/storage.rs +0 -427
  256. package/dist-engine-src/src/tracked_state/tree.rs +0 -3063
  257. package/dist-engine-src/src/tracked_state/types.rs +0 -238
  258. package/dist-engine-src/src/transaction/bench_support.rs +0 -407
  259. package/dist-engine-src/src/transaction/commit.rs +0 -1592
  260. package/dist-engine-src/src/transaction/context.rs +0 -1653
  261. package/dist-engine-src/src/transaction/mod.rs +0 -24
  262. package/dist-engine-src/src/transaction/normalization.rs +0 -877
  263. package/dist-engine-src/src/transaction/prep.rs +0 -37
  264. package/dist-engine-src/src/transaction/schema_resolver.rs +0 -163
  265. package/dist-engine-src/src/transaction/staging.rs +0 -1525
  266. package/dist-engine-src/src/transaction/types.rs +0 -403
  267. package/dist-engine-src/src/transaction/validation.rs +0 -5766
  268. package/dist-engine-src/src/untracked_state/codec.rs +0 -615
  269. package/dist-engine-src/src/untracked_state/context.rs +0 -98
  270. package/dist-engine-src/src/untracked_state/materialization.rs +0 -63
  271. package/dist-engine-src/src/untracked_state/mod.rs +0 -15
  272. package/dist-engine-src/src/untracked_state/storage.rs +0 -898
  273. package/dist-engine-src/src/untracked_state/types.rs +0 -146
  274. package/dist-engine-src/src/wasm/mod.rs +0 -60
@@ -1,924 +0,0 @@
1
- use std::any::Any;
2
- use std::collections::{BTreeMap, BTreeSet};
3
- use std::sync::Arc;
4
-
5
- use async_trait::async_trait;
6
- use datafusion::arrow::array::{ArrayRef, BinaryArray, BooleanArray, Int64Array, StringArray};
7
- use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
8
- use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
9
- use datafusion::catalog::{Session, TableProvider};
10
- use datafusion::common::{DataFusionError, Result};
11
- use datafusion::datasource::TableType;
12
- use datafusion::execution::TaskContext;
13
- use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
14
- use datafusion::physical_expr::EquivalenceProperties;
15
- use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
16
- use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
17
- use datafusion::physical_plan::{
18
- DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
19
- };
20
- use futures_util::stream;
21
- use serde::Deserialize;
22
- use tokio::sync::Mutex;
23
-
24
- use crate::binary_cas::{BlobDataReader, BlobHash};
25
- use crate::commit_graph::CommitGraphReader;
26
- use crate::serialize_row_metadata;
27
- use crate::LixError;
28
-
29
- use crate::sql2::change_materialization::MaterializedChange;
30
- use crate::sql2::history_projection::{tombstone_identity_column_value, HistoryIdentityProjection};
31
- use crate::sql2::history_route::{
32
- history_descriptor_event_matches, load_history_entries, parse_history_filter,
33
- HistoryColumnStyle, HistoryEntry, HistoryRoute, HistoryViewDescriptor, HISTORY_COL_CHANGE_ID,
34
- HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_PK, HISTORY_COL_FILE_ID,
35
- HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID, HISTORY_COL_SCHEMA_KEY,
36
- HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
37
- };
38
- use crate::sql2::result_metadata::json_field;
39
- use crate::sql2::SqlHistoryQuerySource;
40
- use crate::storage::StorageRead;
41
-
42
- const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";
43
- const DIRECTORY_DESCRIPTOR_SCHEMA_KEY: &str = "lix_directory_descriptor";
44
- const BLOB_REF_SCHEMA_KEY: &str = "lix_binary_blob_ref";
45
-
46
- pub(super) async fn register_lix_file_history_surface<S>(
47
- session: &datafusion::prelude::SessionContext,
48
- surface_name: &str,
49
- commit_graph: Box<dyn CommitGraphReader>,
50
- query_source: SqlHistoryQuerySource<S>,
51
- blob_reader: Arc<dyn BlobDataReader>,
52
- ) -> Result<(), LixError>
53
- where
54
- S: StorageRead + Clone + Send + Sync + 'static,
55
- {
56
- session
57
- .register_table(
58
- surface_name,
59
- Arc::new(LixFileHistoryProvider::new(
60
- Arc::new(Mutex::new(commit_graph)),
61
- query_source,
62
- blob_reader,
63
- )),
64
- )
65
- .map_err(datafusion_error_to_lix_error)?;
66
- Ok(())
67
- }
68
-
69
- struct LixFileHistoryProvider<S> {
70
- schema: SchemaRef,
71
- commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
72
- query_source: SqlHistoryQuerySource<S>,
73
- blob_reader: Arc<dyn BlobDataReader>,
74
- }
75
-
76
- impl<S> std::fmt::Debug for LixFileHistoryProvider<S> {
77
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78
- f.debug_struct("LixFileHistoryProvider").finish()
79
- }
80
- }
81
-
82
- impl<S> LixFileHistoryProvider<S> {
83
- fn new(
84
- commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
85
- query_source: SqlHistoryQuerySource<S>,
86
- blob_reader: Arc<dyn BlobDataReader>,
87
- ) -> Self {
88
- Self {
89
- schema: lix_file_history_schema(),
90
- commit_graph,
91
- query_source,
92
- blob_reader,
93
- }
94
- }
95
- }
96
-
97
- #[async_trait]
98
- impl<S> TableProvider for LixFileHistoryProvider<S>
99
- where
100
- S: StorageRead + Clone + Send + Sync + 'static,
101
- {
102
- fn as_any(&self) -> &dyn Any {
103
- self
104
- }
105
-
106
- fn schema(&self) -> SchemaRef {
107
- Arc::clone(&self.schema)
108
- }
109
-
110
- fn table_type(&self) -> TableType {
111
- TableType::View
112
- }
113
-
114
- fn supports_filters_pushdown(
115
- &self,
116
- filters: &[&Expr],
117
- ) -> Result<Vec<TableProviderFilterPushDown>> {
118
- Ok(filters
119
- .iter()
120
- .map(|filter| {
121
- if parse_history_filter(filter, HistoryColumnStyle::Prefixed).is_some() {
122
- TableProviderFilterPushDown::Exact
123
- } else {
124
- TableProviderFilterPushDown::Unsupported
125
- }
126
- })
127
- .collect())
128
- }
129
-
130
- async fn scan(
131
- &self,
132
- _state: &dyn Session,
133
- projection: Option<&Vec<usize>>,
134
- filters: &[Expr],
135
- limit: Option<usize>,
136
- ) -> Result<Arc<dyn ExecutionPlan>> {
137
- let schema = projected_schema(&self.schema, projection)?;
138
- let needs_data = projection.is_none_or(|projection| {
139
- projection.iter().any(|index| {
140
- self.schema
141
- .field(*index)
142
- .name()
143
- .as_str()
144
- .eq_ignore_ascii_case("data")
145
- })
146
- });
147
- Ok(Arc::new(LixFileHistoryScanExec::new(
148
- Arc::clone(&self.commit_graph),
149
- self.query_source.clone(),
150
- Arc::clone(&self.blob_reader),
151
- schema,
152
- needs_data,
153
- HistoryRoute::from_filters(filters, HistoryColumnStyle::Prefixed),
154
- limit,
155
- )))
156
- }
157
- }
158
-
159
- struct LixFileHistoryScanExec<S> {
160
- commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
161
- query_source: SqlHistoryQuerySource<S>,
162
- blob_reader: Arc<dyn BlobDataReader>,
163
- schema: SchemaRef,
164
- needs_data: bool,
165
- route: HistoryRoute,
166
- limit: Option<usize>,
167
- properties: Arc<PlanProperties>,
168
- }
169
-
170
- impl<S> std::fmt::Debug for LixFileHistoryScanExec<S> {
171
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
172
- f.debug_struct("LixFileHistoryScanExec")
173
- .field("route", &self.route)
174
- .field("limit", &self.limit)
175
- .finish()
176
- }
177
- }
178
-
179
- impl<S> LixFileHistoryScanExec<S> {
180
- fn new(
181
- commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
182
- query_source: SqlHistoryQuerySource<S>,
183
- blob_reader: Arc<dyn BlobDataReader>,
184
- schema: SchemaRef,
185
- needs_data: bool,
186
- route: HistoryRoute,
187
- limit: Option<usize>,
188
- ) -> Self {
189
- let properties = PlanProperties::new(
190
- EquivalenceProperties::new(Arc::clone(&schema)),
191
- Partitioning::UnknownPartitioning(1),
192
- EmissionType::Incremental,
193
- Boundedness::Bounded,
194
- );
195
- Self {
196
- commit_graph,
197
- query_source,
198
- blob_reader,
199
- schema,
200
- needs_data,
201
- route,
202
- limit,
203
- properties: Arc::new(properties),
204
- }
205
- }
206
- }
207
-
208
- impl<S> DisplayAs for LixFileHistoryScanExec<S> {
209
- fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
210
- match t {
211
- DisplayFormatType::Default | DisplayFormatType::Verbose => write!(
212
- f,
213
- "LixFileHistoryScanExec(route={:?}, limit={:?})",
214
- self.route, self.limit
215
- ),
216
- DisplayFormatType::TreeRender => write!(f, "LixFileHistoryScanExec"),
217
- }
218
- }
219
- }
220
-
221
- impl<S> ExecutionPlan for LixFileHistoryScanExec<S>
222
- where
223
- S: StorageRead + Clone + Send + Sync + 'static,
224
- {
225
- fn name(&self) -> &str {
226
- "LixFileHistoryScanExec"
227
- }
228
-
229
- fn as_any(&self) -> &dyn Any {
230
- self
231
- }
232
-
233
- fn properties(&self) -> &Arc<PlanProperties> {
234
- &self.properties
235
- }
236
-
237
- fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
238
- Vec::new()
239
- }
240
-
241
- fn with_new_children(
242
- self: Arc<Self>,
243
- children: Vec<Arc<dyn ExecutionPlan>>,
244
- ) -> Result<Arc<dyn ExecutionPlan>> {
245
- if !children.is_empty() {
246
- return Err(DataFusionError::Execution(
247
- "LixFileHistoryScanExec does not accept children".to_string(),
248
- ));
249
- }
250
- Ok(self)
251
- }
252
-
253
- fn execute(
254
- &self,
255
- partition: usize,
256
- _context: Arc<TaskContext>,
257
- ) -> Result<SendableRecordBatchStream> {
258
- if partition != 0 {
259
- return Err(DataFusionError::Execution(format!(
260
- "LixFileHistoryScanExec only exposes one partition, got {partition}"
261
- )));
262
- }
263
-
264
- let commit_graph = Arc::clone(&self.commit_graph);
265
- let query_source = self.query_source.clone();
266
- let blob_reader = Arc::clone(&self.blob_reader);
267
- let schema = Arc::clone(&self.schema);
268
- let stream_schema = Arc::clone(&schema);
269
- let route = self.route.clone();
270
- let limit = self.limit;
271
- let needs_data = self.needs_data;
272
-
273
- let fut = async move {
274
- let mut rows = load_file_history_rows(
275
- commit_graph,
276
- query_source,
277
- &blob_reader,
278
- &route,
279
- needs_data,
280
- )
281
- .await
282
- .map_err(lix_error_to_datafusion_error)?;
283
- if let Some(limit) = limit {
284
- rows.truncate(limit);
285
- }
286
- file_history_record_batch(&stream_schema, &rows).map_err(lix_error_to_datafusion_error)
287
- };
288
-
289
- Ok(Box::pin(RecordBatchStreamAdapter::new(
290
- schema,
291
- stream::once(fut),
292
- )))
293
- }
294
- }
295
-
296
- #[derive(Debug, Clone)]
297
- struct FileHistoryDescriptorRecord {
298
- id: String,
299
- directory_id: Option<String>,
300
- name: Option<String>,
301
- hidden: Option<bool>,
302
- entry: HistoryEntry,
303
- }
304
-
305
- #[derive(Debug, Clone)]
306
- struct FileHistoryDirectoryRecord {
307
- id: String,
308
- parent_id: Option<String>,
309
- name: String,
310
- entry: HistoryEntry,
311
- }
312
-
313
- #[derive(Debug, Clone)]
314
- struct FileHistoryBlobRecord {
315
- file_id: String,
316
- blob_hash: Option<String>,
317
- entry: HistoryEntry,
318
- }
319
-
320
- #[derive(Debug, Clone)]
321
- struct FileHistoryEvent {
322
- file_id: String,
323
- start_commit_id: String,
324
- depth: u32,
325
- priority: u8,
326
- change: MaterializedChange,
327
- observed_commit_id: String,
328
- commit_created_at: String,
329
- }
330
-
331
- #[derive(Debug, Clone)]
332
- struct FileHistoryOutputRow {
333
- entity_pk: String,
334
- id: String,
335
- path: Option<String>,
336
- directory_id: Option<String>,
337
- name: Option<String>,
338
- hidden: Option<bool>,
339
- data: Option<Vec<u8>>,
340
- descriptor_change: MaterializedChange,
341
- event: FileHistoryEvent,
342
- }
343
-
344
- #[derive(Debug, Deserialize)]
345
- struct FileDescriptorSnapshot {
346
- id: String,
347
- directory_id: Option<String>,
348
- name: String,
349
- hidden: bool,
350
- }
351
-
352
- #[derive(Debug, Deserialize)]
353
- struct DirectoryDescriptorSnapshot {
354
- id: String,
355
- parent_id: Option<String>,
356
- name: String,
357
- }
358
-
359
- #[derive(Debug, Deserialize)]
360
- struct BlobRefSnapshot {
361
- id: String,
362
- blob_hash: String,
363
- }
364
-
365
- async fn load_file_history_rows<S>(
366
- commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
367
- query_source: SqlHistoryQuerySource<S>,
368
- blob_reader: &Arc<dyn BlobDataReader>,
369
- route: &HistoryRoute,
370
- needs_data: bool,
371
- ) -> Result<Vec<FileHistoryOutputRow>, LixError>
372
- where
373
- S: StorageRead + Clone + Send + Sync + 'static,
374
- {
375
- let event_route = route.traversal_only();
376
- let event_entries = load_history_entries(
377
- HistoryViewDescriptor {
378
- view_name: "lix_file_history",
379
- start_commit_column: HISTORY_COL_START_COMMIT_ID,
380
- },
381
- Arc::clone(&commit_graph),
382
- query_source.json_reader.clone(),
383
- &event_route,
384
- vec![
385
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
386
- DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
387
- BLOB_REF_SCHEMA_KEY.to_string(),
388
- ],
389
- )
390
- .await?;
391
- let context_route = route.starts_only();
392
- let context_entries = load_history_entries(
393
- HistoryViewDescriptor {
394
- view_name: "lix_file_history",
395
- start_commit_column: HISTORY_COL_START_COMMIT_ID,
396
- },
397
- commit_graph,
398
- query_source.json_reader,
399
- &context_route,
400
- vec![
401
- FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
402
- DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
403
- BLOB_REF_SCHEMA_KEY.to_string(),
404
- ],
405
- )
406
- .await?;
407
-
408
- let event_descriptors = parse_file_history_descriptors(&event_entries)?;
409
- let event_directories = parse_file_history_directories(&event_entries)?;
410
- let event_blobs = parse_file_history_blobs(&event_entries)?;
411
- let descriptors = parse_file_history_descriptors(&context_entries)?;
412
- let directories = parse_file_history_directories(&context_entries)?;
413
- let blobs = parse_file_history_blobs(&context_entries)?;
414
- let events = file_history_events(
415
- &event_descriptors,
416
- &event_directories,
417
- &event_blobs,
418
- &descriptors,
419
- );
420
-
421
- let mut output = Vec::new();
422
- for event in events {
423
- let Some(descriptor) = nearest_file_descriptor(&descriptors, &event) else {
424
- continue;
425
- };
426
- let blob = nearest_blob_ref(&blobs, &event);
427
- let data = if needs_data {
428
- match blob.and_then(|blob| blob.blob_hash.as_deref()) {
429
- Some(blob_hash) => load_single_blob_bytes(blob_reader, blob_hash).await?,
430
- None => None,
431
- }
432
- } else {
433
- None
434
- };
435
- let path = resolve_file_history_path(descriptor, &directories, event.depth);
436
- let id = tombstone_identity_column_value(
437
- "id",
438
- &descriptor.id,
439
- HistoryIdentityProjection::SingleColumn { column: "id" },
440
- )?
441
- .and_then(|value| value.as_str().map(ToOwned::to_owned))
442
- .unwrap_or_else(|| descriptor.id.clone());
443
-
444
- output.push(FileHistoryOutputRow {
445
- entity_pk: descriptor.id.clone(),
446
- id,
447
- path,
448
- directory_id: descriptor.directory_id.clone(),
449
- name: descriptor.name.clone(),
450
- hidden: descriptor.hidden,
451
- data,
452
- descriptor_change: descriptor.entry.change.clone(),
453
- event,
454
- });
455
- }
456
- output.retain(|row| {
457
- let entity_pk = entity_pk_json_array(&row.entity_pk).ok();
458
- route.matches_surface_row(
459
- FILE_DESCRIPTOR_SCHEMA_KEY,
460
- entity_pk.as_deref().unwrap_or(&row.entity_pk),
461
- Some(&row.entity_pk),
462
- row.event.depth,
463
- )
464
- });
465
-
466
- output.sort_by(|left, right| {
467
- left.entity_pk
468
- .cmp(&right.entity_pk)
469
- .then(left.event.start_commit_id.cmp(&right.event.start_commit_id))
470
- .then(left.event.depth.cmp(&right.event.depth))
471
- .then(
472
- left.event
473
- .observed_commit_id
474
- .cmp(&right.event.observed_commit_id),
475
- )
476
- .then(left.event.change.id.cmp(&right.event.change.id))
477
- });
478
- Ok(output)
479
- }
480
-
481
- async fn load_single_blob_bytes(
482
- blob_reader: &Arc<dyn BlobDataReader>,
483
- blob_hash: &str,
484
- ) -> Result<Option<Vec<u8>>, LixError> {
485
- let hash = BlobHash::from_hex(blob_hash)?;
486
- Ok(blob_reader
487
- .load_bytes_many(&[hash])
488
- .await?
489
- .into_vec()
490
- .into_iter()
491
- .next()
492
- .flatten())
493
- }
494
-
495
- fn file_history_events(
496
- event_descriptors: &[FileHistoryDescriptorRecord],
497
- event_directories: &[FileHistoryDirectoryRecord],
498
- event_blobs: &[FileHistoryBlobRecord],
499
- context_descriptors: &[FileHistoryDescriptorRecord],
500
- ) -> Vec<FileHistoryEvent> {
501
- let mut descriptor_ids_by_start = BTreeSet::<(String, String)>::new();
502
- let mut directory_ids_by_file_start = BTreeMap::<(String, String), BTreeSet<String>>::new();
503
-
504
- for descriptor in context_descriptors {
505
- let key = (
506
- descriptor.id.clone(),
507
- descriptor.entry.start_commit_id.clone(),
508
- );
509
- descriptor_ids_by_start.insert(key.clone());
510
- if let Some(directory_id) = &descriptor.directory_id {
511
- directory_ids_by_file_start
512
- .entry(key)
513
- .or_default()
514
- .insert(directory_id.clone());
515
- }
516
- }
517
-
518
- let mut candidates = Vec::new();
519
- for descriptor in event_descriptors {
520
- candidates.push(file_history_event_from_entry(
521
- descriptor.id.clone(),
522
- &descriptor.entry,
523
- 1,
524
- ));
525
- }
526
- for directory in event_directories {
527
- for ((file_id, start_commit_id), directory_ids) in &directory_ids_by_file_start {
528
- if start_commit_id == &directory.entry.start_commit_id
529
- && directory_ids.contains(&directory.id)
530
- {
531
- candidates.push(file_history_event_from_entry(
532
- file_id.clone(),
533
- &directory.entry,
534
- 2,
535
- ));
536
- }
537
- }
538
- }
539
- for blob in event_blobs {
540
- if descriptor_ids_by_start
541
- .contains(&(blob.file_id.clone(), blob.entry.start_commit_id.clone()))
542
- {
543
- candidates.push(file_history_event_from_entry(
544
- blob.file_id.clone(),
545
- &blob.entry,
546
- 3,
547
- ));
548
- }
549
- }
550
-
551
- candidates.sort_by(|left, right| {
552
- left.file_id
553
- .cmp(&right.file_id)
554
- .then(left.start_commit_id.cmp(&right.start_commit_id))
555
- .then(left.depth.cmp(&right.depth))
556
- .then(left.priority.cmp(&right.priority))
557
- .then(left.change.id.cmp(&right.change.id))
558
- });
559
- candidates.dedup_by(|left, right| {
560
- left.file_id == right.file_id
561
- && left.start_commit_id == right.start_commit_id
562
- && left.depth == right.depth
563
- });
564
- candidates
565
- }
566
-
567
- fn file_history_event_from_entry(
568
- file_id: String,
569
- entry: &HistoryEntry,
570
- priority: u8,
571
- ) -> FileHistoryEvent {
572
- FileHistoryEvent {
573
- file_id,
574
- start_commit_id: entry.start_commit_id.clone(),
575
- depth: entry.depth,
576
- priority,
577
- change: entry.change.clone(),
578
- observed_commit_id: entry.observed_commit_id.clone(),
579
- commit_created_at: entry.commit_created_at.clone(),
580
- }
581
- }
582
-
583
- fn parse_file_history_descriptors(
584
- entries: &[HistoryEntry],
585
- ) -> Result<Vec<FileHistoryDescriptorRecord>, LixError> {
586
- entries
587
- .iter()
588
- .filter(|entry| entry.change.schema_key == FILE_DESCRIPTOR_SCHEMA_KEY)
589
- .map(|entry| {
590
- let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
591
- return Ok(FileHistoryDescriptorRecord {
592
- id: entry.change.entity_pk.as_single_string_owned()?,
593
- directory_id: None,
594
- name: None,
595
- hidden: None,
596
- entry: entry.clone(),
597
- });
598
- };
599
- let snapshot: FileDescriptorSnapshot =
600
- serde_json::from_str(snapshot_content).map_err(|error| {
601
- LixError::new(
602
- "LIX_ERROR_UNKNOWN",
603
- format!("invalid lix_file_descriptor history snapshot JSON: {error}"),
604
- )
605
- })?;
606
- Ok(FileHistoryDescriptorRecord {
607
- id: snapshot.id,
608
- directory_id: snapshot.directory_id,
609
- name: Some(snapshot.name),
610
- hidden: Some(snapshot.hidden),
611
- entry: entry.clone(),
612
- })
613
- })
614
- .collect()
615
- }
616
-
617
- fn parse_file_history_directories(
618
- entries: &[HistoryEntry],
619
- ) -> Result<Vec<FileHistoryDirectoryRecord>, LixError> {
620
- entries
621
- .iter()
622
- .filter(|entry| entry.change.schema_key == DIRECTORY_DESCRIPTOR_SCHEMA_KEY)
623
- .filter_map(|entry| {
624
- let snapshot_content = entry.change.snapshot_content.clone()?;
625
- Some((entry, snapshot_content))
626
- })
627
- .map(|(entry, snapshot_content)| {
628
- let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(&snapshot_content)
629
- .map_err(|error| {
630
- LixError::new(
631
- "LIX_ERROR_UNKNOWN",
632
- format!("invalid lix_directory_descriptor history snapshot JSON: {error}"),
633
- )
634
- })?;
635
- Ok(FileHistoryDirectoryRecord {
636
- id: snapshot.id,
637
- parent_id: snapshot.parent_id,
638
- name: snapshot.name,
639
- entry: entry.clone(),
640
- })
641
- })
642
- .collect()
643
- }
644
-
645
- fn parse_file_history_blobs(
646
- entries: &[HistoryEntry],
647
- ) -> Result<Vec<FileHistoryBlobRecord>, LixError> {
648
- entries
649
- .iter()
650
- .filter(|entry| entry.change.schema_key == BLOB_REF_SCHEMA_KEY)
651
- .map(|entry| {
652
- let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
653
- return Ok(FileHistoryBlobRecord {
654
- file_id: entry.change.file_id.clone().unwrap_or_else(|| {
655
- entry
656
- .change
657
- .entity_pk
658
- .as_single_string_owned()
659
- .expect("canonical change entity primary key should project")
660
- }),
661
- blob_hash: None,
662
- entry: entry.clone(),
663
- });
664
- };
665
- let snapshot: BlobRefSnapshot =
666
- serde_json::from_str(snapshot_content).map_err(|error| {
667
- LixError::new(
668
- "LIX_ERROR_UNKNOWN",
669
- format!("invalid lix_binary_blob_ref history snapshot JSON: {error}"),
670
- )
671
- })?;
672
- Ok(FileHistoryBlobRecord {
673
- file_id: entry.change.file_id.clone().unwrap_or(snapshot.id),
674
- blob_hash: Some(snapshot.blob_hash),
675
- entry: entry.clone(),
676
- })
677
- })
678
- .collect()
679
- }
680
-
681
- fn nearest_file_descriptor<'a>(
682
- descriptors: &'a [FileHistoryDescriptorRecord],
683
- event: &FileHistoryEvent,
684
- ) -> Option<&'a FileHistoryDescriptorRecord> {
685
- descriptors
686
- .iter()
687
- .filter(|descriptor| {
688
- let exact_descriptor_event =
689
- history_descriptor_event_matches(&descriptor.entry, event.depth, &event.change.id);
690
- (exact_descriptor_event || descriptor.name.is_some())
691
- && descriptor.id == event.file_id
692
- && descriptor.entry.start_commit_id == event.start_commit_id
693
- && descriptor.entry.depth >= event.depth
694
- })
695
- .min_by(|left, right| {
696
- left.entry
697
- .depth
698
- .cmp(&right.entry.depth)
699
- .then(left.entry.change.id.cmp(&right.entry.change.id))
700
- })
701
- }
702
-
703
- fn nearest_blob_ref<'a>(
704
- blobs: &'a [FileHistoryBlobRecord],
705
- event: &FileHistoryEvent,
706
- ) -> Option<&'a FileHistoryBlobRecord> {
707
- blobs
708
- .iter()
709
- .filter(|blob| {
710
- blob.file_id == event.file_id
711
- && blob.entry.start_commit_id == event.start_commit_id
712
- && blob.entry.depth >= event.depth
713
- })
714
- .min_by(|left, right| {
715
- left.entry
716
- .depth
717
- .cmp(&right.entry.depth)
718
- .then(left.entry.change.id.cmp(&right.entry.change.id))
719
- })
720
- }
721
-
722
- fn resolve_file_history_path(
723
- descriptor: &FileHistoryDescriptorRecord,
724
- directories: &[FileHistoryDirectoryRecord],
725
- target_depth: u32,
726
- ) -> Option<String> {
727
- let name = descriptor.name.as_ref()?;
728
- let Some(directory_id) = descriptor.directory_id.as_deref() else {
729
- return Some(format!("/{name}"));
730
- };
731
- let directory_path = resolve_directory_history_path(
732
- directory_id,
733
- &descriptor.entry.start_commit_id,
734
- target_depth,
735
- directories,
736
- &mut BTreeMap::new(),
737
- &mut BTreeSet::new(),
738
- )?;
739
- Some(format!("{directory_path}{name}"))
740
- }
741
-
742
- fn resolve_directory_history_path(
743
- directory_id: &str,
744
- start_commit_id: &str,
745
- target_depth: u32,
746
- directories: &[FileHistoryDirectoryRecord],
747
- cache: &mut BTreeMap<String, Option<String>>,
748
- visiting: &mut BTreeSet<String>,
749
- ) -> Option<String> {
750
- if let Some(path) = cache.get(directory_id) {
751
- return path.clone();
752
- }
753
- if !visiting.insert(directory_id.to_string()) {
754
- cache.insert(directory_id.to_string(), None);
755
- return None;
756
- }
757
- let directory = directories
758
- .iter()
759
- .filter(|directory| {
760
- directory.id == directory_id
761
- && directory.entry.start_commit_id == start_commit_id
762
- && directory.entry.depth >= target_depth
763
- })
764
- .min_by(|left, right| {
765
- left.entry
766
- .depth
767
- .cmp(&right.entry.depth)
768
- .then(left.entry.change.id.cmp(&right.entry.change.id))
769
- })?;
770
- let path = match directory.parent_id.as_deref() {
771
- Some(parent_id) => {
772
- let parent_path = resolve_directory_history_path(
773
- parent_id,
774
- start_commit_id,
775
- target_depth,
776
- directories,
777
- cache,
778
- visiting,
779
- )?;
780
- format!("{parent_path}{}/", directory.name)
781
- }
782
- None => format!("/{}/", directory.name),
783
- };
784
- visiting.remove(directory_id);
785
- cache.insert(directory_id.to_string(), Some(path.clone()));
786
- Some(path)
787
- }
788
-
789
- fn file_history_record_batch(
790
- schema: &SchemaRef,
791
- rows: &[FileHistoryOutputRow],
792
- ) -> Result<RecordBatch, LixError> {
793
- let columns = schema
794
- .fields()
795
- .iter()
796
- .map(|field| file_history_column_array(field.name(), rows))
797
- .collect::<Result<Vec<_>, _>>()?;
798
- let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
799
- RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
800
- LixError::new(
801
- "LIX_ERROR_UNKNOWN",
802
- format!("sql2 failed to build lix_file_history record batch: {error}"),
803
- )
804
- })
805
- }
806
-
807
- fn file_history_column_array(
808
- column_name: &str,
809
- rows: &[FileHistoryOutputRow],
810
- ) -> Result<ArrayRef, LixError> {
811
- Ok(match column_name {
812
- "id" => string_array(rows.iter().map(|row| Some(row.id.as_str()))),
813
- "path" => string_array(rows.iter().map(|row| row.path.as_deref())),
814
- "directory_id" => string_array(rows.iter().map(|row| row.directory_id.as_deref())),
815
- "name" => string_array(rows.iter().map(|row| row.name.as_deref())),
816
- "hidden" => Arc::new(BooleanArray::from(
817
- rows.iter().map(|row| row.hidden).collect::<Vec<_>>(),
818
- )) as ArrayRef,
819
- "data" => Arc::new(BinaryArray::from(
820
- rows.iter()
821
- .map(|row| row.data.as_deref())
822
- .collect::<Vec<_>>(),
823
- )) as ArrayRef,
824
- HISTORY_COL_ENTITY_PK => Arc::new(StringArray::from(
825
- rows.iter()
826
- .map(|row| entity_pk_json_array(&row.entity_pk).map(Some))
827
- .collect::<std::result::Result<Vec<_>, _>>()?,
828
- )) as ArrayRef,
829
- HISTORY_COL_SCHEMA_KEY => {
830
- string_array(rows.iter().map(|_| Some(FILE_DESCRIPTOR_SCHEMA_KEY)))
831
- }
832
- HISTORY_COL_FILE_ID => string_array(rows.iter().map(|row| Some(row.entity_pk.as_str()))),
833
- HISTORY_COL_CHANGE_ID => {
834
- string_array(rows.iter().map(|row| Some(row.event.change.id.as_str())))
835
- }
836
- HISTORY_COL_SNAPSHOT_CONTENT => string_array(
837
- rows.iter()
838
- .map(|row| row.descriptor_change.snapshot_content.as_deref()),
839
- ),
840
- HISTORY_COL_METADATA => Arc::new(StringArray::from(
841
- rows.iter()
842
- .map(|row| {
843
- row.descriptor_change
844
- .metadata
845
- .as_ref()
846
- .map(serialize_row_metadata)
847
- })
848
- .collect::<Vec<_>>(),
849
- )),
850
- HISTORY_COL_OBSERVED_COMMIT_ID => string_array(
851
- rows.iter()
852
- .map(|row| Some(row.event.observed_commit_id.as_str())),
853
- ),
854
- HISTORY_COL_COMMIT_CREATED_AT => string_array(
855
- rows.iter()
856
- .map(|row| Some(row.event.commit_created_at.as_str())),
857
- ),
858
- HISTORY_COL_START_COMMIT_ID => string_array(
859
- rows.iter()
860
- .map(|row| Some(row.event.start_commit_id.as_str())),
861
- ),
862
- HISTORY_COL_DEPTH => Arc::new(Int64Array::from(
863
- rows.iter()
864
- .map(|row| i64::from(row.event.depth))
865
- .collect::<Vec<_>>(),
866
- )) as ArrayRef,
867
- other => {
868
- return Err(LixError::new(
869
- "LIX_ERROR_UNKNOWN",
870
- format!(
871
- "sql2 lix_file_history provider does not support projected column '{other}'"
872
- ),
873
- ))
874
- }
875
- })
876
- }
877
-
878
- pub(super) fn lix_file_history_schema() -> SchemaRef {
879
- Arc::new(Schema::new(vec![
880
- Field::new("id", DataType::Utf8, false),
881
- Field::new("path", DataType::Utf8, true),
882
- Field::new("directory_id", DataType::Utf8, true),
883
- Field::new("name", DataType::Utf8, true),
884
- Field::new("hidden", DataType::Boolean, true),
885
- Field::new("data", DataType::Binary, true),
886
- json_field(HISTORY_COL_ENTITY_PK, false),
887
- Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
888
- Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
889
- json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
890
- Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
891
- json_field(HISTORY_COL_METADATA, true),
892
- Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
893
- Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
894
- Field::new(HISTORY_COL_START_COMMIT_ID, DataType::Utf8, false),
895
- Field::new(HISTORY_COL_DEPTH, DataType::Int64, false),
896
- ]))
897
- }
898
-
899
- fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
900
- let Some(projection) = projection else {
901
- return Ok(Arc::clone(base_schema));
902
- };
903
- Ok(Arc::new(base_schema.project(projection)?))
904
- }
905
-
906
- fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
907
- Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
908
- }
909
-
910
- fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
911
- crate::sql2::error::datafusion_error_to_lix_error(error)
912
- }
913
-
914
- fn entity_pk_json_array(entity_pk: &str) -> Result<String, LixError> {
915
- serde_json::to_string(&[entity_pk]).map_err(|error| {
916
- LixError::unknown(format!(
917
- "failed to encode history entity pk as JSON: {error}"
918
- ))
919
- })
920
- }
921
-
922
- fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
923
- crate::sql2::error::lix_error_to_datafusion_error(error)
924
- }