@lix-js/sdk 0.6.0-preview.0 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. package/README.md +9 -0
  2. package/SKILL.md +468 -0
  3. package/dist/engine-wasm/index.d.ts +15 -11
  4. package/dist/engine-wasm/index.js +105 -38
  5. package/dist/engine-wasm/wasm/lix_engine.d.ts +14 -2
  6. package/dist/engine-wasm/wasm/lix_engine.js +18 -17
  7. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  8. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +2 -1
  9. package/dist/generated/builtin-schemas.d.ts +31 -41
  10. package/dist/generated/builtin-schemas.js +52 -56
  11. package/dist/open-lix.d.ts +141 -24
  12. package/dist/open-lix.js +199 -35
  13. package/dist/sqlite/index.js +99 -22
  14. package/dist-engine-src/README.md +18 -0
  15. package/dist-engine-src/src/backend/kv.rs +358 -0
  16. package/dist-engine-src/src/backend/mod.rs +12 -0
  17. package/dist-engine-src/src/backend/testing.rs +658 -0
  18. package/dist-engine-src/src/backend/types.rs +96 -0
  19. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  20. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  21. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  22. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  23. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  24. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  25. package/dist-engine-src/src/cel/context.rs +86 -0
  26. package/dist-engine-src/src/cel/error.rs +19 -0
  27. package/dist-engine-src/src/cel/mod.rs +8 -0
  28. package/dist-engine-src/src/cel/provider.rs +9 -0
  29. package/dist-engine-src/src/cel/runtime.rs +167 -0
  30. package/dist-engine-src/src/cel/value.rs +50 -0
  31. package/dist-engine-src/src/changelog/codec.rs +321 -0
  32. package/dist-engine-src/src/changelog/context.rs +92 -0
  33. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  34. package/dist-engine-src/src/changelog/mod.rs +13 -0
  35. package/dist-engine-src/src/changelog/reader.rs +20 -0
  36. package/dist-engine-src/src/changelog/storage.rs +220 -0
  37. package/dist-engine-src/src/changelog/types.rs +38 -0
  38. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  39. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  40. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  41. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +135 -0
  46. package/dist-engine-src/src/common/metadata.rs +35 -0
  47. package/dist-engine-src/src/common/mod.rs +23 -0
  48. package/dist-engine-src/src/common/types.rs +105 -0
  49. package/dist-engine-src/src/common/wire.rs +222 -0
  50. package/dist-engine-src/src/engine.rs +239 -0
  51. package/dist-engine-src/src/entity_identity.rs +285 -0
  52. package/dist-engine-src/src/functions/context.rs +327 -0
  53. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  54. package/dist-engine-src/src/functions/mod.rs +18 -0
  55. package/dist-engine-src/src/functions/provider.rs +130 -0
  56. package/dist-engine-src/src/functions/state.rs +363 -0
  57. package/dist-engine-src/src/functions/types.rs +37 -0
  58. package/dist-engine-src/src/init.rs +505 -0
  59. package/dist-engine-src/src/json_store/compression.rs +77 -0
  60. package/dist-engine-src/src/json_store/context.rs +129 -0
  61. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  62. package/dist-engine-src/src/json_store/mod.rs +9 -0
  63. package/dist-engine-src/src/json_store/store.rs +236 -0
  64. package/dist-engine-src/src/json_store/types.rs +52 -0
  65. package/dist-engine-src/src/lib.rs +61 -0
  66. package/dist-engine-src/src/live_state/context.rs +2241 -0
  67. package/dist-engine-src/src/live_state/mod.rs +15 -0
  68. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  69. package/dist-engine-src/src/live_state/reader.rs +23 -0
  70. package/dist-engine-src/src/live_state/types.rs +239 -0
  71. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  72. package/dist-engine-src/src/plugin/archive.rs +441 -0
  73. package/dist-engine-src/src/plugin/component.rs +183 -0
  74. package/dist-engine-src/src/plugin/install.rs +637 -0
  75. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  76. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  77. package/dist-engine-src/src/plugin/mod.rs +33 -0
  78. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  79. package/dist-engine-src/src/plugin/storage.rs +74 -0
  80. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  81. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  82. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  83. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  84. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  85. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  86. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  92. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  93. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  94. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  100. package/dist-engine-src/src/schema/definition.json +157 -0
  101. package/dist-engine-src/src/schema/definition.rs +636 -0
  102. package/dist-engine-src/src/schema/key.rs +206 -0
  103. package/dist-engine-src/src/schema/mod.rs +20 -0
  104. package/dist-engine-src/src/schema/seed.rs +14 -0
  105. package/dist-engine-src/src/schema/tests.rs +739 -0
  106. package/dist-engine-src/src/schema_registry.rs +294 -0
  107. package/dist-engine-src/src/session/context.rs +366 -0
  108. package/dist-engine-src/src/session/create_version.rs +80 -0
  109. package/dist-engine-src/src/session/execute.rs +447 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +437 -0
  116. package/dist-engine-src/src/session/mod.rs +25 -0
  117. package/dist-engine-src/src/session/switch_version.rs +121 -0
  118. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  119. package/dist-engine-src/src/sql2/classify.rs +147 -0
  120. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  121. package/dist-engine-src/src/sql2/context.rs +307 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  127. package/dist-engine-src/src/sql2/error.rs +196 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  138. package/dist-engine-src/src/sql2/mod.rs +43 -0
  139. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  140. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  141. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  142. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  143. package/dist-engine-src/src/sql2/session.rs +135 -0
  144. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  148. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  149. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  150. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  153. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  154. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  155. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  156. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  157. package/dist-engine-src/src/storage/context.rs +356 -0
  158. package/dist-engine-src/src/storage/mod.rs +14 -0
  159. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  160. package/dist-engine-src/src/storage/types.rs +501 -0
  161. package/dist-engine-src/src/storage_bench.rs +3406 -0
  162. package/dist-engine-src/src/test_support.rs +81 -0
  163. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  164. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  165. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  166. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  167. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  168. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  169. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  170. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  171. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  172. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  173. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  174. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  175. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  176. package/dist-engine-src/src/transaction/context.rs +1307 -0
  177. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  178. package/dist-engine-src/src/transaction/mod.rs +11 -0
  179. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  180. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  181. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  182. package/dist-engine-src/src/transaction/types.rs +351 -0
  183. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  184. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  185. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  186. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  187. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  188. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  189. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  190. package/dist-engine-src/src/version/context.rs +52 -0
  191. package/dist-engine-src/src/version/mod.rs +12 -0
  192. package/dist-engine-src/src/version/refs.rs +421 -0
  193. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  194. package/dist-engine-src/src/version/types.rs +21 -0
  195. package/dist-engine-src/src/wasm/mod.rs +60 -0
  196. package/package.json +68 -63
@@ -0,0 +1,902 @@
1
+ use std::any::Any;
2
+ use std::collections::{BTreeMap, BTreeSet};
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, BinaryArray, BooleanArray, Int64Array, StringArray};
7
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
8
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
9
+ use datafusion::catalog::{Session, TableProvider};
10
+ use datafusion::common::{DataFusionError, Result};
11
+ use datafusion::datasource::TableType;
12
+ use datafusion::execution::TaskContext;
13
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
14
+ use datafusion::physical_expr::EquivalenceProperties;
15
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
16
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
17
+ use datafusion::physical_plan::{
18
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
19
+ };
20
+ use futures_util::stream;
21
+ use serde::Deserialize;
22
+ use tokio::sync::Mutex;
23
+
24
+ use crate::binary_cas::{BlobDataReader, BlobHash};
25
+ use crate::changelog::MaterializedCanonicalChange;
26
+ use crate::commit_graph::CommitGraphReader;
27
+ use crate::serialize_row_metadata;
28
+ use crate::LixError;
29
+
30
+ use super::history_projection::{tombstone_identity_column_value, HistoryIdentityProjection};
31
+ use super::history_route::{
32
+ history_descriptor_event_matches, load_history_entries, parse_history_filter,
33
+ HistoryColumnStyle, HistoryEntry, HistoryRoute, HistoryViewDescriptor, HISTORY_COL_CHANGE_ID,
34
+ HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID, HISTORY_COL_FILE_ID,
35
+ HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID, HISTORY_COL_SCHEMA_KEY,
36
+ HISTORY_COL_SCHEMA_VERSION, HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
37
+ };
38
+ use super::result_metadata::json_field;
39
+ use super::SqlChangelogQuerySource;
40
+
41
/// Schema key of the changes that carry a file descriptor snapshot
/// (`id`, `directory_id`, `name`, `hidden`).
const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";

/// Schema key of the changes that carry a directory descriptor snapshot
/// (`id`, `parent_id`, `name`).
const DIRECTORY_DESCRIPTOR_SCHEMA_KEY: &str = "lix_directory_descriptor";

/// Schema key of the changes that link a file to the hash of its blob content.
const BLOB_REF_SCHEMA_KEY: &str = "lix_binary_blob_ref";
44
+
45
+ pub(crate) async fn register_lix_file_history_provider(
46
+ session: &datafusion::prelude::SessionContext,
47
+ commit_graph: Box<dyn CommitGraphReader>,
48
+ query_source: SqlChangelogQuerySource,
49
+ blob_reader: Arc<dyn BlobDataReader>,
50
+ ) -> Result<(), LixError> {
51
+ session
52
+ .register_table(
53
+ "lix_file_history",
54
+ Arc::new(LixFileHistoryProvider::new(
55
+ Arc::new(Mutex::new(commit_graph)),
56
+ query_source,
57
+ blob_reader,
58
+ )),
59
+ )
60
+ .map_err(datafusion_error_to_lix_error)?;
61
+ Ok(())
62
+ }
63
+
64
+ struct LixFileHistoryProvider {
65
+ schema: SchemaRef,
66
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
67
+ query_source: SqlChangelogQuerySource,
68
+ blob_reader: Arc<dyn BlobDataReader>,
69
+ }
70
+
71
+ impl std::fmt::Debug for LixFileHistoryProvider {
72
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73
+ f.debug_struct("LixFileHistoryProvider").finish()
74
+ }
75
+ }
76
+
77
+ impl LixFileHistoryProvider {
78
+ fn new(
79
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
80
+ query_source: SqlChangelogQuerySource,
81
+ blob_reader: Arc<dyn BlobDataReader>,
82
+ ) -> Self {
83
+ Self {
84
+ schema: lix_file_history_schema(),
85
+ commit_graph,
86
+ query_source,
87
+ blob_reader,
88
+ }
89
+ }
90
+ }
91
+
92
+ #[async_trait]
93
+ impl TableProvider for LixFileHistoryProvider {
94
+ fn as_any(&self) -> &dyn Any {
95
+ self
96
+ }
97
+
98
+ fn schema(&self) -> SchemaRef {
99
+ Arc::clone(&self.schema)
100
+ }
101
+
102
+ fn table_type(&self) -> TableType {
103
+ TableType::View
104
+ }
105
+
106
+ fn supports_filters_pushdown(
107
+ &self,
108
+ filters: &[&Expr],
109
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
110
+ Ok(filters
111
+ .iter()
112
+ .map(|filter| {
113
+ if parse_history_filter(filter, HistoryColumnStyle::Prefixed).is_some() {
114
+ TableProviderFilterPushDown::Exact
115
+ } else {
116
+ TableProviderFilterPushDown::Unsupported
117
+ }
118
+ })
119
+ .collect())
120
+ }
121
+
122
+ async fn scan(
123
+ &self,
124
+ _state: &dyn Session,
125
+ projection: Option<&Vec<usize>>,
126
+ filters: &[Expr],
127
+ limit: Option<usize>,
128
+ ) -> Result<Arc<dyn ExecutionPlan>> {
129
+ let schema = projected_schema(&self.schema, projection)?;
130
+ let needs_data = projection.is_none_or(|projection| {
131
+ projection.iter().any(|index| {
132
+ self.schema
133
+ .field(*index)
134
+ .name()
135
+ .as_str()
136
+ .eq_ignore_ascii_case("data")
137
+ })
138
+ });
139
+ Ok(Arc::new(LixFileHistoryScanExec::new(
140
+ Arc::clone(&self.commit_graph),
141
+ self.query_source.clone(),
142
+ Arc::clone(&self.blob_reader),
143
+ schema,
144
+ needs_data,
145
+ HistoryRoute::from_filters(filters, HistoryColumnStyle::Prefixed),
146
+ limit,
147
+ )))
148
+ }
149
+ }
150
+
151
+ struct LixFileHistoryScanExec {
152
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
153
+ query_source: SqlChangelogQuerySource,
154
+ blob_reader: Arc<dyn BlobDataReader>,
155
+ schema: SchemaRef,
156
+ needs_data: bool,
157
+ route: HistoryRoute,
158
+ limit: Option<usize>,
159
+ properties: Arc<PlanProperties>,
160
+ }
161
+
162
+ impl std::fmt::Debug for LixFileHistoryScanExec {
163
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164
+ f.debug_struct("LixFileHistoryScanExec")
165
+ .field("route", &self.route)
166
+ .field("limit", &self.limit)
167
+ .finish()
168
+ }
169
+ }
170
+
171
+ impl LixFileHistoryScanExec {
172
+ fn new(
173
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
174
+ query_source: SqlChangelogQuerySource,
175
+ blob_reader: Arc<dyn BlobDataReader>,
176
+ schema: SchemaRef,
177
+ needs_data: bool,
178
+ route: HistoryRoute,
179
+ limit: Option<usize>,
180
+ ) -> Self {
181
+ let properties = PlanProperties::new(
182
+ EquivalenceProperties::new(Arc::clone(&schema)),
183
+ Partitioning::UnknownPartitioning(1),
184
+ EmissionType::Incremental,
185
+ Boundedness::Bounded,
186
+ );
187
+ Self {
188
+ commit_graph,
189
+ query_source,
190
+ blob_reader,
191
+ schema,
192
+ needs_data,
193
+ route,
194
+ limit,
195
+ properties: Arc::new(properties),
196
+ }
197
+ }
198
+ }
199
+
200
+ impl DisplayAs for LixFileHistoryScanExec {
201
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
202
+ match t {
203
+ DisplayFormatType::Default | DisplayFormatType::Verbose => write!(
204
+ f,
205
+ "LixFileHistoryScanExec(route={:?}, limit={:?})",
206
+ self.route, self.limit
207
+ ),
208
+ DisplayFormatType::TreeRender => write!(f, "LixFileHistoryScanExec"),
209
+ }
210
+ }
211
+ }
212
+
213
+ impl ExecutionPlan for LixFileHistoryScanExec {
214
+ fn name(&self) -> &str {
215
+ "LixFileHistoryScanExec"
216
+ }
217
+
218
+ fn as_any(&self) -> &dyn Any {
219
+ self
220
+ }
221
+
222
+ fn properties(&self) -> &Arc<PlanProperties> {
223
+ &self.properties
224
+ }
225
+
226
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
227
+ Vec::new()
228
+ }
229
+
230
+ fn with_new_children(
231
+ self: Arc<Self>,
232
+ children: Vec<Arc<dyn ExecutionPlan>>,
233
+ ) -> Result<Arc<dyn ExecutionPlan>> {
234
+ if !children.is_empty() {
235
+ return Err(DataFusionError::Execution(
236
+ "LixFileHistoryScanExec does not accept children".to_string(),
237
+ ));
238
+ }
239
+ Ok(self)
240
+ }
241
+
242
+ fn execute(
243
+ &self,
244
+ partition: usize,
245
+ _context: Arc<TaskContext>,
246
+ ) -> Result<SendableRecordBatchStream> {
247
+ if partition != 0 {
248
+ return Err(DataFusionError::Execution(format!(
249
+ "LixFileHistoryScanExec only exposes one partition, got {partition}"
250
+ )));
251
+ }
252
+
253
+ let commit_graph = Arc::clone(&self.commit_graph);
254
+ let query_source = self.query_source.clone();
255
+ let blob_reader = Arc::clone(&self.blob_reader);
256
+ let schema = Arc::clone(&self.schema);
257
+ let stream_schema = Arc::clone(&schema);
258
+ let route = self.route.clone();
259
+ let limit = self.limit;
260
+ let needs_data = self.needs_data;
261
+
262
+ let fut = async move {
263
+ let mut rows = load_file_history_rows(
264
+ commit_graph,
265
+ query_source,
266
+ &blob_reader,
267
+ &route,
268
+ needs_data,
269
+ )
270
+ .await
271
+ .map_err(lix_error_to_datafusion_error)?;
272
+ if let Some(limit) = limit {
273
+ rows.truncate(limit);
274
+ }
275
+ file_history_record_batch(&stream_schema, &rows).map_err(lix_error_to_datafusion_error)
276
+ };
277
+
278
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
279
+ schema,
280
+ stream::once(fut),
281
+ )))
282
+ }
283
+ }
284
+
285
+ #[derive(Debug, Clone)]
286
+ struct FileHistoryDescriptorRecord {
287
+ id: String,
288
+ directory_id: Option<String>,
289
+ name: Option<String>,
290
+ hidden: Option<bool>,
291
+ entry: HistoryEntry,
292
+ }
293
+
294
+ #[derive(Debug, Clone)]
295
+ struct FileHistoryDirectoryRecord {
296
+ id: String,
297
+ parent_id: Option<String>,
298
+ name: String,
299
+ entry: HistoryEntry,
300
+ }
301
+
302
+ #[derive(Debug, Clone)]
303
+ struct FileHistoryBlobRecord {
304
+ file_id: String,
305
+ blob_hash: Option<String>,
306
+ entry: HistoryEntry,
307
+ }
308
+
309
+ #[derive(Debug, Clone)]
310
+ struct FileHistoryEvent {
311
+ file_id: String,
312
+ start_commit_id: String,
313
+ depth: u32,
314
+ priority: u8,
315
+ change: MaterializedCanonicalChange,
316
+ observed_commit_id: String,
317
+ commit_created_at: String,
318
+ }
319
+
320
+ #[derive(Debug, Clone)]
321
+ struct FileHistoryOutputRow {
322
+ entity_id: String,
323
+ id: String,
324
+ path: Option<String>,
325
+ directory_id: Option<String>,
326
+ name: Option<String>,
327
+ hidden: Option<bool>,
328
+ data: Option<Vec<u8>>,
329
+ descriptor_change: MaterializedCanonicalChange,
330
+ event: FileHistoryEvent,
331
+ }
332
+
333
+ #[derive(Debug, Deserialize)]
334
+ struct FileDescriptorSnapshot {
335
+ id: String,
336
+ directory_id: Option<String>,
337
+ name: String,
338
+ hidden: bool,
339
+ }
340
+
341
+ #[derive(Debug, Deserialize)]
342
+ struct DirectoryDescriptorSnapshot {
343
+ id: String,
344
+ parent_id: Option<String>,
345
+ name: String,
346
+ }
347
+
348
+ #[derive(Debug, Deserialize)]
349
+ struct BlobRefSnapshot {
350
+ id: String,
351
+ blob_hash: String,
352
+ }
353
+
354
+ async fn load_file_history_rows(
355
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
356
+ query_source: SqlChangelogQuerySource,
357
+ blob_reader: &Arc<dyn BlobDataReader>,
358
+ route: &HistoryRoute,
359
+ needs_data: bool,
360
+ ) -> Result<Vec<FileHistoryOutputRow>, LixError> {
361
+ let event_route = route.traversal_only();
362
+ let event_entries = load_history_entries(
363
+ HistoryViewDescriptor {
364
+ view_name: "lix_file_history",
365
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
366
+ },
367
+ Arc::clone(&commit_graph),
368
+ query_source.json_reader.clone(),
369
+ &event_route,
370
+ vec![
371
+ FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
372
+ DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
373
+ BLOB_REF_SCHEMA_KEY.to_string(),
374
+ ],
375
+ )
376
+ .await?;
377
+ let context_route = route.starts_only();
378
+ let context_entries = load_history_entries(
379
+ HistoryViewDescriptor {
380
+ view_name: "lix_file_history",
381
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
382
+ },
383
+ commit_graph,
384
+ query_source.json_reader,
385
+ &context_route,
386
+ vec![
387
+ FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
388
+ DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
389
+ BLOB_REF_SCHEMA_KEY.to_string(),
390
+ ],
391
+ )
392
+ .await?;
393
+
394
+ let event_descriptors = parse_file_history_descriptors(&event_entries)?;
395
+ let event_directories = parse_file_history_directories(&event_entries)?;
396
+ let event_blobs = parse_file_history_blobs(&event_entries)?;
397
+ let descriptors = parse_file_history_descriptors(&context_entries)?;
398
+ let directories = parse_file_history_directories(&context_entries)?;
399
+ let blobs = parse_file_history_blobs(&context_entries)?;
400
+ let events = file_history_events(
401
+ &event_descriptors,
402
+ &event_directories,
403
+ &event_blobs,
404
+ &descriptors,
405
+ );
406
+
407
+ let mut output = Vec::new();
408
+ for event in events {
409
+ let Some(descriptor) = nearest_file_descriptor(&descriptors, &event) else {
410
+ continue;
411
+ };
412
+ let blob = nearest_blob_ref(&blobs, &event);
413
+ let data = if needs_data {
414
+ match blob.and_then(|blob| blob.blob_hash.as_deref()) {
415
+ Some(blob_hash) => load_single_blob_bytes(blob_reader, blob_hash).await?,
416
+ None => None,
417
+ }
418
+ } else {
419
+ None
420
+ };
421
+ let path = resolve_file_history_path(descriptor, &directories, event.depth);
422
+ let id = tombstone_identity_column_value(
423
+ "id",
424
+ &descriptor.id,
425
+ HistoryIdentityProjection::SingleColumn { column: "id" },
426
+ )?
427
+ .and_then(|value| value.as_str().map(ToOwned::to_owned))
428
+ .unwrap_or_else(|| descriptor.id.clone());
429
+
430
+ output.push(FileHistoryOutputRow {
431
+ entity_id: descriptor.id.clone(),
432
+ id,
433
+ path,
434
+ directory_id: descriptor.directory_id.clone(),
435
+ name: descriptor.name.clone(),
436
+ hidden: descriptor.hidden,
437
+ data,
438
+ descriptor_change: descriptor.entry.change.clone(),
439
+ event,
440
+ });
441
+ }
442
+ output.retain(|row| {
443
+ route.matches_surface_row(
444
+ FILE_DESCRIPTOR_SCHEMA_KEY,
445
+ &row.entity_id,
446
+ Some(&row.entity_id),
447
+ row.event.depth,
448
+ )
449
+ });
450
+
451
+ output.sort_by(|left, right| {
452
+ left.entity_id
453
+ .cmp(&right.entity_id)
454
+ .then(left.event.start_commit_id.cmp(&right.event.start_commit_id))
455
+ .then(left.event.depth.cmp(&right.event.depth))
456
+ .then(
457
+ left.event
458
+ .observed_commit_id
459
+ .cmp(&right.event.observed_commit_id),
460
+ )
461
+ .then(left.event.change.id.cmp(&right.event.change.id))
462
+ });
463
+ Ok(output)
464
+ }
465
+
466
+ async fn load_single_blob_bytes(
467
+ blob_reader: &Arc<dyn BlobDataReader>,
468
+ blob_hash: &str,
469
+ ) -> Result<Option<Vec<u8>>, LixError> {
470
+ let hash = BlobHash::from_hex(blob_hash)?;
471
+ Ok(blob_reader
472
+ .load_bytes_many(&[hash])
473
+ .await?
474
+ .into_vec()
475
+ .into_iter()
476
+ .next()
477
+ .flatten())
478
+ }
479
+
480
+ fn file_history_events(
481
+ event_descriptors: &[FileHistoryDescriptorRecord],
482
+ event_directories: &[FileHistoryDirectoryRecord],
483
+ event_blobs: &[FileHistoryBlobRecord],
484
+ context_descriptors: &[FileHistoryDescriptorRecord],
485
+ ) -> Vec<FileHistoryEvent> {
486
+ let mut descriptor_ids_by_start = BTreeSet::<(String, String)>::new();
487
+ let mut directory_ids_by_file_start = BTreeMap::<(String, String), BTreeSet<String>>::new();
488
+
489
+ for descriptor in context_descriptors {
490
+ let key = (
491
+ descriptor.id.clone(),
492
+ descriptor.entry.start_commit_id.clone(),
493
+ );
494
+ descriptor_ids_by_start.insert(key.clone());
495
+ if let Some(directory_id) = &descriptor.directory_id {
496
+ directory_ids_by_file_start
497
+ .entry(key)
498
+ .or_default()
499
+ .insert(directory_id.clone());
500
+ }
501
+ }
502
+
503
+ let mut candidates = Vec::new();
504
+ for descriptor in event_descriptors {
505
+ candidates.push(file_history_event_from_entry(
506
+ descriptor.id.clone(),
507
+ &descriptor.entry,
508
+ 1,
509
+ ));
510
+ }
511
+ for directory in event_directories {
512
+ for ((file_id, start_commit_id), directory_ids) in &directory_ids_by_file_start {
513
+ if start_commit_id == &directory.entry.start_commit_id
514
+ && directory_ids.contains(&directory.id)
515
+ {
516
+ candidates.push(file_history_event_from_entry(
517
+ file_id.clone(),
518
+ &directory.entry,
519
+ 2,
520
+ ));
521
+ }
522
+ }
523
+ }
524
+ for blob in event_blobs {
525
+ if descriptor_ids_by_start
526
+ .contains(&(blob.file_id.clone(), blob.entry.start_commit_id.clone()))
527
+ {
528
+ candidates.push(file_history_event_from_entry(
529
+ blob.file_id.clone(),
530
+ &blob.entry,
531
+ 3,
532
+ ));
533
+ }
534
+ }
535
+
536
+ candidates.sort_by(|left, right| {
537
+ left.file_id
538
+ .cmp(&right.file_id)
539
+ .then(left.start_commit_id.cmp(&right.start_commit_id))
540
+ .then(left.depth.cmp(&right.depth))
541
+ .then(left.priority.cmp(&right.priority))
542
+ .then(left.change.id.cmp(&right.change.id))
543
+ });
544
+ candidates.dedup_by(|left, right| {
545
+ left.file_id == right.file_id
546
+ && left.start_commit_id == right.start_commit_id
547
+ && left.depth == right.depth
548
+ });
549
+ candidates
550
+ }
551
+
552
+ fn file_history_event_from_entry(
553
+ file_id: String,
554
+ entry: &HistoryEntry,
555
+ priority: u8,
556
+ ) -> FileHistoryEvent {
557
+ FileHistoryEvent {
558
+ file_id,
559
+ start_commit_id: entry.start_commit_id.clone(),
560
+ depth: entry.depth,
561
+ priority,
562
+ change: entry.change.clone(),
563
+ observed_commit_id: entry.observed_commit_id.clone(),
564
+ commit_created_at: entry.commit_created_at.clone(),
565
+ }
566
+ }
567
+
568
+ fn parse_file_history_descriptors(
569
+ entries: &[HistoryEntry],
570
+ ) -> Result<Vec<FileHistoryDescriptorRecord>, LixError> {
571
+ entries
572
+ .iter()
573
+ .filter(|entry| entry.change.schema_key == FILE_DESCRIPTOR_SCHEMA_KEY)
574
+ .map(|entry| {
575
+ let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
576
+ return Ok(FileHistoryDescriptorRecord {
577
+ id: entry.change.entity_id.as_string()?,
578
+ directory_id: None,
579
+ name: None,
580
+ hidden: None,
581
+ entry: entry.clone(),
582
+ });
583
+ };
584
+ let snapshot: FileDescriptorSnapshot =
585
+ serde_json::from_str(snapshot_content).map_err(|error| {
586
+ LixError::new(
587
+ "LIX_ERROR_UNKNOWN",
588
+ format!("invalid lix_file_descriptor history snapshot JSON: {error}"),
589
+ )
590
+ })?;
591
+ Ok(FileHistoryDescriptorRecord {
592
+ id: snapshot.id,
593
+ directory_id: snapshot.directory_id,
594
+ name: Some(snapshot.name),
595
+ hidden: Some(snapshot.hidden),
596
+ entry: entry.clone(),
597
+ })
598
+ })
599
+ .collect()
600
+ }
601
+
602
+ fn parse_file_history_directories(
603
+ entries: &[HistoryEntry],
604
+ ) -> Result<Vec<FileHistoryDirectoryRecord>, LixError> {
605
+ entries
606
+ .iter()
607
+ .filter(|entry| entry.change.schema_key == DIRECTORY_DESCRIPTOR_SCHEMA_KEY)
608
+ .filter_map(|entry| {
609
+ let snapshot_content = entry.change.snapshot_content.clone()?;
610
+ Some((entry, snapshot_content))
611
+ })
612
+ .map(|(entry, snapshot_content)| {
613
+ let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(&snapshot_content)
614
+ .map_err(|error| {
615
+ LixError::new(
616
+ "LIX_ERROR_UNKNOWN",
617
+ format!("invalid lix_directory_descriptor history snapshot JSON: {error}"),
618
+ )
619
+ })?;
620
+ Ok(FileHistoryDirectoryRecord {
621
+ id: snapshot.id,
622
+ parent_id: snapshot.parent_id,
623
+ name: snapshot.name,
624
+ entry: entry.clone(),
625
+ })
626
+ })
627
+ .collect()
628
+ }
629
+
630
+ fn parse_file_history_blobs(
631
+ entries: &[HistoryEntry],
632
+ ) -> Result<Vec<FileHistoryBlobRecord>, LixError> {
633
+ entries
634
+ .iter()
635
+ .filter(|entry| entry.change.schema_key == BLOB_REF_SCHEMA_KEY)
636
+ .map(|entry| {
637
+ let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
638
+ return Ok(FileHistoryBlobRecord {
639
+ file_id: entry.change.file_id.clone().unwrap_or_else(|| {
640
+ entry
641
+ .change
642
+ .entity_id
643
+ .as_string()
644
+ .expect("canonical change entity identity should project")
645
+ }),
646
+ blob_hash: None,
647
+ entry: entry.clone(),
648
+ });
649
+ };
650
+ let snapshot: BlobRefSnapshot =
651
+ serde_json::from_str(snapshot_content).map_err(|error| {
652
+ LixError::new(
653
+ "LIX_ERROR_UNKNOWN",
654
+ format!("invalid lix_binary_blob_ref history snapshot JSON: {error}"),
655
+ )
656
+ })?;
657
+ Ok(FileHistoryBlobRecord {
658
+ file_id: entry.change.file_id.clone().unwrap_or(snapshot.id),
659
+ blob_hash: Some(snapshot.blob_hash),
660
+ entry: entry.clone(),
661
+ })
662
+ })
663
+ .collect()
664
+ }
665
+
666
+ fn nearest_file_descriptor<'a>(
667
+ descriptors: &'a [FileHistoryDescriptorRecord],
668
+ event: &FileHistoryEvent,
669
+ ) -> Option<&'a FileHistoryDescriptorRecord> {
670
+ descriptors
671
+ .iter()
672
+ .filter(|descriptor| {
673
+ let exact_descriptor_event =
674
+ history_descriptor_event_matches(&descriptor.entry, event.depth, &event.change.id);
675
+ (exact_descriptor_event || descriptor.name.is_some())
676
+ && descriptor.id == event.file_id
677
+ && descriptor.entry.start_commit_id == event.start_commit_id
678
+ && descriptor.entry.depth >= event.depth
679
+ })
680
+ .min_by(|left, right| {
681
+ left.entry
682
+ .depth
683
+ .cmp(&right.entry.depth)
684
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
685
+ })
686
+ }
687
+
688
+ fn nearest_blob_ref<'a>(
689
+ blobs: &'a [FileHistoryBlobRecord],
690
+ event: &FileHistoryEvent,
691
+ ) -> Option<&'a FileHistoryBlobRecord> {
692
+ blobs
693
+ .iter()
694
+ .filter(|blob| {
695
+ blob.file_id == event.file_id
696
+ && blob.entry.start_commit_id == event.start_commit_id
697
+ && blob.entry.depth >= event.depth
698
+ })
699
+ .min_by(|left, right| {
700
+ left.entry
701
+ .depth
702
+ .cmp(&right.entry.depth)
703
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
704
+ })
705
+ }
706
+
707
+ fn resolve_file_history_path(
708
+ descriptor: &FileHistoryDescriptorRecord,
709
+ directories: &[FileHistoryDirectoryRecord],
710
+ target_depth: u32,
711
+ ) -> Option<String> {
712
+ let name = descriptor.name.as_ref()?;
713
+ let Some(directory_id) = descriptor.directory_id.as_deref() else {
714
+ return Some(format!("/{name}"));
715
+ };
716
+ let directory_path = resolve_directory_history_path(
717
+ directory_id,
718
+ &descriptor.entry.start_commit_id,
719
+ target_depth,
720
+ directories,
721
+ &mut BTreeMap::new(),
722
+ &mut BTreeSet::new(),
723
+ )?;
724
+ Some(format!("{directory_path}{name}"))
725
+ }
726
+
727
+ fn resolve_directory_history_path(
728
+ directory_id: &str,
729
+ start_commit_id: &str,
730
+ target_depth: u32,
731
+ directories: &[FileHistoryDirectoryRecord],
732
+ cache: &mut BTreeMap<String, Option<String>>,
733
+ visiting: &mut BTreeSet<String>,
734
+ ) -> Option<String> {
735
+ if let Some(path) = cache.get(directory_id) {
736
+ return path.clone();
737
+ }
738
+ if !visiting.insert(directory_id.to_string()) {
739
+ cache.insert(directory_id.to_string(), None);
740
+ return None;
741
+ }
742
+ let directory = directories
743
+ .iter()
744
+ .filter(|directory| {
745
+ directory.id == directory_id
746
+ && directory.entry.start_commit_id == start_commit_id
747
+ && directory.entry.depth >= target_depth
748
+ })
749
+ .min_by(|left, right| {
750
+ left.entry
751
+ .depth
752
+ .cmp(&right.entry.depth)
753
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
754
+ })?;
755
+ let path = match directory.parent_id.as_deref() {
756
+ Some(parent_id) => {
757
+ let parent_path = resolve_directory_history_path(
758
+ parent_id,
759
+ start_commit_id,
760
+ target_depth,
761
+ directories,
762
+ cache,
763
+ visiting,
764
+ )?;
765
+ format!("{parent_path}{}/", directory.name)
766
+ }
767
+ None => format!("/{}/", directory.name),
768
+ };
769
+ visiting.remove(directory_id);
770
+ cache.insert(directory_id.to_string(), Some(path.clone()));
771
+ Some(path)
772
+ }
773
+
774
+ fn file_history_record_batch(
775
+ schema: &SchemaRef,
776
+ rows: &[FileHistoryOutputRow],
777
+ ) -> Result<RecordBatch, LixError> {
778
+ let columns = schema
779
+ .fields()
780
+ .iter()
781
+ .map(|field| file_history_column_array(field.name(), rows))
782
+ .collect::<Result<Vec<_>, _>>()?;
783
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
784
+ RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
785
+ LixError::new(
786
+ "LIX_ERROR_UNKNOWN",
787
+ format!("sql2 failed to build lix_file_history record batch: {error}"),
788
+ )
789
+ })
790
+ }
791
+
792
+ fn file_history_column_array(
793
+ column_name: &str,
794
+ rows: &[FileHistoryOutputRow],
795
+ ) -> Result<ArrayRef, LixError> {
796
+ Ok(match column_name {
797
+ "id" => string_array(rows.iter().map(|row| Some(row.id.as_str()))),
798
+ "path" => string_array(rows.iter().map(|row| row.path.as_deref())),
799
+ "directory_id" => string_array(rows.iter().map(|row| row.directory_id.as_deref())),
800
+ "name" => string_array(rows.iter().map(|row| row.name.as_deref())),
801
+ "hidden" => Arc::new(BooleanArray::from(
802
+ rows.iter().map(|row| row.hidden).collect::<Vec<_>>(),
803
+ )) as ArrayRef,
804
+ "data" => Arc::new(BinaryArray::from(
805
+ rows.iter()
806
+ .map(|row| row.data.as_deref())
807
+ .collect::<Vec<_>>(),
808
+ )) as ArrayRef,
809
+ HISTORY_COL_ENTITY_ID => string_array(rows.iter().map(|row| Some(row.entity_id.as_str()))),
810
+ HISTORY_COL_SCHEMA_KEY => {
811
+ string_array(rows.iter().map(|_| Some(FILE_DESCRIPTOR_SCHEMA_KEY)))
812
+ }
813
+ HISTORY_COL_FILE_ID => string_array(rows.iter().map(|row| Some(row.entity_id.as_str()))),
814
+ HISTORY_COL_SCHEMA_VERSION => string_array(
815
+ rows.iter()
816
+ .map(|row| Some(row.descriptor_change.schema_version.as_str())),
817
+ ),
818
+ HISTORY_COL_CHANGE_ID => {
819
+ string_array(rows.iter().map(|row| Some(row.event.change.id.as_str())))
820
+ }
821
+ HISTORY_COL_SNAPSHOT_CONTENT => string_array(
822
+ rows.iter()
823
+ .map(|row| row.descriptor_change.snapshot_content.as_deref()),
824
+ ),
825
+ HISTORY_COL_METADATA => Arc::new(StringArray::from(
826
+ rows.iter()
827
+ .map(|row| {
828
+ row.descriptor_change
829
+ .metadata
830
+ .as_ref()
831
+ .map(serialize_row_metadata)
832
+ })
833
+ .collect::<Vec<_>>(),
834
+ )),
835
+ HISTORY_COL_OBSERVED_COMMIT_ID => string_array(
836
+ rows.iter()
837
+ .map(|row| Some(row.event.observed_commit_id.as_str())),
838
+ ),
839
+ HISTORY_COL_COMMIT_CREATED_AT => string_array(
840
+ rows.iter()
841
+ .map(|row| Some(row.event.commit_created_at.as_str())),
842
+ ),
843
+ HISTORY_COL_START_COMMIT_ID => string_array(
844
+ rows.iter()
845
+ .map(|row| Some(row.event.start_commit_id.as_str())),
846
+ ),
847
+ HISTORY_COL_DEPTH => Arc::new(Int64Array::from(
848
+ rows.iter()
849
+ .map(|row| i64::from(row.event.depth))
850
+ .collect::<Vec<_>>(),
851
+ )) as ArrayRef,
852
+ other => {
853
+ return Err(LixError::new(
854
+ "LIX_ERROR_UNKNOWN",
855
+ format!(
856
+ "sql2 lix_file_history provider does not support projected column '{other}'"
857
+ ),
858
+ ))
859
+ }
860
+ })
861
+ }
862
+
863
+ fn lix_file_history_schema() -> SchemaRef {
864
+ Arc::new(Schema::new(vec![
865
+ Field::new("id", DataType::Utf8, false),
866
+ Field::new("path", DataType::Utf8, true),
867
+ Field::new("directory_id", DataType::Utf8, true),
868
+ Field::new("name", DataType::Utf8, true),
869
+ Field::new("hidden", DataType::Boolean, true),
870
+ Field::new("data", DataType::Binary, true),
871
+ Field::new(HISTORY_COL_ENTITY_ID, DataType::Utf8, false),
872
+ Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
873
+ Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
874
+ json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
875
+ Field::new(HISTORY_COL_SCHEMA_VERSION, DataType::Utf8, false),
876
+ Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
877
+ json_field(HISTORY_COL_METADATA, true),
878
+ Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
879
+ Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
880
+ Field::new(HISTORY_COL_START_COMMIT_ID, DataType::Utf8, false),
881
+ Field::new(HISTORY_COL_DEPTH, DataType::Int64, false),
882
+ ]))
883
+ }
884
+
885
+ fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
886
+ let Some(projection) = projection else {
887
+ return Ok(Arc::clone(base_schema));
888
+ };
889
+ Ok(Arc::new(base_schema.project(projection)?))
890
+ }
891
+
892
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
893
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
894
+ }
895
+
896
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
897
+ super::error::datafusion_error_to_lix_error(error)
898
+ }
899
+
900
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
901
+ super::error::lix_error_to_datafusion_error(error)
902
+ }