@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. package/SKILL.md +305 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/open-lix.d.ts +103 -14
  7. package/dist/open-lix.js +3 -0
  8. package/dist/sqlite/index.js +99 -22
  9. package/dist-engine-src/README.md +18 -0
  10. package/dist-engine-src/src/backend/kv.rs +358 -0
  11. package/dist-engine-src/src/backend/mod.rs +12 -0
  12. package/dist-engine-src/src/backend/testing.rs +658 -0
  13. package/dist-engine-src/src/backend/types.rs +96 -0
  14. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  15. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  16. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  17. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  18. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  19. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  20. package/dist-engine-src/src/cel/context.rs +86 -0
  21. package/dist-engine-src/src/cel/error.rs +19 -0
  22. package/dist-engine-src/src/cel/mod.rs +8 -0
  23. package/dist-engine-src/src/cel/provider.rs +9 -0
  24. package/dist-engine-src/src/cel/runtime.rs +167 -0
  25. package/dist-engine-src/src/cel/value.rs +50 -0
  26. package/dist-engine-src/src/changelog/codec.rs +321 -0
  27. package/dist-engine-src/src/changelog/context.rs +92 -0
  28. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  29. package/dist-engine-src/src/changelog/mod.rs +13 -0
  30. package/dist-engine-src/src/changelog/reader.rs +20 -0
  31. package/dist-engine-src/src/changelog/storage.rs +220 -0
  32. package/dist-engine-src/src/changelog/types.rs +38 -0
  33. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  34. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  35. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  36. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  37. package/dist-engine-src/src/common/error.rs +313 -0
  38. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  39. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  40. package/dist-engine-src/src/common/identity.rs +135 -0
  41. package/dist-engine-src/src/common/metadata.rs +35 -0
  42. package/dist-engine-src/src/common/mod.rs +23 -0
  43. package/dist-engine-src/src/common/types.rs +105 -0
  44. package/dist-engine-src/src/common/wire.rs +222 -0
  45. package/dist-engine-src/src/engine.rs +239 -0
  46. package/dist-engine-src/src/entity_identity.rs +285 -0
  47. package/dist-engine-src/src/functions/context.rs +327 -0
  48. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  49. package/dist-engine-src/src/functions/mod.rs +18 -0
  50. package/dist-engine-src/src/functions/provider.rs +130 -0
  51. package/dist-engine-src/src/functions/state.rs +363 -0
  52. package/dist-engine-src/src/functions/types.rs +37 -0
  53. package/dist-engine-src/src/init.rs +505 -0
  54. package/dist-engine-src/src/json_store/compression.rs +77 -0
  55. package/dist-engine-src/src/json_store/context.rs +129 -0
  56. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  57. package/dist-engine-src/src/json_store/mod.rs +9 -0
  58. package/dist-engine-src/src/json_store/store.rs +236 -0
  59. package/dist-engine-src/src/json_store/types.rs +52 -0
  60. package/dist-engine-src/src/lib.rs +61 -0
  61. package/dist-engine-src/src/live_state/context.rs +2241 -0
  62. package/dist-engine-src/src/live_state/mod.rs +15 -0
  63. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  64. package/dist-engine-src/src/live_state/reader.rs +23 -0
  65. package/dist-engine-src/src/live_state/types.rs +239 -0
  66. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  67. package/dist-engine-src/src/plugin/archive.rs +441 -0
  68. package/dist-engine-src/src/plugin/component.rs +183 -0
  69. package/dist-engine-src/src/plugin/install.rs +637 -0
  70. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  71. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  72. package/dist-engine-src/src/plugin/mod.rs +33 -0
  73. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  74. package/dist-engine-src/src/plugin/storage.rs +74 -0
  75. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  76. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  77. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  78. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  79. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  80. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  81. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  82. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  83. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  84. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  85. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  86. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  87. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  89. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  90. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  91. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  92. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  93. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  94. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  95. package/dist-engine-src/src/schema/definition.json +157 -0
  96. package/dist-engine-src/src/schema/definition.rs +636 -0
  97. package/dist-engine-src/src/schema/key.rs +206 -0
  98. package/dist-engine-src/src/schema/mod.rs +20 -0
  99. package/dist-engine-src/src/schema/seed.rs +14 -0
  100. package/dist-engine-src/src/schema/tests.rs +739 -0
  101. package/dist-engine-src/src/schema_registry.rs +294 -0
  102. package/dist-engine-src/src/session/context.rs +366 -0
  103. package/dist-engine-src/src/session/create_version.rs +80 -0
  104. package/dist-engine-src/src/session/execute.rs +447 -0
  105. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  106. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  107. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  108. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  109. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  110. package/dist-engine-src/src/session/merge/version.rs +437 -0
  111. package/dist-engine-src/src/session/mod.rs +25 -0
  112. package/dist-engine-src/src/session/switch_version.rs +121 -0
  113. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  114. package/dist-engine-src/src/sql2/classify.rs +147 -0
  115. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  116. package/dist-engine-src/src/sql2/context.rs +307 -0
  117. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  118. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  119. package/dist-engine-src/src/sql2/dml.rs +148 -0
  120. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  121. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  122. package/dist-engine-src/src/sql2/error.rs +196 -0
  123. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  124. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  125. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  126. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  127. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  128. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  129. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  130. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  131. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  132. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  133. package/dist-engine-src/src/sql2/mod.rs +43 -0
  134. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  135. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  136. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  137. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  138. package/dist-engine-src/src/sql2/session.rs +135 -0
  139. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  140. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  141. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  142. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  143. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  144. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  148. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  149. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  150. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  151. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  152. package/dist-engine-src/src/storage/context.rs +356 -0
  153. package/dist-engine-src/src/storage/mod.rs +14 -0
  154. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  155. package/dist-engine-src/src/storage/types.rs +501 -0
  156. package/dist-engine-src/src/storage_bench.rs +3406 -0
  157. package/dist-engine-src/src/test_support.rs +81 -0
  158. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  159. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  160. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  161. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  162. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  163. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  164. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  165. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  166. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  167. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  168. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  169. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  170. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  171. package/dist-engine-src/src/transaction/context.rs +1307 -0
  172. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  173. package/dist-engine-src/src/transaction/mod.rs +11 -0
  174. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  175. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  176. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  177. package/dist-engine-src/src/transaction/types.rs +351 -0
  178. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  179. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  180. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  181. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  182. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  183. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  184. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  185. package/dist-engine-src/src/version/context.rs +52 -0
  186. package/dist-engine-src/src/version/mod.rs +12 -0
  187. package/dist-engine-src/src/version/refs.rs +421 -0
  188. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  189. package/dist-engine-src/src/version/types.rs +21 -0
  190. package/dist-engine-src/src/wasm/mod.rs +60 -0
  191. package/package.json +68 -64
@@ -0,0 +1,902 @@
1
+ use std::any::Any;
2
+ use std::collections::{BTreeMap, BTreeSet};
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, BinaryArray, BooleanArray, Int64Array, StringArray};
7
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
8
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
9
+ use datafusion::catalog::{Session, TableProvider};
10
+ use datafusion::common::{DataFusionError, Result};
11
+ use datafusion::datasource::TableType;
12
+ use datafusion::execution::TaskContext;
13
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
14
+ use datafusion::physical_expr::EquivalenceProperties;
15
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
16
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
17
+ use datafusion::physical_plan::{
18
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
19
+ };
20
+ use futures_util::stream;
21
+ use serde::Deserialize;
22
+ use tokio::sync::Mutex;
23
+
24
+ use crate::binary_cas::{BlobDataReader, BlobHash};
25
+ use crate::changelog::MaterializedCanonicalChange;
26
+ use crate::commit_graph::CommitGraphReader;
27
+ use crate::serialize_row_metadata;
28
+ use crate::LixError;
29
+
30
+ use super::history_projection::{tombstone_identity_column_value, HistoryIdentityProjection};
31
+ use super::history_route::{
32
+ history_descriptor_event_matches, load_history_entries, parse_history_filter,
33
+ HistoryColumnStyle, HistoryEntry, HistoryRoute, HistoryViewDescriptor, HISTORY_COL_CHANGE_ID,
34
+ HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID, HISTORY_COL_FILE_ID,
35
+ HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID, HISTORY_COL_SCHEMA_KEY,
36
+ HISTORY_COL_SCHEMA_VERSION, HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
37
+ };
38
+ use super::result_metadata::json_field;
39
+ use super::SqlChangelogQuerySource;
40
+
41
/// Schema key of changes that describe a file (id, directory, name, hidden).
const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";

/// Schema key of changes that describe a directory (id, parent, name).
const DIRECTORY_DESCRIPTOR_SCHEMA_KEY: &str = "lix_directory_descriptor";

/// Schema key of changes that point a file at a blob hash.
const BLOB_REF_SCHEMA_KEY: &str = "lix_binary_blob_ref";
44
+
45
+ pub(crate) async fn register_lix_file_history_provider(
46
+ session: &datafusion::prelude::SessionContext,
47
+ commit_graph: Box<dyn CommitGraphReader>,
48
+ query_source: SqlChangelogQuerySource,
49
+ blob_reader: Arc<dyn BlobDataReader>,
50
+ ) -> Result<(), LixError> {
51
+ session
52
+ .register_table(
53
+ "lix_file_history",
54
+ Arc::new(LixFileHistoryProvider::new(
55
+ Arc::new(Mutex::new(commit_graph)),
56
+ query_source,
57
+ blob_reader,
58
+ )),
59
+ )
60
+ .map_err(datafusion_error_to_lix_error)?;
61
+ Ok(())
62
+ }
63
+
64
+ struct LixFileHistoryProvider {
65
+ schema: SchemaRef,
66
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
67
+ query_source: SqlChangelogQuerySource,
68
+ blob_reader: Arc<dyn BlobDataReader>,
69
+ }
70
+
71
+ impl std::fmt::Debug for LixFileHistoryProvider {
72
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73
+ f.debug_struct("LixFileHistoryProvider").finish()
74
+ }
75
+ }
76
+
77
+ impl LixFileHistoryProvider {
78
+ fn new(
79
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
80
+ query_source: SqlChangelogQuerySource,
81
+ blob_reader: Arc<dyn BlobDataReader>,
82
+ ) -> Self {
83
+ Self {
84
+ schema: lix_file_history_schema(),
85
+ commit_graph,
86
+ query_source,
87
+ blob_reader,
88
+ }
89
+ }
90
+ }
91
+
92
+ #[async_trait]
93
+ impl TableProvider for LixFileHistoryProvider {
94
+ fn as_any(&self) -> &dyn Any {
95
+ self
96
+ }
97
+
98
+ fn schema(&self) -> SchemaRef {
99
+ Arc::clone(&self.schema)
100
+ }
101
+
102
+ fn table_type(&self) -> TableType {
103
+ TableType::View
104
+ }
105
+
106
+ fn supports_filters_pushdown(
107
+ &self,
108
+ filters: &[&Expr],
109
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
110
+ Ok(filters
111
+ .iter()
112
+ .map(|filter| {
113
+ if parse_history_filter(filter, HistoryColumnStyle::Prefixed).is_some() {
114
+ TableProviderFilterPushDown::Exact
115
+ } else {
116
+ TableProviderFilterPushDown::Unsupported
117
+ }
118
+ })
119
+ .collect())
120
+ }
121
+
122
+ async fn scan(
123
+ &self,
124
+ _state: &dyn Session,
125
+ projection: Option<&Vec<usize>>,
126
+ filters: &[Expr],
127
+ limit: Option<usize>,
128
+ ) -> Result<Arc<dyn ExecutionPlan>> {
129
+ let schema = projected_schema(&self.schema, projection)?;
130
+ let needs_data = projection.is_none_or(|projection| {
131
+ projection.iter().any(|index| {
132
+ self.schema
133
+ .field(*index)
134
+ .name()
135
+ .as_str()
136
+ .eq_ignore_ascii_case("data")
137
+ })
138
+ });
139
+ Ok(Arc::new(LixFileHistoryScanExec::new(
140
+ Arc::clone(&self.commit_graph),
141
+ self.query_source.clone(),
142
+ Arc::clone(&self.blob_reader),
143
+ schema,
144
+ needs_data,
145
+ HistoryRoute::from_filters(filters, HistoryColumnStyle::Prefixed),
146
+ limit,
147
+ )))
148
+ }
149
+ }
150
+
151
+ struct LixFileHistoryScanExec {
152
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
153
+ query_source: SqlChangelogQuerySource,
154
+ blob_reader: Arc<dyn BlobDataReader>,
155
+ schema: SchemaRef,
156
+ needs_data: bool,
157
+ route: HistoryRoute,
158
+ limit: Option<usize>,
159
+ properties: Arc<PlanProperties>,
160
+ }
161
+
162
+ impl std::fmt::Debug for LixFileHistoryScanExec {
163
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164
+ f.debug_struct("LixFileHistoryScanExec")
165
+ .field("route", &self.route)
166
+ .field("limit", &self.limit)
167
+ .finish()
168
+ }
169
+ }
170
+
171
+ impl LixFileHistoryScanExec {
172
+ fn new(
173
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
174
+ query_source: SqlChangelogQuerySource,
175
+ blob_reader: Arc<dyn BlobDataReader>,
176
+ schema: SchemaRef,
177
+ needs_data: bool,
178
+ route: HistoryRoute,
179
+ limit: Option<usize>,
180
+ ) -> Self {
181
+ let properties = PlanProperties::new(
182
+ EquivalenceProperties::new(Arc::clone(&schema)),
183
+ Partitioning::UnknownPartitioning(1),
184
+ EmissionType::Incremental,
185
+ Boundedness::Bounded,
186
+ );
187
+ Self {
188
+ commit_graph,
189
+ query_source,
190
+ blob_reader,
191
+ schema,
192
+ needs_data,
193
+ route,
194
+ limit,
195
+ properties: Arc::new(properties),
196
+ }
197
+ }
198
+ }
199
+
200
+ impl DisplayAs for LixFileHistoryScanExec {
201
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
202
+ match t {
203
+ DisplayFormatType::Default | DisplayFormatType::Verbose => write!(
204
+ f,
205
+ "LixFileHistoryScanExec(route={:?}, limit={:?})",
206
+ self.route, self.limit
207
+ ),
208
+ DisplayFormatType::TreeRender => write!(f, "LixFileHistoryScanExec"),
209
+ }
210
+ }
211
+ }
212
+
213
+ impl ExecutionPlan for LixFileHistoryScanExec {
214
+ fn name(&self) -> &str {
215
+ "LixFileHistoryScanExec"
216
+ }
217
+
218
+ fn as_any(&self) -> &dyn Any {
219
+ self
220
+ }
221
+
222
+ fn properties(&self) -> &Arc<PlanProperties> {
223
+ &self.properties
224
+ }
225
+
226
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
227
+ Vec::new()
228
+ }
229
+
230
+ fn with_new_children(
231
+ self: Arc<Self>,
232
+ children: Vec<Arc<dyn ExecutionPlan>>,
233
+ ) -> Result<Arc<dyn ExecutionPlan>> {
234
+ if !children.is_empty() {
235
+ return Err(DataFusionError::Execution(
236
+ "LixFileHistoryScanExec does not accept children".to_string(),
237
+ ));
238
+ }
239
+ Ok(self)
240
+ }
241
+
242
+ fn execute(
243
+ &self,
244
+ partition: usize,
245
+ _context: Arc<TaskContext>,
246
+ ) -> Result<SendableRecordBatchStream> {
247
+ if partition != 0 {
248
+ return Err(DataFusionError::Execution(format!(
249
+ "LixFileHistoryScanExec only exposes one partition, got {partition}"
250
+ )));
251
+ }
252
+
253
+ let commit_graph = Arc::clone(&self.commit_graph);
254
+ let query_source = self.query_source.clone();
255
+ let blob_reader = Arc::clone(&self.blob_reader);
256
+ let schema = Arc::clone(&self.schema);
257
+ let stream_schema = Arc::clone(&schema);
258
+ let route = self.route.clone();
259
+ let limit = self.limit;
260
+ let needs_data = self.needs_data;
261
+
262
+ let fut = async move {
263
+ let mut rows = load_file_history_rows(
264
+ commit_graph,
265
+ query_source,
266
+ &blob_reader,
267
+ &route,
268
+ needs_data,
269
+ )
270
+ .await
271
+ .map_err(lix_error_to_datafusion_error)?;
272
+ if let Some(limit) = limit {
273
+ rows.truncate(limit);
274
+ }
275
+ file_history_record_batch(&stream_schema, &rows).map_err(lix_error_to_datafusion_error)
276
+ };
277
+
278
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
279
+ schema,
280
+ stream::once(fut),
281
+ )))
282
+ }
283
+ }
284
+
285
+ #[derive(Debug, Clone)]
286
+ struct FileHistoryDescriptorRecord {
287
+ id: String,
288
+ directory_id: Option<String>,
289
+ name: Option<String>,
290
+ hidden: Option<bool>,
291
+ entry: HistoryEntry,
292
+ }
293
+
294
+ #[derive(Debug, Clone)]
295
+ struct FileHistoryDirectoryRecord {
296
+ id: String,
297
+ parent_id: Option<String>,
298
+ name: String,
299
+ entry: HistoryEntry,
300
+ }
301
+
302
+ #[derive(Debug, Clone)]
303
+ struct FileHistoryBlobRecord {
304
+ file_id: String,
305
+ blob_hash: Option<String>,
306
+ entry: HistoryEntry,
307
+ }
308
+
309
+ #[derive(Debug, Clone)]
310
+ struct FileHistoryEvent {
311
+ file_id: String,
312
+ start_commit_id: String,
313
+ depth: u32,
314
+ priority: u8,
315
+ change: MaterializedCanonicalChange,
316
+ observed_commit_id: String,
317
+ commit_created_at: String,
318
+ }
319
+
320
+ #[derive(Debug, Clone)]
321
+ struct FileHistoryOutputRow {
322
+ entity_id: String,
323
+ id: String,
324
+ path: Option<String>,
325
+ directory_id: Option<String>,
326
+ name: Option<String>,
327
+ hidden: Option<bool>,
328
+ data: Option<Vec<u8>>,
329
+ descriptor_change: MaterializedCanonicalChange,
330
+ event: FileHistoryEvent,
331
+ }
332
+
333
+ #[derive(Debug, Deserialize)]
334
+ struct FileDescriptorSnapshot {
335
+ id: String,
336
+ directory_id: Option<String>,
337
+ name: String,
338
+ hidden: bool,
339
+ }
340
+
341
+ #[derive(Debug, Deserialize)]
342
+ struct DirectoryDescriptorSnapshot {
343
+ id: String,
344
+ parent_id: Option<String>,
345
+ name: String,
346
+ }
347
+
348
+ #[derive(Debug, Deserialize)]
349
+ struct BlobRefSnapshot {
350
+ id: String,
351
+ blob_hash: String,
352
+ }
353
+
354
+ async fn load_file_history_rows(
355
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
356
+ query_source: SqlChangelogQuerySource,
357
+ blob_reader: &Arc<dyn BlobDataReader>,
358
+ route: &HistoryRoute,
359
+ needs_data: bool,
360
+ ) -> Result<Vec<FileHistoryOutputRow>, LixError> {
361
+ let event_route = route.traversal_only();
362
+ let event_entries = load_history_entries(
363
+ HistoryViewDescriptor {
364
+ view_name: "lix_file_history",
365
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
366
+ },
367
+ Arc::clone(&commit_graph),
368
+ query_source.json_reader.clone(),
369
+ &event_route,
370
+ vec![
371
+ FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
372
+ DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
373
+ BLOB_REF_SCHEMA_KEY.to_string(),
374
+ ],
375
+ )
376
+ .await?;
377
+ let context_route = route.starts_only();
378
+ let context_entries = load_history_entries(
379
+ HistoryViewDescriptor {
380
+ view_name: "lix_file_history",
381
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
382
+ },
383
+ commit_graph,
384
+ query_source.json_reader,
385
+ &context_route,
386
+ vec![
387
+ FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
388
+ DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
389
+ BLOB_REF_SCHEMA_KEY.to_string(),
390
+ ],
391
+ )
392
+ .await?;
393
+
394
+ let event_descriptors = parse_file_history_descriptors(&event_entries)?;
395
+ let event_directories = parse_file_history_directories(&event_entries)?;
396
+ let event_blobs = parse_file_history_blobs(&event_entries)?;
397
+ let descriptors = parse_file_history_descriptors(&context_entries)?;
398
+ let directories = parse_file_history_directories(&context_entries)?;
399
+ let blobs = parse_file_history_blobs(&context_entries)?;
400
+ let events = file_history_events(
401
+ &event_descriptors,
402
+ &event_directories,
403
+ &event_blobs,
404
+ &descriptors,
405
+ );
406
+
407
+ let mut output = Vec::new();
408
+ for event in events {
409
+ let Some(descriptor) = nearest_file_descriptor(&descriptors, &event) else {
410
+ continue;
411
+ };
412
+ let blob = nearest_blob_ref(&blobs, &event);
413
+ let data = if needs_data {
414
+ match blob.and_then(|blob| blob.blob_hash.as_deref()) {
415
+ Some(blob_hash) => load_single_blob_bytes(blob_reader, blob_hash).await?,
416
+ None => None,
417
+ }
418
+ } else {
419
+ None
420
+ };
421
+ let path = resolve_file_history_path(descriptor, &directories, event.depth);
422
+ let id = tombstone_identity_column_value(
423
+ "id",
424
+ &descriptor.id,
425
+ HistoryIdentityProjection::SingleColumn { column: "id" },
426
+ )?
427
+ .and_then(|value| value.as_str().map(ToOwned::to_owned))
428
+ .unwrap_or_else(|| descriptor.id.clone());
429
+
430
+ output.push(FileHistoryOutputRow {
431
+ entity_id: descriptor.id.clone(),
432
+ id,
433
+ path,
434
+ directory_id: descriptor.directory_id.clone(),
435
+ name: descriptor.name.clone(),
436
+ hidden: descriptor.hidden,
437
+ data,
438
+ descriptor_change: descriptor.entry.change.clone(),
439
+ event,
440
+ });
441
+ }
442
+ output.retain(|row| {
443
+ route.matches_surface_row(
444
+ FILE_DESCRIPTOR_SCHEMA_KEY,
445
+ &row.entity_id,
446
+ Some(&row.entity_id),
447
+ row.event.depth,
448
+ )
449
+ });
450
+
451
+ output.sort_by(|left, right| {
452
+ left.entity_id
453
+ .cmp(&right.entity_id)
454
+ .then(left.event.start_commit_id.cmp(&right.event.start_commit_id))
455
+ .then(left.event.depth.cmp(&right.event.depth))
456
+ .then(
457
+ left.event
458
+ .observed_commit_id
459
+ .cmp(&right.event.observed_commit_id),
460
+ )
461
+ .then(left.event.change.id.cmp(&right.event.change.id))
462
+ });
463
+ Ok(output)
464
+ }
465
+
466
+ async fn load_single_blob_bytes(
467
+ blob_reader: &Arc<dyn BlobDataReader>,
468
+ blob_hash: &str,
469
+ ) -> Result<Option<Vec<u8>>, LixError> {
470
+ let hash = BlobHash::from_hex(blob_hash)?;
471
+ Ok(blob_reader
472
+ .load_bytes_many(&[hash])
473
+ .await?
474
+ .into_vec()
475
+ .into_iter()
476
+ .next()
477
+ .flatten())
478
+ }
479
+
480
+ fn file_history_events(
481
+ event_descriptors: &[FileHistoryDescriptorRecord],
482
+ event_directories: &[FileHistoryDirectoryRecord],
483
+ event_blobs: &[FileHistoryBlobRecord],
484
+ context_descriptors: &[FileHistoryDescriptorRecord],
485
+ ) -> Vec<FileHistoryEvent> {
486
+ let mut descriptor_ids_by_start = BTreeSet::<(String, String)>::new();
487
+ let mut directory_ids_by_file_start = BTreeMap::<(String, String), BTreeSet<String>>::new();
488
+
489
+ for descriptor in context_descriptors {
490
+ let key = (
491
+ descriptor.id.clone(),
492
+ descriptor.entry.start_commit_id.clone(),
493
+ );
494
+ descriptor_ids_by_start.insert(key.clone());
495
+ if let Some(directory_id) = &descriptor.directory_id {
496
+ directory_ids_by_file_start
497
+ .entry(key)
498
+ .or_default()
499
+ .insert(directory_id.clone());
500
+ }
501
+ }
502
+
503
+ let mut candidates = Vec::new();
504
+ for descriptor in event_descriptors {
505
+ candidates.push(file_history_event_from_entry(
506
+ descriptor.id.clone(),
507
+ &descriptor.entry,
508
+ 1,
509
+ ));
510
+ }
511
+ for directory in event_directories {
512
+ for ((file_id, start_commit_id), directory_ids) in &directory_ids_by_file_start {
513
+ if start_commit_id == &directory.entry.start_commit_id
514
+ && directory_ids.contains(&directory.id)
515
+ {
516
+ candidates.push(file_history_event_from_entry(
517
+ file_id.clone(),
518
+ &directory.entry,
519
+ 2,
520
+ ));
521
+ }
522
+ }
523
+ }
524
+ for blob in event_blobs {
525
+ if descriptor_ids_by_start
526
+ .contains(&(blob.file_id.clone(), blob.entry.start_commit_id.clone()))
527
+ {
528
+ candidates.push(file_history_event_from_entry(
529
+ blob.file_id.clone(),
530
+ &blob.entry,
531
+ 3,
532
+ ));
533
+ }
534
+ }
535
+
536
+ candidates.sort_by(|left, right| {
537
+ left.file_id
538
+ .cmp(&right.file_id)
539
+ .then(left.start_commit_id.cmp(&right.start_commit_id))
540
+ .then(left.depth.cmp(&right.depth))
541
+ .then(left.priority.cmp(&right.priority))
542
+ .then(left.change.id.cmp(&right.change.id))
543
+ });
544
+ candidates.dedup_by(|left, right| {
545
+ left.file_id == right.file_id
546
+ && left.start_commit_id == right.start_commit_id
547
+ && left.depth == right.depth
548
+ });
549
+ candidates
550
+ }
551
+
552
+ fn file_history_event_from_entry(
553
+ file_id: String,
554
+ entry: &HistoryEntry,
555
+ priority: u8,
556
+ ) -> FileHistoryEvent {
557
+ FileHistoryEvent {
558
+ file_id,
559
+ start_commit_id: entry.start_commit_id.clone(),
560
+ depth: entry.depth,
561
+ priority,
562
+ change: entry.change.clone(),
563
+ observed_commit_id: entry.observed_commit_id.clone(),
564
+ commit_created_at: entry.commit_created_at.clone(),
565
+ }
566
+ }
567
+
568
+ fn parse_file_history_descriptors(
569
+ entries: &[HistoryEntry],
570
+ ) -> Result<Vec<FileHistoryDescriptorRecord>, LixError> {
571
+ entries
572
+ .iter()
573
+ .filter(|entry| entry.change.schema_key == FILE_DESCRIPTOR_SCHEMA_KEY)
574
+ .map(|entry| {
575
+ let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
576
+ return Ok(FileHistoryDescriptorRecord {
577
+ id: entry.change.entity_id.as_string()?,
578
+ directory_id: None,
579
+ name: None,
580
+ hidden: None,
581
+ entry: entry.clone(),
582
+ });
583
+ };
584
+ let snapshot: FileDescriptorSnapshot =
585
+ serde_json::from_str(snapshot_content).map_err(|error| {
586
+ LixError::new(
587
+ "LIX_ERROR_UNKNOWN",
588
+ format!("invalid lix_file_descriptor history snapshot JSON: {error}"),
589
+ )
590
+ })?;
591
+ Ok(FileHistoryDescriptorRecord {
592
+ id: snapshot.id,
593
+ directory_id: snapshot.directory_id,
594
+ name: Some(snapshot.name),
595
+ hidden: Some(snapshot.hidden),
596
+ entry: entry.clone(),
597
+ })
598
+ })
599
+ .collect()
600
+ }
601
+
602
+ fn parse_file_history_directories(
603
+ entries: &[HistoryEntry],
604
+ ) -> Result<Vec<FileHistoryDirectoryRecord>, LixError> {
605
+ entries
606
+ .iter()
607
+ .filter(|entry| entry.change.schema_key == DIRECTORY_DESCRIPTOR_SCHEMA_KEY)
608
+ .filter_map(|entry| {
609
+ let snapshot_content = entry.change.snapshot_content.clone()?;
610
+ Some((entry, snapshot_content))
611
+ })
612
+ .map(|(entry, snapshot_content)| {
613
+ let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(&snapshot_content)
614
+ .map_err(|error| {
615
+ LixError::new(
616
+ "LIX_ERROR_UNKNOWN",
617
+ format!("invalid lix_directory_descriptor history snapshot JSON: {error}"),
618
+ )
619
+ })?;
620
+ Ok(FileHistoryDirectoryRecord {
621
+ id: snapshot.id,
622
+ parent_id: snapshot.parent_id,
623
+ name: snapshot.name,
624
+ entry: entry.clone(),
625
+ })
626
+ })
627
+ .collect()
628
+ }
629
+
630
+ fn parse_file_history_blobs(
631
+ entries: &[HistoryEntry],
632
+ ) -> Result<Vec<FileHistoryBlobRecord>, LixError> {
633
+ entries
634
+ .iter()
635
+ .filter(|entry| entry.change.schema_key == BLOB_REF_SCHEMA_KEY)
636
+ .map(|entry| {
637
+ let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
638
+ return Ok(FileHistoryBlobRecord {
639
+ file_id: entry.change.file_id.clone().unwrap_or_else(|| {
640
+ entry
641
+ .change
642
+ .entity_id
643
+ .as_string()
644
+ .expect("canonical change entity identity should project")
645
+ }),
646
+ blob_hash: None,
647
+ entry: entry.clone(),
648
+ });
649
+ };
650
+ let snapshot: BlobRefSnapshot =
651
+ serde_json::from_str(snapshot_content).map_err(|error| {
652
+ LixError::new(
653
+ "LIX_ERROR_UNKNOWN",
654
+ format!("invalid lix_binary_blob_ref history snapshot JSON: {error}"),
655
+ )
656
+ })?;
657
+ Ok(FileHistoryBlobRecord {
658
+ file_id: entry.change.file_id.clone().unwrap_or(snapshot.id),
659
+ blob_hash: Some(snapshot.blob_hash),
660
+ entry: entry.clone(),
661
+ })
662
+ })
663
+ .collect()
664
+ }
665
+
666
+ fn nearest_file_descriptor<'a>(
667
+ descriptors: &'a [FileHistoryDescriptorRecord],
668
+ event: &FileHistoryEvent,
669
+ ) -> Option<&'a FileHistoryDescriptorRecord> {
670
+ descriptors
671
+ .iter()
672
+ .filter(|descriptor| {
673
+ let exact_descriptor_event =
674
+ history_descriptor_event_matches(&descriptor.entry, event.depth, &event.change.id);
675
+ (exact_descriptor_event || descriptor.name.is_some())
676
+ && descriptor.id == event.file_id
677
+ && descriptor.entry.start_commit_id == event.start_commit_id
678
+ && descriptor.entry.depth >= event.depth
679
+ })
680
+ .min_by(|left, right| {
681
+ left.entry
682
+ .depth
683
+ .cmp(&right.entry.depth)
684
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
685
+ })
686
+ }
687
+
688
+ fn nearest_blob_ref<'a>(
689
+ blobs: &'a [FileHistoryBlobRecord],
690
+ event: &FileHistoryEvent,
691
+ ) -> Option<&'a FileHistoryBlobRecord> {
692
+ blobs
693
+ .iter()
694
+ .filter(|blob| {
695
+ blob.file_id == event.file_id
696
+ && blob.entry.start_commit_id == event.start_commit_id
697
+ && blob.entry.depth >= event.depth
698
+ })
699
+ .min_by(|left, right| {
700
+ left.entry
701
+ .depth
702
+ .cmp(&right.entry.depth)
703
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
704
+ })
705
+ }
706
+
707
+ fn resolve_file_history_path(
708
+ descriptor: &FileHistoryDescriptorRecord,
709
+ directories: &[FileHistoryDirectoryRecord],
710
+ target_depth: u32,
711
+ ) -> Option<String> {
712
+ let name = descriptor.name.as_ref()?;
713
+ let Some(directory_id) = descriptor.directory_id.as_deref() else {
714
+ return Some(format!("/{name}"));
715
+ };
716
+ let directory_path = resolve_directory_history_path(
717
+ directory_id,
718
+ &descriptor.entry.start_commit_id,
719
+ target_depth,
720
+ directories,
721
+ &mut BTreeMap::new(),
722
+ &mut BTreeSet::new(),
723
+ )?;
724
+ Some(format!("{directory_path}{name}"))
725
+ }
726
+
727
+ fn resolve_directory_history_path(
728
+ directory_id: &str,
729
+ start_commit_id: &str,
730
+ target_depth: u32,
731
+ directories: &[FileHistoryDirectoryRecord],
732
+ cache: &mut BTreeMap<String, Option<String>>,
733
+ visiting: &mut BTreeSet<String>,
734
+ ) -> Option<String> {
735
+ if let Some(path) = cache.get(directory_id) {
736
+ return path.clone();
737
+ }
738
+ if !visiting.insert(directory_id.to_string()) {
739
+ cache.insert(directory_id.to_string(), None);
740
+ return None;
741
+ }
742
+ let directory = directories
743
+ .iter()
744
+ .filter(|directory| {
745
+ directory.id == directory_id
746
+ && directory.entry.start_commit_id == start_commit_id
747
+ && directory.entry.depth >= target_depth
748
+ })
749
+ .min_by(|left, right| {
750
+ left.entry
751
+ .depth
752
+ .cmp(&right.entry.depth)
753
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
754
+ })?;
755
+ let path = match directory.parent_id.as_deref() {
756
+ Some(parent_id) => {
757
+ let parent_path = resolve_directory_history_path(
758
+ parent_id,
759
+ start_commit_id,
760
+ target_depth,
761
+ directories,
762
+ cache,
763
+ visiting,
764
+ )?;
765
+ format!("{parent_path}{}/", directory.name)
766
+ }
767
+ None => format!("/{}/", directory.name),
768
+ };
769
+ visiting.remove(directory_id);
770
+ cache.insert(directory_id.to_string(), Some(path.clone()));
771
+ Some(path)
772
+ }
773
+
774
+ fn file_history_record_batch(
775
+ schema: &SchemaRef,
776
+ rows: &[FileHistoryOutputRow],
777
+ ) -> Result<RecordBatch, LixError> {
778
+ let columns = schema
779
+ .fields()
780
+ .iter()
781
+ .map(|field| file_history_column_array(field.name(), rows))
782
+ .collect::<Result<Vec<_>, _>>()?;
783
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
784
+ RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
785
+ LixError::new(
786
+ "LIX_ERROR_UNKNOWN",
787
+ format!("sql2 failed to build lix_file_history record batch: {error}"),
788
+ )
789
+ })
790
+ }
791
+
792
+ fn file_history_column_array(
793
+ column_name: &str,
794
+ rows: &[FileHistoryOutputRow],
795
+ ) -> Result<ArrayRef, LixError> {
796
+ Ok(match column_name {
797
+ "id" => string_array(rows.iter().map(|row| Some(row.id.as_str()))),
798
+ "path" => string_array(rows.iter().map(|row| row.path.as_deref())),
799
+ "directory_id" => string_array(rows.iter().map(|row| row.directory_id.as_deref())),
800
+ "name" => string_array(rows.iter().map(|row| row.name.as_deref())),
801
+ "hidden" => Arc::new(BooleanArray::from(
802
+ rows.iter().map(|row| row.hidden).collect::<Vec<_>>(),
803
+ )) as ArrayRef,
804
+ "data" => Arc::new(BinaryArray::from(
805
+ rows.iter()
806
+ .map(|row| row.data.as_deref())
807
+ .collect::<Vec<_>>(),
808
+ )) as ArrayRef,
809
+ HISTORY_COL_ENTITY_ID => string_array(rows.iter().map(|row| Some(row.entity_id.as_str()))),
810
+ HISTORY_COL_SCHEMA_KEY => {
811
+ string_array(rows.iter().map(|_| Some(FILE_DESCRIPTOR_SCHEMA_KEY)))
812
+ }
813
+ HISTORY_COL_FILE_ID => string_array(rows.iter().map(|row| Some(row.entity_id.as_str()))),
814
+ HISTORY_COL_SCHEMA_VERSION => string_array(
815
+ rows.iter()
816
+ .map(|row| Some(row.descriptor_change.schema_version.as_str())),
817
+ ),
818
+ HISTORY_COL_CHANGE_ID => {
819
+ string_array(rows.iter().map(|row| Some(row.event.change.id.as_str())))
820
+ }
821
+ HISTORY_COL_SNAPSHOT_CONTENT => string_array(
822
+ rows.iter()
823
+ .map(|row| row.descriptor_change.snapshot_content.as_deref()),
824
+ ),
825
+ HISTORY_COL_METADATA => Arc::new(StringArray::from(
826
+ rows.iter()
827
+ .map(|row| {
828
+ row.descriptor_change
829
+ .metadata
830
+ .as_ref()
831
+ .map(serialize_row_metadata)
832
+ })
833
+ .collect::<Vec<_>>(),
834
+ )),
835
+ HISTORY_COL_OBSERVED_COMMIT_ID => string_array(
836
+ rows.iter()
837
+ .map(|row| Some(row.event.observed_commit_id.as_str())),
838
+ ),
839
+ HISTORY_COL_COMMIT_CREATED_AT => string_array(
840
+ rows.iter()
841
+ .map(|row| Some(row.event.commit_created_at.as_str())),
842
+ ),
843
+ HISTORY_COL_START_COMMIT_ID => string_array(
844
+ rows.iter()
845
+ .map(|row| Some(row.event.start_commit_id.as_str())),
846
+ ),
847
+ HISTORY_COL_DEPTH => Arc::new(Int64Array::from(
848
+ rows.iter()
849
+ .map(|row| i64::from(row.event.depth))
850
+ .collect::<Vec<_>>(),
851
+ )) as ArrayRef,
852
+ other => {
853
+ return Err(LixError::new(
854
+ "LIX_ERROR_UNKNOWN",
855
+ format!(
856
+ "sql2 lix_file_history provider does not support projected column '{other}'"
857
+ ),
858
+ ))
859
+ }
860
+ })
861
+ }
862
+
863
+ fn lix_file_history_schema() -> SchemaRef {
864
+ Arc::new(Schema::new(vec![
865
+ Field::new("id", DataType::Utf8, false),
866
+ Field::new("path", DataType::Utf8, true),
867
+ Field::new("directory_id", DataType::Utf8, true),
868
+ Field::new("name", DataType::Utf8, true),
869
+ Field::new("hidden", DataType::Boolean, true),
870
+ Field::new("data", DataType::Binary, true),
871
+ Field::new(HISTORY_COL_ENTITY_ID, DataType::Utf8, false),
872
+ Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
873
+ Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
874
+ json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
875
+ Field::new(HISTORY_COL_SCHEMA_VERSION, DataType::Utf8, false),
876
+ Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
877
+ json_field(HISTORY_COL_METADATA, true),
878
+ Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
879
+ Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
880
+ Field::new(HISTORY_COL_START_COMMIT_ID, DataType::Utf8, false),
881
+ Field::new(HISTORY_COL_DEPTH, DataType::Int64, false),
882
+ ]))
883
+ }
884
+
885
+ fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
886
+ let Some(projection) = projection else {
887
+ return Ok(Arc::clone(base_schema));
888
+ };
889
+ Ok(Arc::new(base_schema.project(projection)?))
890
+ }
891
+
892
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
893
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
894
+ }
895
+
896
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
897
+ super::error::datafusion_error_to_lix_error(error)
898
+ }
899
+
900
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
901
+ super::error::lix_error_to_datafusion_error(error)
902
+ }