@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,910 @@
1
+ use std::any::Any;
2
+ use std::collections::{BTreeMap, BTreeSet};
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, BinaryArray, BooleanArray, Int64Array, StringArray};
7
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
8
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
9
+ use datafusion::catalog::{Session, TableProvider};
10
+ use datafusion::common::{DataFusionError, Result};
11
+ use datafusion::datasource::TableType;
12
+ use datafusion::execution::TaskContext;
13
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
14
+ use datafusion::physical_expr::EquivalenceProperties;
15
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
16
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
17
+ use datafusion::physical_plan::{
18
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
19
+ };
20
+ use futures_util::stream;
21
+ use serde::Deserialize;
22
+ use tokio::sync::Mutex;
23
+
24
+ use crate::binary_cas::{BlobDataReader, BlobHash};
25
+ use crate::commit_graph::CommitGraphReader;
26
+ use crate::serialize_row_metadata;
27
+ use crate::LixError;
28
+
29
+ use super::history_projection::{tombstone_identity_column_value, HistoryIdentityProjection};
30
+ use super::history_route::{
31
+ history_descriptor_event_matches, load_history_entries, parse_history_filter,
32
+ HistoryColumnStyle, HistoryEntry, HistoryRoute, HistoryViewDescriptor, HISTORY_COL_CHANGE_ID,
33
+ HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID, HISTORY_COL_FILE_ID,
34
+ HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID, HISTORY_COL_SCHEMA_KEY,
35
+ HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
36
+ };
37
+ use super::result_metadata::json_field;
38
+ use super::SqlCommitStoreQuerySource;
39
+ use crate::commit_store::MaterializedChange;
40
+
41
+ const FILE_DESCRIPTOR_SCHEMA_KEY: &str = "lix_file_descriptor";
42
+ const DIRECTORY_DESCRIPTOR_SCHEMA_KEY: &str = "lix_directory_descriptor";
43
+ const BLOB_REF_SCHEMA_KEY: &str = "lix_binary_blob_ref";
44
+
45
+ pub(crate) async fn register_lix_file_history_provider(
46
+ session: &datafusion::prelude::SessionContext,
47
+ commit_graph: Box<dyn CommitGraphReader>,
48
+ query_source: SqlCommitStoreQuerySource,
49
+ blob_reader: Arc<dyn BlobDataReader>,
50
+ ) -> Result<(), LixError> {
51
+ session
52
+ .register_table(
53
+ "lix_file_history",
54
+ Arc::new(LixFileHistoryProvider::new(
55
+ Arc::new(Mutex::new(commit_graph)),
56
+ query_source,
57
+ blob_reader,
58
+ )),
59
+ )
60
+ .map_err(datafusion_error_to_lix_error)?;
61
+ Ok(())
62
+ }
63
+
64
+ struct LixFileHistoryProvider {
65
+ schema: SchemaRef,
66
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
67
+ query_source: SqlCommitStoreQuerySource,
68
+ blob_reader: Arc<dyn BlobDataReader>,
69
+ }
70
+
71
+ impl std::fmt::Debug for LixFileHistoryProvider {
72
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73
+ f.debug_struct("LixFileHistoryProvider").finish()
74
+ }
75
+ }
76
+
77
+ impl LixFileHistoryProvider {
78
+ fn new(
79
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
80
+ query_source: SqlCommitStoreQuerySource,
81
+ blob_reader: Arc<dyn BlobDataReader>,
82
+ ) -> Self {
83
+ Self {
84
+ schema: lix_file_history_schema(),
85
+ commit_graph,
86
+ query_source,
87
+ blob_reader,
88
+ }
89
+ }
90
+ }
91
+
92
+ #[async_trait]
93
+ impl TableProvider for LixFileHistoryProvider {
94
+ fn as_any(&self) -> &dyn Any {
95
+ self
96
+ }
97
+
98
+ fn schema(&self) -> SchemaRef {
99
+ Arc::clone(&self.schema)
100
+ }
101
+
102
+ fn table_type(&self) -> TableType {
103
+ TableType::View
104
+ }
105
+
106
+ fn supports_filters_pushdown(
107
+ &self,
108
+ filters: &[&Expr],
109
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
110
+ Ok(filters
111
+ .iter()
112
+ .map(|filter| {
113
+ if parse_history_filter(filter, HistoryColumnStyle::Prefixed).is_some() {
114
+ TableProviderFilterPushDown::Exact
115
+ } else {
116
+ TableProviderFilterPushDown::Unsupported
117
+ }
118
+ })
119
+ .collect())
120
+ }
121
+
122
+ async fn scan(
123
+ &self,
124
+ _state: &dyn Session,
125
+ projection: Option<&Vec<usize>>,
126
+ filters: &[Expr],
127
+ limit: Option<usize>,
128
+ ) -> Result<Arc<dyn ExecutionPlan>> {
129
+ let schema = projected_schema(&self.schema, projection)?;
130
+ let needs_data = projection.is_none_or(|projection| {
131
+ projection.iter().any(|index| {
132
+ self.schema
133
+ .field(*index)
134
+ .name()
135
+ .as_str()
136
+ .eq_ignore_ascii_case("data")
137
+ })
138
+ });
139
+ Ok(Arc::new(LixFileHistoryScanExec::new(
140
+ Arc::clone(&self.commit_graph),
141
+ self.query_source.clone(),
142
+ Arc::clone(&self.blob_reader),
143
+ schema,
144
+ needs_data,
145
+ HistoryRoute::from_filters(filters, HistoryColumnStyle::Prefixed),
146
+ limit,
147
+ )))
148
+ }
149
+ }
150
+
151
+ struct LixFileHistoryScanExec {
152
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
153
+ query_source: SqlCommitStoreQuerySource,
154
+ blob_reader: Arc<dyn BlobDataReader>,
155
+ schema: SchemaRef,
156
+ needs_data: bool,
157
+ route: HistoryRoute,
158
+ limit: Option<usize>,
159
+ properties: Arc<PlanProperties>,
160
+ }
161
+
162
+ impl std::fmt::Debug for LixFileHistoryScanExec {
163
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164
+ f.debug_struct("LixFileHistoryScanExec")
165
+ .field("route", &self.route)
166
+ .field("limit", &self.limit)
167
+ .finish()
168
+ }
169
+ }
170
+
171
+ impl LixFileHistoryScanExec {
172
+ fn new(
173
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
174
+ query_source: SqlCommitStoreQuerySource,
175
+ blob_reader: Arc<dyn BlobDataReader>,
176
+ schema: SchemaRef,
177
+ needs_data: bool,
178
+ route: HistoryRoute,
179
+ limit: Option<usize>,
180
+ ) -> Self {
181
+ let properties = PlanProperties::new(
182
+ EquivalenceProperties::new(Arc::clone(&schema)),
183
+ Partitioning::UnknownPartitioning(1),
184
+ EmissionType::Incremental,
185
+ Boundedness::Bounded,
186
+ );
187
+ Self {
188
+ commit_graph,
189
+ query_source,
190
+ blob_reader,
191
+ schema,
192
+ needs_data,
193
+ route,
194
+ limit,
195
+ properties: Arc::new(properties),
196
+ }
197
+ }
198
+ }
199
+
200
+ impl DisplayAs for LixFileHistoryScanExec {
201
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
202
+ match t {
203
+ DisplayFormatType::Default | DisplayFormatType::Verbose => write!(
204
+ f,
205
+ "LixFileHistoryScanExec(route={:?}, limit={:?})",
206
+ self.route, self.limit
207
+ ),
208
+ DisplayFormatType::TreeRender => write!(f, "LixFileHistoryScanExec"),
209
+ }
210
+ }
211
+ }
212
+
213
+ impl ExecutionPlan for LixFileHistoryScanExec {
214
+ fn name(&self) -> &str {
215
+ "LixFileHistoryScanExec"
216
+ }
217
+
218
+ fn as_any(&self) -> &dyn Any {
219
+ self
220
+ }
221
+
222
+ fn properties(&self) -> &Arc<PlanProperties> {
223
+ &self.properties
224
+ }
225
+
226
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
227
+ Vec::new()
228
+ }
229
+
230
+ fn with_new_children(
231
+ self: Arc<Self>,
232
+ children: Vec<Arc<dyn ExecutionPlan>>,
233
+ ) -> Result<Arc<dyn ExecutionPlan>> {
234
+ if !children.is_empty() {
235
+ return Err(DataFusionError::Execution(
236
+ "LixFileHistoryScanExec does not accept children".to_string(),
237
+ ));
238
+ }
239
+ Ok(self)
240
+ }
241
+
242
+ fn execute(
243
+ &self,
244
+ partition: usize,
245
+ _context: Arc<TaskContext>,
246
+ ) -> Result<SendableRecordBatchStream> {
247
+ if partition != 0 {
248
+ return Err(DataFusionError::Execution(format!(
249
+ "LixFileHistoryScanExec only exposes one partition, got {partition}"
250
+ )));
251
+ }
252
+
253
+ let commit_graph = Arc::clone(&self.commit_graph);
254
+ let query_source = self.query_source.clone();
255
+ let blob_reader = Arc::clone(&self.blob_reader);
256
+ let schema = Arc::clone(&self.schema);
257
+ let stream_schema = Arc::clone(&schema);
258
+ let route = self.route.clone();
259
+ let limit = self.limit;
260
+ let needs_data = self.needs_data;
261
+
262
+ let fut = async move {
263
+ let mut rows = load_file_history_rows(
264
+ commit_graph,
265
+ query_source,
266
+ &blob_reader,
267
+ &route,
268
+ needs_data,
269
+ )
270
+ .await
271
+ .map_err(lix_error_to_datafusion_error)?;
272
+ if let Some(limit) = limit {
273
+ rows.truncate(limit);
274
+ }
275
+ file_history_record_batch(&stream_schema, &rows).map_err(lix_error_to_datafusion_error)
276
+ };
277
+
278
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
279
+ schema,
280
+ stream::once(fut),
281
+ )))
282
+ }
283
+ }
284
+
285
+ #[derive(Debug, Clone)]
286
+ struct FileHistoryDescriptorRecord {
287
+ id: String,
288
+ directory_id: Option<String>,
289
+ name: Option<String>,
290
+ hidden: Option<bool>,
291
+ entry: HistoryEntry,
292
+ }
293
+
294
+ #[derive(Debug, Clone)]
295
+ struct FileHistoryDirectoryRecord {
296
+ id: String,
297
+ parent_id: Option<String>,
298
+ name: String,
299
+ entry: HistoryEntry,
300
+ }
301
+
302
+ #[derive(Debug, Clone)]
303
+ struct FileHistoryBlobRecord {
304
+ file_id: String,
305
+ blob_hash: Option<String>,
306
+ entry: HistoryEntry,
307
+ }
308
+
309
+ #[derive(Debug, Clone)]
310
+ struct FileHistoryEvent {
311
+ file_id: String,
312
+ start_commit_id: String,
313
+ depth: u32,
314
+ priority: u8,
315
+ change: MaterializedChange,
316
+ observed_commit_id: String,
317
+ commit_created_at: String,
318
+ }
319
+
320
+ #[derive(Debug, Clone)]
321
+ struct FileHistoryOutputRow {
322
+ entity_id: String,
323
+ id: String,
324
+ path: Option<String>,
325
+ directory_id: Option<String>,
326
+ name: Option<String>,
327
+ hidden: Option<bool>,
328
+ data: Option<Vec<u8>>,
329
+ descriptor_change: MaterializedChange,
330
+ event: FileHistoryEvent,
331
+ }
332
+
333
+ #[derive(Debug, Deserialize)]
334
+ struct FileDescriptorSnapshot {
335
+ id: String,
336
+ directory_id: Option<String>,
337
+ name: String,
338
+ hidden: bool,
339
+ }
340
+
341
+ #[derive(Debug, Deserialize)]
342
+ struct DirectoryDescriptorSnapshot {
343
+ id: String,
344
+ parent_id: Option<String>,
345
+ name: String,
346
+ }
347
+
348
+ #[derive(Debug, Deserialize)]
349
+ struct BlobRefSnapshot {
350
+ id: String,
351
+ blob_hash: String,
352
+ }
353
+
354
+ async fn load_file_history_rows(
355
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
356
+ query_source: SqlCommitStoreQuerySource,
357
+ blob_reader: &Arc<dyn BlobDataReader>,
358
+ route: &HistoryRoute,
359
+ needs_data: bool,
360
+ ) -> Result<Vec<FileHistoryOutputRow>, LixError> {
361
+ let event_route = route.traversal_only();
362
+ let event_entries = load_history_entries(
363
+ HistoryViewDescriptor {
364
+ view_name: "lix_file_history",
365
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
366
+ },
367
+ Arc::clone(&commit_graph),
368
+ query_source.json_reader.clone(),
369
+ &event_route,
370
+ vec![
371
+ FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
372
+ DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
373
+ BLOB_REF_SCHEMA_KEY.to_string(),
374
+ ],
375
+ )
376
+ .await?;
377
+ let context_route = route.starts_only();
378
+ let context_entries = load_history_entries(
379
+ HistoryViewDescriptor {
380
+ view_name: "lix_file_history",
381
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
382
+ },
383
+ commit_graph,
384
+ query_source.json_reader,
385
+ &context_route,
386
+ vec![
387
+ FILE_DESCRIPTOR_SCHEMA_KEY.to_string(),
388
+ DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string(),
389
+ BLOB_REF_SCHEMA_KEY.to_string(),
390
+ ],
391
+ )
392
+ .await?;
393
+
394
+ let event_descriptors = parse_file_history_descriptors(&event_entries)?;
395
+ let event_directories = parse_file_history_directories(&event_entries)?;
396
+ let event_blobs = parse_file_history_blobs(&event_entries)?;
397
+ let descriptors = parse_file_history_descriptors(&context_entries)?;
398
+ let directories = parse_file_history_directories(&context_entries)?;
399
+ let blobs = parse_file_history_blobs(&context_entries)?;
400
+ let events = file_history_events(
401
+ &event_descriptors,
402
+ &event_directories,
403
+ &event_blobs,
404
+ &descriptors,
405
+ );
406
+
407
+ let mut output = Vec::new();
408
+ for event in events {
409
+ let Some(descriptor) = nearest_file_descriptor(&descriptors, &event) else {
410
+ continue;
411
+ };
412
+ let blob = nearest_blob_ref(&blobs, &event);
413
+ let data = if needs_data {
414
+ match blob.and_then(|blob| blob.blob_hash.as_deref()) {
415
+ Some(blob_hash) => load_single_blob_bytes(blob_reader, blob_hash).await?,
416
+ None => None,
417
+ }
418
+ } else {
419
+ None
420
+ };
421
+ let path = resolve_file_history_path(descriptor, &directories, event.depth);
422
+ let id = tombstone_identity_column_value(
423
+ "id",
424
+ &descriptor.id,
425
+ HistoryIdentityProjection::SingleColumn { column: "id" },
426
+ )?
427
+ .and_then(|value| value.as_str().map(ToOwned::to_owned))
428
+ .unwrap_or_else(|| descriptor.id.clone());
429
+
430
+ output.push(FileHistoryOutputRow {
431
+ entity_id: descriptor.id.clone(),
432
+ id,
433
+ path,
434
+ directory_id: descriptor.directory_id.clone(),
435
+ name: descriptor.name.clone(),
436
+ hidden: descriptor.hidden,
437
+ data,
438
+ descriptor_change: descriptor.entry.change.clone(),
439
+ event,
440
+ });
441
+ }
442
+ output.retain(|row| {
443
+ let entity_id = entity_id_json_array(&row.entity_id).ok();
444
+ route.matches_surface_row(
445
+ FILE_DESCRIPTOR_SCHEMA_KEY,
446
+ entity_id.as_deref().unwrap_or(&row.entity_id),
447
+ Some(&row.entity_id),
448
+ row.event.depth,
449
+ )
450
+ });
451
+
452
+ output.sort_by(|left, right| {
453
+ left.entity_id
454
+ .cmp(&right.entity_id)
455
+ .then(left.event.start_commit_id.cmp(&right.event.start_commit_id))
456
+ .then(left.event.depth.cmp(&right.event.depth))
457
+ .then(
458
+ left.event
459
+ .observed_commit_id
460
+ .cmp(&right.event.observed_commit_id),
461
+ )
462
+ .then(left.event.change.id.cmp(&right.event.change.id))
463
+ });
464
+ Ok(output)
465
+ }
466
+
467
+ async fn load_single_blob_bytes(
468
+ blob_reader: &Arc<dyn BlobDataReader>,
469
+ blob_hash: &str,
470
+ ) -> Result<Option<Vec<u8>>, LixError> {
471
+ let hash = BlobHash::from_hex(blob_hash)?;
472
+ Ok(blob_reader
473
+ .load_bytes_many(&[hash])
474
+ .await?
475
+ .into_vec()
476
+ .into_iter()
477
+ .next()
478
+ .flatten())
479
+ }
480
+
481
+ fn file_history_events(
482
+ event_descriptors: &[FileHistoryDescriptorRecord],
483
+ event_directories: &[FileHistoryDirectoryRecord],
484
+ event_blobs: &[FileHistoryBlobRecord],
485
+ context_descriptors: &[FileHistoryDescriptorRecord],
486
+ ) -> Vec<FileHistoryEvent> {
487
+ let mut descriptor_ids_by_start = BTreeSet::<(String, String)>::new();
488
+ let mut directory_ids_by_file_start = BTreeMap::<(String, String), BTreeSet<String>>::new();
489
+
490
+ for descriptor in context_descriptors {
491
+ let key = (
492
+ descriptor.id.clone(),
493
+ descriptor.entry.start_commit_id.clone(),
494
+ );
495
+ descriptor_ids_by_start.insert(key.clone());
496
+ if let Some(directory_id) = &descriptor.directory_id {
497
+ directory_ids_by_file_start
498
+ .entry(key)
499
+ .or_default()
500
+ .insert(directory_id.clone());
501
+ }
502
+ }
503
+
504
+ let mut candidates = Vec::new();
505
+ for descriptor in event_descriptors {
506
+ candidates.push(file_history_event_from_entry(
507
+ descriptor.id.clone(),
508
+ &descriptor.entry,
509
+ 1,
510
+ ));
511
+ }
512
+ for directory in event_directories {
513
+ for ((file_id, start_commit_id), directory_ids) in &directory_ids_by_file_start {
514
+ if start_commit_id == &directory.entry.start_commit_id
515
+ && directory_ids.contains(&directory.id)
516
+ {
517
+ candidates.push(file_history_event_from_entry(
518
+ file_id.clone(),
519
+ &directory.entry,
520
+ 2,
521
+ ));
522
+ }
523
+ }
524
+ }
525
+ for blob in event_blobs {
526
+ if descriptor_ids_by_start
527
+ .contains(&(blob.file_id.clone(), blob.entry.start_commit_id.clone()))
528
+ {
529
+ candidates.push(file_history_event_from_entry(
530
+ blob.file_id.clone(),
531
+ &blob.entry,
532
+ 3,
533
+ ));
534
+ }
535
+ }
536
+
537
+ candidates.sort_by(|left, right| {
538
+ left.file_id
539
+ .cmp(&right.file_id)
540
+ .then(left.start_commit_id.cmp(&right.start_commit_id))
541
+ .then(left.depth.cmp(&right.depth))
542
+ .then(left.priority.cmp(&right.priority))
543
+ .then(left.change.id.cmp(&right.change.id))
544
+ });
545
+ candidates.dedup_by(|left, right| {
546
+ left.file_id == right.file_id
547
+ && left.start_commit_id == right.start_commit_id
548
+ && left.depth == right.depth
549
+ });
550
+ candidates
551
+ }
552
+
553
+ fn file_history_event_from_entry(
554
+ file_id: String,
555
+ entry: &HistoryEntry,
556
+ priority: u8,
557
+ ) -> FileHistoryEvent {
558
+ FileHistoryEvent {
559
+ file_id,
560
+ start_commit_id: entry.start_commit_id.clone(),
561
+ depth: entry.depth,
562
+ priority,
563
+ change: entry.change.clone(),
564
+ observed_commit_id: entry.observed_commit_id.clone(),
565
+ commit_created_at: entry.commit_created_at.clone(),
566
+ }
567
+ }
568
+
569
+ fn parse_file_history_descriptors(
570
+ entries: &[HistoryEntry],
571
+ ) -> Result<Vec<FileHistoryDescriptorRecord>, LixError> {
572
+ entries
573
+ .iter()
574
+ .filter(|entry| entry.change.schema_key == FILE_DESCRIPTOR_SCHEMA_KEY)
575
+ .map(|entry| {
576
+ let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
577
+ return Ok(FileHistoryDescriptorRecord {
578
+ id: entry.change.entity_id.as_single_string_owned()?,
579
+ directory_id: None,
580
+ name: None,
581
+ hidden: None,
582
+ entry: entry.clone(),
583
+ });
584
+ };
585
+ let snapshot: FileDescriptorSnapshot =
586
+ serde_json::from_str(snapshot_content).map_err(|error| {
587
+ LixError::new(
588
+ "LIX_ERROR_UNKNOWN",
589
+ format!("invalid lix_file_descriptor history snapshot JSON: {error}"),
590
+ )
591
+ })?;
592
+ Ok(FileHistoryDescriptorRecord {
593
+ id: snapshot.id,
594
+ directory_id: snapshot.directory_id,
595
+ name: Some(snapshot.name),
596
+ hidden: Some(snapshot.hidden),
597
+ entry: entry.clone(),
598
+ })
599
+ })
600
+ .collect()
601
+ }
602
+
603
+ fn parse_file_history_directories(
604
+ entries: &[HistoryEntry],
605
+ ) -> Result<Vec<FileHistoryDirectoryRecord>, LixError> {
606
+ entries
607
+ .iter()
608
+ .filter(|entry| entry.change.schema_key == DIRECTORY_DESCRIPTOR_SCHEMA_KEY)
609
+ .filter_map(|entry| {
610
+ let snapshot_content = entry.change.snapshot_content.clone()?;
611
+ Some((entry, snapshot_content))
612
+ })
613
+ .map(|(entry, snapshot_content)| {
614
+ let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(&snapshot_content)
615
+ .map_err(|error| {
616
+ LixError::new(
617
+ "LIX_ERROR_UNKNOWN",
618
+ format!("invalid lix_directory_descriptor history snapshot JSON: {error}"),
619
+ )
620
+ })?;
621
+ Ok(FileHistoryDirectoryRecord {
622
+ id: snapshot.id,
623
+ parent_id: snapshot.parent_id,
624
+ name: snapshot.name,
625
+ entry: entry.clone(),
626
+ })
627
+ })
628
+ .collect()
629
+ }
630
+
631
+ fn parse_file_history_blobs(
632
+ entries: &[HistoryEntry],
633
+ ) -> Result<Vec<FileHistoryBlobRecord>, LixError> {
634
+ entries
635
+ .iter()
636
+ .filter(|entry| entry.change.schema_key == BLOB_REF_SCHEMA_KEY)
637
+ .map(|entry| {
638
+ let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
639
+ return Ok(FileHistoryBlobRecord {
640
+ file_id: entry.change.file_id.clone().unwrap_or_else(|| {
641
+ entry
642
+ .change
643
+ .entity_id
644
+ .as_single_string_owned()
645
+ .expect("canonical change entity identity should project")
646
+ }),
647
+ blob_hash: None,
648
+ entry: entry.clone(),
649
+ });
650
+ };
651
+ let snapshot: BlobRefSnapshot =
652
+ serde_json::from_str(snapshot_content).map_err(|error| {
653
+ LixError::new(
654
+ "LIX_ERROR_UNKNOWN",
655
+ format!("invalid lix_binary_blob_ref history snapshot JSON: {error}"),
656
+ )
657
+ })?;
658
+ Ok(FileHistoryBlobRecord {
659
+ file_id: entry.change.file_id.clone().unwrap_or(snapshot.id),
660
+ blob_hash: Some(snapshot.blob_hash),
661
+ entry: entry.clone(),
662
+ })
663
+ })
664
+ .collect()
665
+ }
666
+
667
+ fn nearest_file_descriptor<'a>(
668
+ descriptors: &'a [FileHistoryDescriptorRecord],
669
+ event: &FileHistoryEvent,
670
+ ) -> Option<&'a FileHistoryDescriptorRecord> {
671
+ descriptors
672
+ .iter()
673
+ .filter(|descriptor| {
674
+ let exact_descriptor_event =
675
+ history_descriptor_event_matches(&descriptor.entry, event.depth, &event.change.id);
676
+ (exact_descriptor_event || descriptor.name.is_some())
677
+ && descriptor.id == event.file_id
678
+ && descriptor.entry.start_commit_id == event.start_commit_id
679
+ && descriptor.entry.depth >= event.depth
680
+ })
681
+ .min_by(|left, right| {
682
+ left.entry
683
+ .depth
684
+ .cmp(&right.entry.depth)
685
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
686
+ })
687
+ }
688
+
689
+ fn nearest_blob_ref<'a>(
690
+ blobs: &'a [FileHistoryBlobRecord],
691
+ event: &FileHistoryEvent,
692
+ ) -> Option<&'a FileHistoryBlobRecord> {
693
+ blobs
694
+ .iter()
695
+ .filter(|blob| {
696
+ blob.file_id == event.file_id
697
+ && blob.entry.start_commit_id == event.start_commit_id
698
+ && blob.entry.depth >= event.depth
699
+ })
700
+ .min_by(|left, right| {
701
+ left.entry
702
+ .depth
703
+ .cmp(&right.entry.depth)
704
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
705
+ })
706
+ }
707
+
708
+ fn resolve_file_history_path(
709
+ descriptor: &FileHistoryDescriptorRecord,
710
+ directories: &[FileHistoryDirectoryRecord],
711
+ target_depth: u32,
712
+ ) -> Option<String> {
713
+ let name = descriptor.name.as_ref()?;
714
+ let Some(directory_id) = descriptor.directory_id.as_deref() else {
715
+ return Some(format!("/{name}"));
716
+ };
717
+ let directory_path = resolve_directory_history_path(
718
+ directory_id,
719
+ &descriptor.entry.start_commit_id,
720
+ target_depth,
721
+ directories,
722
+ &mut BTreeMap::new(),
723
+ &mut BTreeSet::new(),
724
+ )?;
725
+ Some(format!("{directory_path}{name}"))
726
+ }
727
+
728
+ fn resolve_directory_history_path(
729
+ directory_id: &str,
730
+ start_commit_id: &str,
731
+ target_depth: u32,
732
+ directories: &[FileHistoryDirectoryRecord],
733
+ cache: &mut BTreeMap<String, Option<String>>,
734
+ visiting: &mut BTreeSet<String>,
735
+ ) -> Option<String> {
736
+ if let Some(path) = cache.get(directory_id) {
737
+ return path.clone();
738
+ }
739
+ if !visiting.insert(directory_id.to_string()) {
740
+ cache.insert(directory_id.to_string(), None);
741
+ return None;
742
+ }
743
+ let directory = directories
744
+ .iter()
745
+ .filter(|directory| {
746
+ directory.id == directory_id
747
+ && directory.entry.start_commit_id == start_commit_id
748
+ && directory.entry.depth >= target_depth
749
+ })
750
+ .min_by(|left, right| {
751
+ left.entry
752
+ .depth
753
+ .cmp(&right.entry.depth)
754
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
755
+ })?;
756
+ let path = match directory.parent_id.as_deref() {
757
+ Some(parent_id) => {
758
+ let parent_path = resolve_directory_history_path(
759
+ parent_id,
760
+ start_commit_id,
761
+ target_depth,
762
+ directories,
763
+ cache,
764
+ visiting,
765
+ )?;
766
+ format!("{parent_path}{}/", directory.name)
767
+ }
768
+ None => format!("/{}/", directory.name),
769
+ };
770
+ visiting.remove(directory_id);
771
+ cache.insert(directory_id.to_string(), Some(path.clone()));
772
+ Some(path)
773
+ }
774
+
775
+ fn file_history_record_batch(
776
+ schema: &SchemaRef,
777
+ rows: &[FileHistoryOutputRow],
778
+ ) -> Result<RecordBatch, LixError> {
779
+ let columns = schema
780
+ .fields()
781
+ .iter()
782
+ .map(|field| file_history_column_array(field.name(), rows))
783
+ .collect::<Result<Vec<_>, _>>()?;
784
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
785
+ RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
786
+ LixError::new(
787
+ "LIX_ERROR_UNKNOWN",
788
+ format!("sql2 failed to build lix_file_history record batch: {error}"),
789
+ )
790
+ })
791
+ }
792
+
793
+ fn file_history_column_array(
794
+ column_name: &str,
795
+ rows: &[FileHistoryOutputRow],
796
+ ) -> Result<ArrayRef, LixError> {
797
+ Ok(match column_name {
798
+ "id" => string_array(rows.iter().map(|row| Some(row.id.as_str()))),
799
+ "path" => string_array(rows.iter().map(|row| row.path.as_deref())),
800
+ "directory_id" => string_array(rows.iter().map(|row| row.directory_id.as_deref())),
801
+ "name" => string_array(rows.iter().map(|row| row.name.as_deref())),
802
+ "hidden" => Arc::new(BooleanArray::from(
803
+ rows.iter().map(|row| row.hidden).collect::<Vec<_>>(),
804
+ )) as ArrayRef,
805
+ "data" => Arc::new(BinaryArray::from(
806
+ rows.iter()
807
+ .map(|row| row.data.as_deref())
808
+ .collect::<Vec<_>>(),
809
+ )) as ArrayRef,
810
+ HISTORY_COL_ENTITY_ID => Arc::new(StringArray::from(
811
+ rows.iter()
812
+ .map(|row| entity_id_json_array(&row.entity_id).map(Some))
813
+ .collect::<std::result::Result<Vec<_>, _>>()?,
814
+ )) as ArrayRef,
815
+ HISTORY_COL_SCHEMA_KEY => {
816
+ string_array(rows.iter().map(|_| Some(FILE_DESCRIPTOR_SCHEMA_KEY)))
817
+ }
818
+ HISTORY_COL_FILE_ID => string_array(rows.iter().map(|row| Some(row.entity_id.as_str()))),
819
+ HISTORY_COL_CHANGE_ID => {
820
+ string_array(rows.iter().map(|row| Some(row.event.change.id.as_str())))
821
+ }
822
+ HISTORY_COL_SNAPSHOT_CONTENT => string_array(
823
+ rows.iter()
824
+ .map(|row| row.descriptor_change.snapshot_content.as_deref()),
825
+ ),
826
+ HISTORY_COL_METADATA => Arc::new(StringArray::from(
827
+ rows.iter()
828
+ .map(|row| {
829
+ row.descriptor_change
830
+ .metadata
831
+ .as_ref()
832
+ .map(serialize_row_metadata)
833
+ })
834
+ .collect::<Vec<_>>(),
835
+ )),
836
+ HISTORY_COL_OBSERVED_COMMIT_ID => string_array(
837
+ rows.iter()
838
+ .map(|row| Some(row.event.observed_commit_id.as_str())),
839
+ ),
840
+ HISTORY_COL_COMMIT_CREATED_AT => string_array(
841
+ rows.iter()
842
+ .map(|row| Some(row.event.commit_created_at.as_str())),
843
+ ),
844
+ HISTORY_COL_START_COMMIT_ID => string_array(
845
+ rows.iter()
846
+ .map(|row| Some(row.event.start_commit_id.as_str())),
847
+ ),
848
+ HISTORY_COL_DEPTH => Arc::new(Int64Array::from(
849
+ rows.iter()
850
+ .map(|row| i64::from(row.event.depth))
851
+ .collect::<Vec<_>>(),
852
+ )) as ArrayRef,
853
+ other => {
854
+ return Err(LixError::new(
855
+ "LIX_ERROR_UNKNOWN",
856
+ format!(
857
+ "sql2 lix_file_history provider does not support projected column '{other}'"
858
+ ),
859
+ ))
860
+ }
861
+ })
862
+ }
863
+
864
+ fn lix_file_history_schema() -> SchemaRef {
865
+ Arc::new(Schema::new(vec![
866
+ Field::new("id", DataType::Utf8, false),
867
+ Field::new("path", DataType::Utf8, true),
868
+ Field::new("directory_id", DataType::Utf8, true),
869
+ Field::new("name", DataType::Utf8, true),
870
+ Field::new("hidden", DataType::Boolean, true),
871
+ Field::new("data", DataType::Binary, true),
872
+ json_field(HISTORY_COL_ENTITY_ID, false),
873
+ Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
874
+ Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
875
+ json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
876
+ Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
877
+ json_field(HISTORY_COL_METADATA, true),
878
+ Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
879
+ Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
880
+ Field::new(HISTORY_COL_START_COMMIT_ID, DataType::Utf8, false),
881
+ Field::new(HISTORY_COL_DEPTH, DataType::Int64, false),
882
+ ]))
883
+ }
884
+
885
+ fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
886
+ let Some(projection) = projection else {
887
+ return Ok(Arc::clone(base_schema));
888
+ };
889
+ Ok(Arc::new(base_schema.project(projection)?))
890
+ }
891
+
892
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
893
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
894
+ }
895
+
896
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
897
+ super::error::datafusion_error_to_lix_error(error)
898
+ }
899
+
900
+ fn entity_id_json_array(entity_id: &str) -> Result<String, LixError> {
901
+ serde_json::to_string(&[entity_id]).map_err(|error| {
902
+ LixError::unknown(format!(
903
+ "failed to encode history entity id as JSON: {error}"
904
+ ))
905
+ })
906
+ }
907
+
908
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
909
+ super::error::lix_error_to_datafusion_error(error)
910
+ }