@lix-js/sdk 0.6.0-preview.0 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/README.md +9 -0
  2. package/SKILL.md +468 -0
  3. package/dist/engine-wasm/index.d.ts +15 -11
  4. package/dist/engine-wasm/index.js +105 -38
  5. package/dist/engine-wasm/wasm/lix_engine.d.ts +14 -2
  6. package/dist/engine-wasm/wasm/lix_engine.js +18 -17
  7. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  8. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +2 -1
  9. package/dist/generated/builtin-schemas.d.ts +31 -41
  10. package/dist/generated/builtin-schemas.js +52 -56
  11. package/dist/open-lix.d.ts +141 -24
  12. package/dist/open-lix.js +199 -35
  13. package/dist/sqlite/index.js +99 -22
  14. package/dist-engine-src/README.md +18 -0
  15. package/dist-engine-src/src/backend/kv.rs +358 -0
  16. package/dist-engine-src/src/backend/mod.rs +12 -0
  17. package/dist-engine-src/src/backend/testing.rs +658 -0
  18. package/dist-engine-src/src/backend/types.rs +96 -0
  19. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  20. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  21. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  22. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  23. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  24. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  25. package/dist-engine-src/src/cel/context.rs +86 -0
  26. package/dist-engine-src/src/cel/error.rs +19 -0
  27. package/dist-engine-src/src/cel/mod.rs +8 -0
  28. package/dist-engine-src/src/cel/provider.rs +9 -0
  29. package/dist-engine-src/src/cel/runtime.rs +167 -0
  30. package/dist-engine-src/src/cel/value.rs +50 -0
  31. package/dist-engine-src/src/changelog/codec.rs +321 -0
  32. package/dist-engine-src/src/changelog/context.rs +92 -0
  33. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  34. package/dist-engine-src/src/changelog/mod.rs +13 -0
  35. package/dist-engine-src/src/changelog/reader.rs +20 -0
  36. package/dist-engine-src/src/changelog/storage.rs +220 -0
  37. package/dist-engine-src/src/changelog/types.rs +38 -0
  38. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  39. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  40. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  41. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +135 -0
  46. package/dist-engine-src/src/common/metadata.rs +35 -0
  47. package/dist-engine-src/src/common/mod.rs +23 -0
  48. package/dist-engine-src/src/common/types.rs +105 -0
  49. package/dist-engine-src/src/common/wire.rs +222 -0
  50. package/dist-engine-src/src/engine.rs +239 -0
  51. package/dist-engine-src/src/entity_identity.rs +285 -0
  52. package/dist-engine-src/src/functions/context.rs +327 -0
  53. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  54. package/dist-engine-src/src/functions/mod.rs +18 -0
  55. package/dist-engine-src/src/functions/provider.rs +130 -0
  56. package/dist-engine-src/src/functions/state.rs +363 -0
  57. package/dist-engine-src/src/functions/types.rs +37 -0
  58. package/dist-engine-src/src/init.rs +505 -0
  59. package/dist-engine-src/src/json_store/compression.rs +77 -0
  60. package/dist-engine-src/src/json_store/context.rs +129 -0
  61. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  62. package/dist-engine-src/src/json_store/mod.rs +9 -0
  63. package/dist-engine-src/src/json_store/store.rs +236 -0
  64. package/dist-engine-src/src/json_store/types.rs +52 -0
  65. package/dist-engine-src/src/lib.rs +61 -0
  66. package/dist-engine-src/src/live_state/context.rs +2241 -0
  67. package/dist-engine-src/src/live_state/mod.rs +15 -0
  68. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  69. package/dist-engine-src/src/live_state/reader.rs +23 -0
  70. package/dist-engine-src/src/live_state/types.rs +239 -0
  71. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  72. package/dist-engine-src/src/plugin/archive.rs +441 -0
  73. package/dist-engine-src/src/plugin/component.rs +183 -0
  74. package/dist-engine-src/src/plugin/install.rs +637 -0
  75. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  76. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  77. package/dist-engine-src/src/plugin/mod.rs +33 -0
  78. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  79. package/dist-engine-src/src/plugin/storage.rs +74 -0
  80. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  81. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  82. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  83. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  84. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  85. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  86. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  92. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  93. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  94. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  100. package/dist-engine-src/src/schema/definition.json +157 -0
  101. package/dist-engine-src/src/schema/definition.rs +636 -0
  102. package/dist-engine-src/src/schema/key.rs +206 -0
  103. package/dist-engine-src/src/schema/mod.rs +20 -0
  104. package/dist-engine-src/src/schema/seed.rs +14 -0
  105. package/dist-engine-src/src/schema/tests.rs +739 -0
  106. package/dist-engine-src/src/schema_registry.rs +294 -0
  107. package/dist-engine-src/src/session/context.rs +366 -0
  108. package/dist-engine-src/src/session/create_version.rs +80 -0
  109. package/dist-engine-src/src/session/execute.rs +447 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +437 -0
  116. package/dist-engine-src/src/session/mod.rs +25 -0
  117. package/dist-engine-src/src/session/switch_version.rs +121 -0
  118. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  119. package/dist-engine-src/src/sql2/classify.rs +147 -0
  120. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  121. package/dist-engine-src/src/sql2/context.rs +307 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  127. package/dist-engine-src/src/sql2/error.rs +196 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  138. package/dist-engine-src/src/sql2/mod.rs +43 -0
  139. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  140. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  141. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  142. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  143. package/dist-engine-src/src/sql2/session.rs +135 -0
  144. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  148. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  149. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  150. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  153. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  154. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  155. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  156. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  157. package/dist-engine-src/src/storage/context.rs +356 -0
  158. package/dist-engine-src/src/storage/mod.rs +14 -0
  159. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  160. package/dist-engine-src/src/storage/types.rs +501 -0
  161. package/dist-engine-src/src/storage_bench.rs +3406 -0
  162. package/dist-engine-src/src/test_support.rs +81 -0
  163. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  164. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  165. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  166. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  167. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  168. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  169. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  170. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  171. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  172. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  173. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  174. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  175. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  176. package/dist-engine-src/src/transaction/context.rs +1307 -0
  177. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  178. package/dist-engine-src/src/transaction/mod.rs +11 -0
  179. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  180. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  181. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  182. package/dist-engine-src/src/transaction/types.rs +351 -0
  183. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  184. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  185. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  186. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  187. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  188. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  189. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  190. package/dist-engine-src/src/version/context.rs +52 -0
  191. package/dist-engine-src/src/version/mod.rs +12 -0
  192. package/dist-engine-src/src/version/refs.rs +421 -0
  193. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  194. package/dist-engine-src/src/version/types.rs +21 -0
  195. package/dist-engine-src/src/wasm/mod.rs +60 -0
  196. package/package.json +68 -63
@@ -0,0 +1,623 @@
1
+ use std::any::Any;
2
+ use std::collections::{BTreeMap, BTreeSet};
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, Int64Array, StringArray};
7
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
8
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
9
+ use datafusion::catalog::{Session, TableProvider};
10
+ use datafusion::common::{DataFusionError, Result};
11
+ use datafusion::datasource::TableType;
12
+ use datafusion::execution::TaskContext;
13
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
14
+ use datafusion::physical_expr::EquivalenceProperties;
15
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
16
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
17
+ use datafusion::physical_plan::{
18
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
19
+ };
20
+ use futures_util::stream;
21
+ use serde::Deserialize;
22
+ use tokio::sync::Mutex;
23
+
24
+ use crate::changelog::MaterializedCanonicalChange;
25
+ use crate::commit_graph::CommitGraphReader;
26
+ use crate::serialize_row_metadata;
27
+ use crate::LixError;
28
+
29
+ use super::history_projection::{tombstone_identity_column_value, HistoryIdentityProjection};
30
+ use super::history_route::{
31
+ history_descriptor_event_matches, load_history_entries, parse_history_filter,
32
+ HistoryColumnStyle, HistoryEntry, HistoryRoute, HistoryViewDescriptor, HISTORY_COL_CHANGE_ID,
33
+ HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID, HISTORY_COL_FILE_ID,
34
+ HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID, HISTORY_COL_SCHEMA_KEY,
35
+ HISTORY_COL_SCHEMA_VERSION, HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
36
+ };
37
+ use super::result_metadata::json_field;
38
+ use super::SqlChangelogQuerySource;
39
+
40
+ const DIRECTORY_DESCRIPTOR_SCHEMA_KEY: &str = "lix_directory_descriptor";
41
+
42
+ pub(crate) async fn register_lix_directory_history_provider(
43
+ session: &datafusion::prelude::SessionContext,
44
+ commit_graph: Box<dyn CommitGraphReader>,
45
+ query_source: SqlChangelogQuerySource,
46
+ ) -> Result<(), LixError> {
47
+ session
48
+ .register_table(
49
+ "lix_directory_history",
50
+ Arc::new(LixDirectoryHistoryProvider::new(
51
+ Arc::new(Mutex::new(commit_graph)),
52
+ query_source,
53
+ )),
54
+ )
55
+ .map_err(datafusion_error_to_lix_error)?;
56
+ Ok(())
57
+ }
58
+
59
+ struct LixDirectoryHistoryProvider {
60
+ schema: SchemaRef,
61
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
62
+ query_source: SqlChangelogQuerySource,
63
+ }
64
+
65
+ impl std::fmt::Debug for LixDirectoryHistoryProvider {
66
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67
+ f.debug_struct("LixDirectoryHistoryProvider").finish()
68
+ }
69
+ }
70
+
71
+ impl LixDirectoryHistoryProvider {
72
+ fn new(
73
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
74
+ query_source: SqlChangelogQuerySource,
75
+ ) -> Self {
76
+ Self {
77
+ schema: lix_directory_history_schema(),
78
+ commit_graph,
79
+ query_source,
80
+ }
81
+ }
82
+ }
83
+
84
+ #[async_trait]
85
+ impl TableProvider for LixDirectoryHistoryProvider {
86
+ fn as_any(&self) -> &dyn Any {
87
+ self
88
+ }
89
+
90
+ fn schema(&self) -> SchemaRef {
91
+ Arc::clone(&self.schema)
92
+ }
93
+
94
+ fn table_type(&self) -> TableType {
95
+ TableType::View
96
+ }
97
+
98
+ fn supports_filters_pushdown(
99
+ &self,
100
+ filters: &[&Expr],
101
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
102
+ Ok(filters
103
+ .iter()
104
+ .map(|filter| {
105
+ if parse_history_filter(filter, HistoryColumnStyle::Prefixed).is_some() {
106
+ TableProviderFilterPushDown::Exact
107
+ } else {
108
+ TableProviderFilterPushDown::Unsupported
109
+ }
110
+ })
111
+ .collect())
112
+ }
113
+
114
+ async fn scan(
115
+ &self,
116
+ _state: &dyn Session,
117
+ projection: Option<&Vec<usize>>,
118
+ filters: &[Expr],
119
+ limit: Option<usize>,
120
+ ) -> Result<Arc<dyn ExecutionPlan>> {
121
+ Ok(Arc::new(LixDirectoryHistoryScanExec::new(
122
+ Arc::clone(&self.commit_graph),
123
+ self.query_source.clone(),
124
+ projected_schema(&self.schema, projection)?,
125
+ HistoryRoute::from_filters(filters, HistoryColumnStyle::Prefixed),
126
+ limit,
127
+ )))
128
+ }
129
+ }
130
+
131
+ struct LixDirectoryHistoryScanExec {
132
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
133
+ query_source: SqlChangelogQuerySource,
134
+ schema: SchemaRef,
135
+ route: HistoryRoute,
136
+ limit: Option<usize>,
137
+ properties: Arc<PlanProperties>,
138
+ }
139
+
140
+ impl std::fmt::Debug for LixDirectoryHistoryScanExec {
141
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
142
+ f.debug_struct("LixDirectoryHistoryScanExec")
143
+ .field("route", &self.route)
144
+ .field("limit", &self.limit)
145
+ .finish()
146
+ }
147
+ }
148
+
149
+ impl LixDirectoryHistoryScanExec {
150
+ fn new(
151
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
152
+ query_source: SqlChangelogQuerySource,
153
+ schema: SchemaRef,
154
+ route: HistoryRoute,
155
+ limit: Option<usize>,
156
+ ) -> Self {
157
+ let properties = PlanProperties::new(
158
+ EquivalenceProperties::new(Arc::clone(&schema)),
159
+ Partitioning::UnknownPartitioning(1),
160
+ EmissionType::Incremental,
161
+ Boundedness::Bounded,
162
+ );
163
+ Self {
164
+ commit_graph,
165
+ query_source,
166
+ schema,
167
+ route,
168
+ limit,
169
+ properties: Arc::new(properties),
170
+ }
171
+ }
172
+ }
173
+
174
+ impl DisplayAs for LixDirectoryHistoryScanExec {
175
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176
+ match t {
177
+ DisplayFormatType::Default | DisplayFormatType::Verbose => write!(
178
+ f,
179
+ "LixDirectoryHistoryScanExec(route={:?}, limit={:?})",
180
+ self.route, self.limit
181
+ ),
182
+ DisplayFormatType::TreeRender => write!(f, "LixDirectoryHistoryScanExec"),
183
+ }
184
+ }
185
+ }
186
+
187
+ impl ExecutionPlan for LixDirectoryHistoryScanExec {
188
+ fn name(&self) -> &str {
189
+ "LixDirectoryHistoryScanExec"
190
+ }
191
+
192
+ fn as_any(&self) -> &dyn Any {
193
+ self
194
+ }
195
+
196
+ fn properties(&self) -> &Arc<PlanProperties> {
197
+ &self.properties
198
+ }
199
+
200
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
201
+ Vec::new()
202
+ }
203
+
204
+ fn with_new_children(
205
+ self: Arc<Self>,
206
+ children: Vec<Arc<dyn ExecutionPlan>>,
207
+ ) -> Result<Arc<dyn ExecutionPlan>> {
208
+ if !children.is_empty() {
209
+ return Err(DataFusionError::Execution(
210
+ "LixDirectoryHistoryScanExec does not accept children".to_string(),
211
+ ));
212
+ }
213
+ Ok(self)
214
+ }
215
+
216
+ fn execute(
217
+ &self,
218
+ partition: usize,
219
+ _context: Arc<TaskContext>,
220
+ ) -> Result<SendableRecordBatchStream> {
221
+ if partition != 0 {
222
+ return Err(DataFusionError::Execution(format!(
223
+ "LixDirectoryHistoryScanExec only exposes one partition, got {partition}"
224
+ )));
225
+ }
226
+
227
+ let commit_graph = Arc::clone(&self.commit_graph);
228
+ let query_source = self.query_source.clone();
229
+ let schema = Arc::clone(&self.schema);
230
+ let stream_schema = Arc::clone(&schema);
231
+ let route = self.route.clone();
232
+ let limit = self.limit;
233
+ let fut = async move {
234
+ let mut rows = load_directory_history_rows(commit_graph, query_source, &route)
235
+ .await
236
+ .map_err(lix_error_to_datafusion_error)?;
237
+ if let Some(limit) = limit {
238
+ rows.truncate(limit);
239
+ }
240
+ directory_history_record_batch(&stream_schema, &rows)
241
+ .map_err(lix_error_to_datafusion_error)
242
+ };
243
+
244
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
245
+ schema,
246
+ stream::once(fut),
247
+ )))
248
+ }
249
+ }
250
+
251
+ #[derive(Debug, Clone)]
252
+ struct DirectoryHistoryRecord {
253
+ id: String,
254
+ parent_id: Option<String>,
255
+ name: Option<String>,
256
+ hidden: Option<bool>,
257
+ entry: HistoryEntry,
258
+ }
259
+
260
+ #[derive(Debug, Clone)]
261
+ struct DirectoryHistoryOutputRow {
262
+ entity_id: String,
263
+ id: String,
264
+ path: Option<String>,
265
+ parent_id: Option<String>,
266
+ name: Option<String>,
267
+ hidden: Option<bool>,
268
+ descriptor_change: MaterializedCanonicalChange,
269
+ event: DirectoryHistoryEvent,
270
+ }
271
+
272
+ #[derive(Debug, Clone)]
273
+ struct DirectoryHistoryEvent {
274
+ directory_id: String,
275
+ start_commit_id: String,
276
+ depth: u32,
277
+ change: MaterializedCanonicalChange,
278
+ observed_commit_id: String,
279
+ commit_created_at: String,
280
+ }
281
+
282
+ #[derive(Debug, Deserialize)]
283
+ struct DirectoryDescriptorSnapshot {
284
+ id: String,
285
+ parent_id: Option<String>,
286
+ name: String,
287
+ hidden: Option<bool>,
288
+ }
289
+
290
+ async fn load_directory_history_rows(
291
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
292
+ query_source: SqlChangelogQuerySource,
293
+ route: &HistoryRoute,
294
+ ) -> Result<Vec<DirectoryHistoryOutputRow>, LixError> {
295
+ let event_route = route.traversal_only();
296
+ let event_entries = load_history_entries(
297
+ HistoryViewDescriptor {
298
+ view_name: "lix_directory_history",
299
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
300
+ },
301
+ Arc::clone(&commit_graph),
302
+ query_source.json_reader.clone(),
303
+ &event_route,
304
+ vec![DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string()],
305
+ )
306
+ .await?;
307
+ let context_route = route.starts_only();
308
+ let context_entries = load_history_entries(
309
+ HistoryViewDescriptor {
310
+ view_name: "lix_directory_history",
311
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
312
+ },
313
+ commit_graph,
314
+ query_source.json_reader,
315
+ &context_route,
316
+ vec![DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string()],
317
+ )
318
+ .await?;
319
+ let event_descriptors = parse_directory_history_records(&event_entries)?;
320
+ let descriptors = parse_directory_history_records(&context_entries)?;
321
+ let mut output = Vec::new();
322
+
323
+ for descriptor in &event_descriptors {
324
+ let event = directory_history_event_from_entry(&descriptor.id, &descriptor.entry);
325
+ let Some(visible_descriptor) = nearest_directory_descriptor(&descriptors, &event) else {
326
+ continue;
327
+ };
328
+ let path = if visible_descriptor.name.is_some() {
329
+ resolve_directory_history_path(
330
+ &visible_descriptor.id,
331
+ &event.start_commit_id,
332
+ event.depth,
333
+ &descriptors,
334
+ &mut BTreeMap::new(),
335
+ &mut BTreeSet::new(),
336
+ )
337
+ } else {
338
+ None
339
+ };
340
+ let id = tombstone_identity_column_value(
341
+ "id",
342
+ &visible_descriptor.id,
343
+ HistoryIdentityProjection::SingleColumn { column: "id" },
344
+ )?
345
+ .and_then(|value| value.as_str().map(ToOwned::to_owned))
346
+ .unwrap_or_else(|| visible_descriptor.id.clone());
347
+ output.push(DirectoryHistoryOutputRow {
348
+ entity_id: visible_descriptor.id.clone(),
349
+ id,
350
+ path,
351
+ parent_id: visible_descriptor.parent_id.clone(),
352
+ name: visible_descriptor.name.clone(),
353
+ hidden: visible_descriptor.hidden,
354
+ descriptor_change: visible_descriptor.entry.change.clone(),
355
+ event,
356
+ });
357
+ }
358
+ output.retain(|row| {
359
+ route.matches_surface_row(
360
+ DIRECTORY_DESCRIPTOR_SCHEMA_KEY,
361
+ &row.entity_id,
362
+ None,
363
+ row.event.depth,
364
+ )
365
+ });
366
+
367
+ output.sort_by(|left, right| {
368
+ left.entity_id
369
+ .cmp(&right.entity_id)
370
+ .then(left.event.start_commit_id.cmp(&right.event.start_commit_id))
371
+ .then(left.event.depth.cmp(&right.event.depth))
372
+ .then(
373
+ left.event
374
+ .observed_commit_id
375
+ .cmp(&right.event.observed_commit_id),
376
+ )
377
+ .then(left.event.change.id.cmp(&right.event.change.id))
378
+ });
379
+ Ok(output)
380
+ }
381
+
382
+ fn parse_directory_history_records(
383
+ entries: &[HistoryEntry],
384
+ ) -> Result<Vec<DirectoryHistoryRecord>, LixError> {
385
+ entries
386
+ .iter()
387
+ .filter(|entry| entry.change.schema_key == DIRECTORY_DESCRIPTOR_SCHEMA_KEY)
388
+ .map(|entry| {
389
+ let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
390
+ return Ok(DirectoryHistoryRecord {
391
+ id: entry.change.entity_id.as_string()?,
392
+ parent_id: None,
393
+ name: None,
394
+ hidden: None,
395
+ entry: entry.clone(),
396
+ });
397
+ };
398
+ let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(snapshot_content)
399
+ .map_err(|error| {
400
+ LixError::new(
401
+ "LIX_ERROR_UNKNOWN",
402
+ format!("invalid lix_directory_descriptor history snapshot JSON: {error}"),
403
+ )
404
+ })?;
405
+ Ok(DirectoryHistoryRecord {
406
+ id: snapshot.id,
407
+ parent_id: snapshot.parent_id,
408
+ name: Some(snapshot.name),
409
+ hidden: Some(snapshot.hidden.unwrap_or(false)),
410
+ entry: entry.clone(),
411
+ })
412
+ })
413
+ .collect()
414
+ }
415
+
416
+ fn directory_history_event_from_entry(
417
+ directory_id: &str,
418
+ entry: &HistoryEntry,
419
+ ) -> DirectoryHistoryEvent {
420
+ DirectoryHistoryEvent {
421
+ directory_id: directory_id.to_string(),
422
+ start_commit_id: entry.start_commit_id.clone(),
423
+ depth: entry.depth,
424
+ change: entry.change.clone(),
425
+ observed_commit_id: entry.observed_commit_id.clone(),
426
+ commit_created_at: entry.commit_created_at.clone(),
427
+ }
428
+ }
429
+
430
+ fn nearest_directory_descriptor<'a>(
431
+ descriptors: &'a [DirectoryHistoryRecord],
432
+ event: &DirectoryHistoryEvent,
433
+ ) -> Option<&'a DirectoryHistoryRecord> {
434
+ descriptors
435
+ .iter()
436
+ .filter(|descriptor| {
437
+ let exact_descriptor_event =
438
+ history_descriptor_event_matches(&descriptor.entry, event.depth, &event.change.id);
439
+ (exact_descriptor_event || descriptor.name.is_some())
440
+ && descriptor.id == event.directory_id
441
+ && descriptor.entry.start_commit_id == event.start_commit_id
442
+ && descriptor.entry.depth >= event.depth
443
+ })
444
+ .min_by(|left, right| {
445
+ left.entry
446
+ .depth
447
+ .cmp(&right.entry.depth)
448
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
449
+ })
450
+ }
451
+
452
+ fn resolve_directory_history_path(
453
+ directory_id: &str,
454
+ start_commit_id: &str,
455
+ target_depth: u32,
456
+ directories: &[DirectoryHistoryRecord],
457
+ cache: &mut BTreeMap<String, Option<String>>,
458
+ visiting: &mut BTreeSet<String>,
459
+ ) -> Option<String> {
460
+ if let Some(path) = cache.get(directory_id) {
461
+ return path.clone();
462
+ }
463
+ if !visiting.insert(directory_id.to_string()) {
464
+ cache.insert(directory_id.to_string(), None);
465
+ return None;
466
+ }
467
+ let directory = directories
468
+ .iter()
469
+ .filter(|directory| {
470
+ directory.name.is_some()
471
+ && directory.id == directory_id
472
+ && directory.entry.start_commit_id == start_commit_id
473
+ && directory.entry.depth >= target_depth
474
+ })
475
+ .min_by(|left, right| {
476
+ left.entry
477
+ .depth
478
+ .cmp(&right.entry.depth)
479
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
480
+ })?;
481
+ let name = directory.name.as_ref()?;
482
+ let path = match directory.parent_id.as_deref() {
483
+ Some(parent_id) => {
484
+ let parent_path = resolve_directory_history_path(
485
+ parent_id,
486
+ start_commit_id,
487
+ target_depth,
488
+ directories,
489
+ cache,
490
+ visiting,
491
+ )?;
492
+ format!("{parent_path}{name}/")
493
+ }
494
+ None => format!("/{name}/"),
495
+ };
496
+ visiting.remove(directory_id);
497
+ cache.insert(directory_id.to_string(), Some(path.clone()));
498
+ Some(path)
499
+ }
500
+
501
+ fn directory_history_record_batch(
502
+ schema: &SchemaRef,
503
+ rows: &[DirectoryHistoryOutputRow],
504
+ ) -> Result<RecordBatch, LixError> {
505
+ let columns = schema
506
+ .fields()
507
+ .iter()
508
+ .map(|field| directory_history_column_array(field.name(), rows))
509
+ .collect::<Result<Vec<_>, _>>()?;
510
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
511
+ RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
512
+ LixError::new(
513
+ "LIX_ERROR_UNKNOWN",
514
+ format!("sql2 failed to build lix_directory_history record batch: {error}"),
515
+ )
516
+ })
517
+ }
518
+
519
+ fn directory_history_column_array(
520
+ column_name: &str,
521
+ rows: &[DirectoryHistoryOutputRow],
522
+ ) -> Result<ArrayRef, LixError> {
523
+ Ok(match column_name {
524
+ "id" => string_array(rows.iter().map(|row| Some(row.id.as_str()))),
525
+ "path" => string_array(rows.iter().map(|row| row.path.as_deref())),
526
+ "parent_id" => string_array(rows.iter().map(|row| row.parent_id.as_deref())),
527
+ "name" => string_array(rows.iter().map(|row| row.name.as_deref())),
528
+ "hidden" => Arc::new(BooleanArray::from(
529
+ rows.iter().map(|row| row.hidden).collect::<Vec<_>>(),
530
+ )) as ArrayRef,
531
+ HISTORY_COL_ENTITY_ID => string_array(rows.iter().map(|row| Some(row.entity_id.as_str()))),
532
+ HISTORY_COL_SCHEMA_KEY => {
533
+ string_array(rows.iter().map(|_| Some(DIRECTORY_DESCRIPTOR_SCHEMA_KEY)))
534
+ }
535
+ HISTORY_COL_FILE_ID => string_array(rows.iter().map(|_| None)),
536
+ HISTORY_COL_SCHEMA_VERSION => string_array(
537
+ rows.iter()
538
+ .map(|row| Some(row.descriptor_change.schema_version.as_str())),
539
+ ),
540
+ HISTORY_COL_CHANGE_ID => {
541
+ string_array(rows.iter().map(|row| Some(row.event.change.id.as_str())))
542
+ }
543
+ HISTORY_COL_SNAPSHOT_CONTENT => string_array(
544
+ rows.iter()
545
+ .map(|row| row.descriptor_change.snapshot_content.as_deref()),
546
+ ),
547
+ HISTORY_COL_METADATA => Arc::new(StringArray::from(
548
+ rows.iter()
549
+ .map(|row| {
550
+ row.descriptor_change
551
+ .metadata
552
+ .as_ref()
553
+ .map(serialize_row_metadata)
554
+ })
555
+ .collect::<Vec<_>>(),
556
+ )),
557
+ HISTORY_COL_OBSERVED_COMMIT_ID => string_array(
558
+ rows.iter()
559
+ .map(|row| Some(row.event.observed_commit_id.as_str())),
560
+ ),
561
+ HISTORY_COL_COMMIT_CREATED_AT => string_array(
562
+ rows.iter()
563
+ .map(|row| Some(row.event.commit_created_at.as_str())),
564
+ ),
565
+ HISTORY_COL_START_COMMIT_ID => string_array(
566
+ rows.iter()
567
+ .map(|row| Some(row.event.start_commit_id.as_str())),
568
+ ),
569
+ HISTORY_COL_DEPTH => Arc::new(Int64Array::from(
570
+ rows.iter()
571
+ .map(|row| i64::from(row.event.depth))
572
+ .collect::<Vec<_>>(),
573
+ )) as ArrayRef,
574
+ other => {
575
+ return Err(LixError::new(
576
+ "LIX_ERROR_UNKNOWN",
577
+ format!(
578
+ "sql2 lix_directory_history provider does not support projected column '{other}'"
579
+ ),
580
+ ))
581
+ }
582
+ })
583
+ }
584
+
585
+ fn lix_directory_history_schema() -> SchemaRef {
586
+ Arc::new(Schema::new(vec![
587
+ Field::new("id", DataType::Utf8, false),
588
+ Field::new("path", DataType::Utf8, true),
589
+ Field::new("parent_id", DataType::Utf8, true),
590
+ Field::new("name", DataType::Utf8, true),
591
+ Field::new("hidden", DataType::Boolean, true),
592
+ Field::new(HISTORY_COL_ENTITY_ID, DataType::Utf8, false),
593
+ Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
594
+ Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
595
+ json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
596
+ Field::new(HISTORY_COL_SCHEMA_VERSION, DataType::Utf8, false),
597
+ Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
598
+ json_field(HISTORY_COL_METADATA, true),
599
+ Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
600
+ Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
601
+ Field::new(HISTORY_COL_START_COMMIT_ID, DataType::Utf8, false),
602
+ Field::new(HISTORY_COL_DEPTH, DataType::Int64, false),
603
+ ]))
604
+ }
605
+
606
+ fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
607
+ let Some(projection) = projection else {
608
+ return Ok(Arc::clone(base_schema));
609
+ };
610
+ Ok(Arc::new(base_schema.project(projection)?))
611
+ }
612
+
613
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
614
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
615
+ }
616
+
617
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
618
+ super::error::datafusion_error_to_lix_error(error)
619
+ }
620
+
621
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
622
+ super::error::lix_error_to_datafusion_error(error)
623
+ }