@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,631 @@
1
+ use std::any::Any;
2
+ use std::collections::{BTreeMap, BTreeSet};
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, Int64Array, StringArray};
7
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
8
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
9
+ use datafusion::catalog::{Session, TableProvider};
10
+ use datafusion::common::{DataFusionError, Result};
11
+ use datafusion::datasource::TableType;
12
+ use datafusion::execution::TaskContext;
13
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
14
+ use datafusion::physical_expr::EquivalenceProperties;
15
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
16
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
17
+ use datafusion::physical_plan::{
18
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
19
+ };
20
+ use futures_util::stream;
21
+ use serde::Deserialize;
22
+ use tokio::sync::Mutex;
23
+
24
+ use crate::commit_graph::CommitGraphReader;
25
+ use crate::serialize_row_metadata;
26
+ use crate::LixError;
27
+
28
+ use super::history_projection::{tombstone_identity_column_value, HistoryIdentityProjection};
29
+ use super::history_route::{
30
+ history_descriptor_event_matches, load_history_entries, parse_history_filter,
31
+ HistoryColumnStyle, HistoryEntry, HistoryRoute, HistoryViewDescriptor, HISTORY_COL_CHANGE_ID,
32
+ HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID, HISTORY_COL_FILE_ID,
33
+ HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID, HISTORY_COL_SCHEMA_KEY,
34
+ HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
35
+ };
36
+ use super::result_metadata::json_field;
37
+ use super::SqlCommitStoreQuerySource;
38
+ use crate::commit_store::MaterializedChange;
39
+
40
/// Schema key of the change rows this provider reads; it is also the value
/// emitted for every row in the schema-key history column.
const DIRECTORY_DESCRIPTOR_SCHEMA_KEY: &str = "lix_directory_descriptor";
41
+
42
+ pub(crate) async fn register_lix_directory_history_provider(
43
+ session: &datafusion::prelude::SessionContext,
44
+ commit_graph: Box<dyn CommitGraphReader>,
45
+ query_source: SqlCommitStoreQuerySource,
46
+ ) -> Result<(), LixError> {
47
+ session
48
+ .register_table(
49
+ "lix_directory_history",
50
+ Arc::new(LixDirectoryHistoryProvider::new(
51
+ Arc::new(Mutex::new(commit_graph)),
52
+ query_source,
53
+ )),
54
+ )
55
+ .map_err(datafusion_error_to_lix_error)?;
56
+ Ok(())
57
+ }
58
+
59
+ struct LixDirectoryHistoryProvider {
60
+ schema: SchemaRef,
61
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
62
+ query_source: SqlCommitStoreQuerySource,
63
+ }
64
+
65
+ impl std::fmt::Debug for LixDirectoryHistoryProvider {
66
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67
+ f.debug_struct("LixDirectoryHistoryProvider").finish()
68
+ }
69
+ }
70
+
71
+ impl LixDirectoryHistoryProvider {
72
+ fn new(
73
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
74
+ query_source: SqlCommitStoreQuerySource,
75
+ ) -> Self {
76
+ Self {
77
+ schema: lix_directory_history_schema(),
78
+ commit_graph,
79
+ query_source,
80
+ }
81
+ }
82
+ }
83
+
84
+ #[async_trait]
85
+ impl TableProvider for LixDirectoryHistoryProvider {
86
+ fn as_any(&self) -> &dyn Any {
87
+ self
88
+ }
89
+
90
+ fn schema(&self) -> SchemaRef {
91
+ Arc::clone(&self.schema)
92
+ }
93
+
94
+ fn table_type(&self) -> TableType {
95
+ TableType::View
96
+ }
97
+
98
+ fn supports_filters_pushdown(
99
+ &self,
100
+ filters: &[&Expr],
101
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
102
+ Ok(filters
103
+ .iter()
104
+ .map(|filter| {
105
+ if parse_history_filter(filter, HistoryColumnStyle::Prefixed).is_some() {
106
+ TableProviderFilterPushDown::Exact
107
+ } else {
108
+ TableProviderFilterPushDown::Unsupported
109
+ }
110
+ })
111
+ .collect())
112
+ }
113
+
114
+ async fn scan(
115
+ &self,
116
+ _state: &dyn Session,
117
+ projection: Option<&Vec<usize>>,
118
+ filters: &[Expr],
119
+ limit: Option<usize>,
120
+ ) -> Result<Arc<dyn ExecutionPlan>> {
121
+ Ok(Arc::new(LixDirectoryHistoryScanExec::new(
122
+ Arc::clone(&self.commit_graph),
123
+ self.query_source.clone(),
124
+ projected_schema(&self.schema, projection)?,
125
+ HistoryRoute::from_filters(filters, HistoryColumnStyle::Prefixed),
126
+ limit,
127
+ )))
128
+ }
129
+ }
130
+
131
+ struct LixDirectoryHistoryScanExec {
132
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
133
+ query_source: SqlCommitStoreQuerySource,
134
+ schema: SchemaRef,
135
+ route: HistoryRoute,
136
+ limit: Option<usize>,
137
+ properties: Arc<PlanProperties>,
138
+ }
139
+
140
+ impl std::fmt::Debug for LixDirectoryHistoryScanExec {
141
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
142
+ f.debug_struct("LixDirectoryHistoryScanExec")
143
+ .field("route", &self.route)
144
+ .field("limit", &self.limit)
145
+ .finish()
146
+ }
147
+ }
148
+
149
+ impl LixDirectoryHistoryScanExec {
150
+ fn new(
151
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
152
+ query_source: SqlCommitStoreQuerySource,
153
+ schema: SchemaRef,
154
+ route: HistoryRoute,
155
+ limit: Option<usize>,
156
+ ) -> Self {
157
+ let properties = PlanProperties::new(
158
+ EquivalenceProperties::new(Arc::clone(&schema)),
159
+ Partitioning::UnknownPartitioning(1),
160
+ EmissionType::Incremental,
161
+ Boundedness::Bounded,
162
+ );
163
+ Self {
164
+ commit_graph,
165
+ query_source,
166
+ schema,
167
+ route,
168
+ limit,
169
+ properties: Arc::new(properties),
170
+ }
171
+ }
172
+ }
173
+
174
+ impl DisplayAs for LixDirectoryHistoryScanExec {
175
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176
+ match t {
177
+ DisplayFormatType::Default | DisplayFormatType::Verbose => write!(
178
+ f,
179
+ "LixDirectoryHistoryScanExec(route={:?}, limit={:?})",
180
+ self.route, self.limit
181
+ ),
182
+ DisplayFormatType::TreeRender => write!(f, "LixDirectoryHistoryScanExec"),
183
+ }
184
+ }
185
+ }
186
+
187
+ impl ExecutionPlan for LixDirectoryHistoryScanExec {
188
+ fn name(&self) -> &str {
189
+ "LixDirectoryHistoryScanExec"
190
+ }
191
+
192
+ fn as_any(&self) -> &dyn Any {
193
+ self
194
+ }
195
+
196
+ fn properties(&self) -> &Arc<PlanProperties> {
197
+ &self.properties
198
+ }
199
+
200
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
201
+ Vec::new()
202
+ }
203
+
204
+ fn with_new_children(
205
+ self: Arc<Self>,
206
+ children: Vec<Arc<dyn ExecutionPlan>>,
207
+ ) -> Result<Arc<dyn ExecutionPlan>> {
208
+ if !children.is_empty() {
209
+ return Err(DataFusionError::Execution(
210
+ "LixDirectoryHistoryScanExec does not accept children".to_string(),
211
+ ));
212
+ }
213
+ Ok(self)
214
+ }
215
+
216
+ fn execute(
217
+ &self,
218
+ partition: usize,
219
+ _context: Arc<TaskContext>,
220
+ ) -> Result<SendableRecordBatchStream> {
221
+ if partition != 0 {
222
+ return Err(DataFusionError::Execution(format!(
223
+ "LixDirectoryHistoryScanExec only exposes one partition, got {partition}"
224
+ )));
225
+ }
226
+
227
+ let commit_graph = Arc::clone(&self.commit_graph);
228
+ let query_source = self.query_source.clone();
229
+ let schema = Arc::clone(&self.schema);
230
+ let stream_schema = Arc::clone(&schema);
231
+ let route = self.route.clone();
232
+ let limit = self.limit;
233
+ let fut = async move {
234
+ let mut rows = load_directory_history_rows(commit_graph, query_source, &route)
235
+ .await
236
+ .map_err(lix_error_to_datafusion_error)?;
237
+ if let Some(limit) = limit {
238
+ rows.truncate(limit);
239
+ }
240
+ directory_history_record_batch(&stream_schema, &rows)
241
+ .map_err(lix_error_to_datafusion_error)
242
+ };
243
+
244
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
245
+ schema,
246
+ stream::once(fut),
247
+ )))
248
+ }
249
+ }
250
+
251
+ #[derive(Debug, Clone)]
252
+ struct DirectoryHistoryRecord {
253
+ id: String,
254
+ parent_id: Option<String>,
255
+ name: Option<String>,
256
+ hidden: Option<bool>,
257
+ entry: HistoryEntry,
258
+ }
259
+
260
+ #[derive(Debug, Clone)]
261
+ struct DirectoryHistoryOutputRow {
262
+ entity_id: String,
263
+ id: String,
264
+ path: Option<String>,
265
+ parent_id: Option<String>,
266
+ name: Option<String>,
267
+ hidden: Option<bool>,
268
+ descriptor_change: MaterializedChange,
269
+ event: DirectoryHistoryEvent,
270
+ }
271
+
272
+ #[derive(Debug, Clone)]
273
+ struct DirectoryHistoryEvent {
274
+ directory_id: String,
275
+ start_commit_id: String,
276
+ depth: u32,
277
+ change: MaterializedChange,
278
+ observed_commit_id: String,
279
+ commit_created_at: String,
280
+ }
281
+
282
+ #[derive(Debug, Deserialize)]
283
+ struct DirectoryDescriptorSnapshot {
284
+ id: String,
285
+ parent_id: Option<String>,
286
+ name: String,
287
+ hidden: Option<bool>,
288
+ }
289
+
290
+ async fn load_directory_history_rows(
291
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
292
+ query_source: SqlCommitStoreQuerySource,
293
+ route: &HistoryRoute,
294
+ ) -> Result<Vec<DirectoryHistoryOutputRow>, LixError> {
295
+ let event_route = route.traversal_only();
296
+ let event_entries = load_history_entries(
297
+ HistoryViewDescriptor {
298
+ view_name: "lix_directory_history",
299
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
300
+ },
301
+ Arc::clone(&commit_graph),
302
+ query_source.json_reader.clone(),
303
+ &event_route,
304
+ vec![DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string()],
305
+ )
306
+ .await?;
307
+ let context_route = route.starts_only();
308
+ let context_entries = load_history_entries(
309
+ HistoryViewDescriptor {
310
+ view_name: "lix_directory_history",
311
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
312
+ },
313
+ commit_graph,
314
+ query_source.json_reader,
315
+ &context_route,
316
+ vec![DIRECTORY_DESCRIPTOR_SCHEMA_KEY.to_string()],
317
+ )
318
+ .await?;
319
+ let event_descriptors = parse_directory_history_records(&event_entries)?;
320
+ let descriptors = parse_directory_history_records(&context_entries)?;
321
+ let mut output = Vec::new();
322
+
323
+ for descriptor in &event_descriptors {
324
+ let event = directory_history_event_from_entry(&descriptor.id, &descriptor.entry);
325
+ let Some(visible_descriptor) = nearest_directory_descriptor(&descriptors, &event) else {
326
+ continue;
327
+ };
328
+ let path = if visible_descriptor.name.is_some() {
329
+ resolve_directory_history_path(
330
+ &visible_descriptor.id,
331
+ &event.start_commit_id,
332
+ event.depth,
333
+ &descriptors,
334
+ &mut BTreeMap::new(),
335
+ &mut BTreeSet::new(),
336
+ )
337
+ } else {
338
+ None
339
+ };
340
+ let id = tombstone_identity_column_value(
341
+ "id",
342
+ &visible_descriptor.id,
343
+ HistoryIdentityProjection::SingleColumn { column: "id" },
344
+ )?
345
+ .and_then(|value| value.as_str().map(ToOwned::to_owned))
346
+ .unwrap_or_else(|| visible_descriptor.id.clone());
347
+ output.push(DirectoryHistoryOutputRow {
348
+ entity_id: visible_descriptor.id.clone(),
349
+ id,
350
+ path,
351
+ parent_id: visible_descriptor.parent_id.clone(),
352
+ name: visible_descriptor.name.clone(),
353
+ hidden: visible_descriptor.hidden,
354
+ descriptor_change: visible_descriptor.entry.change.clone(),
355
+ event,
356
+ });
357
+ }
358
+ output.retain(|row| {
359
+ let entity_id = entity_id_json_array(&row.entity_id).ok();
360
+ route.matches_surface_row(
361
+ DIRECTORY_DESCRIPTOR_SCHEMA_KEY,
362
+ entity_id.as_deref().unwrap_or(&row.entity_id),
363
+ None,
364
+ row.event.depth,
365
+ )
366
+ });
367
+
368
+ output.sort_by(|left, right| {
369
+ left.entity_id
370
+ .cmp(&right.entity_id)
371
+ .then(left.event.start_commit_id.cmp(&right.event.start_commit_id))
372
+ .then(left.event.depth.cmp(&right.event.depth))
373
+ .then(
374
+ left.event
375
+ .observed_commit_id
376
+ .cmp(&right.event.observed_commit_id),
377
+ )
378
+ .then(left.event.change.id.cmp(&right.event.change.id))
379
+ });
380
+ Ok(output)
381
+ }
382
+
383
+ fn parse_directory_history_records(
384
+ entries: &[HistoryEntry],
385
+ ) -> Result<Vec<DirectoryHistoryRecord>, LixError> {
386
+ entries
387
+ .iter()
388
+ .filter(|entry| entry.change.schema_key == DIRECTORY_DESCRIPTOR_SCHEMA_KEY)
389
+ .map(|entry| {
390
+ let Some(snapshot_content) = entry.change.snapshot_content.as_deref() else {
391
+ return Ok(DirectoryHistoryRecord {
392
+ id: entry.change.entity_id.as_single_string_owned()?,
393
+ parent_id: None,
394
+ name: None,
395
+ hidden: None,
396
+ entry: entry.clone(),
397
+ });
398
+ };
399
+ let snapshot: DirectoryDescriptorSnapshot = serde_json::from_str(snapshot_content)
400
+ .map_err(|error| {
401
+ LixError::new(
402
+ "LIX_ERROR_UNKNOWN",
403
+ format!("invalid lix_directory_descriptor history snapshot JSON: {error}"),
404
+ )
405
+ })?;
406
+ Ok(DirectoryHistoryRecord {
407
+ id: snapshot.id,
408
+ parent_id: snapshot.parent_id,
409
+ name: Some(snapshot.name),
410
+ hidden: Some(snapshot.hidden.unwrap_or(false)),
411
+ entry: entry.clone(),
412
+ })
413
+ })
414
+ .collect()
415
+ }
416
+
417
+ fn directory_history_event_from_entry(
418
+ directory_id: &str,
419
+ entry: &HistoryEntry,
420
+ ) -> DirectoryHistoryEvent {
421
+ DirectoryHistoryEvent {
422
+ directory_id: directory_id.to_string(),
423
+ start_commit_id: entry.start_commit_id.clone(),
424
+ depth: entry.depth,
425
+ change: entry.change.clone(),
426
+ observed_commit_id: entry.observed_commit_id.clone(),
427
+ commit_created_at: entry.commit_created_at.clone(),
428
+ }
429
+ }
430
+
431
+ fn nearest_directory_descriptor<'a>(
432
+ descriptors: &'a [DirectoryHistoryRecord],
433
+ event: &DirectoryHistoryEvent,
434
+ ) -> Option<&'a DirectoryHistoryRecord> {
435
+ descriptors
436
+ .iter()
437
+ .filter(|descriptor| {
438
+ let exact_descriptor_event =
439
+ history_descriptor_event_matches(&descriptor.entry, event.depth, &event.change.id);
440
+ (exact_descriptor_event || descriptor.name.is_some())
441
+ && descriptor.id == event.directory_id
442
+ && descriptor.entry.start_commit_id == event.start_commit_id
443
+ && descriptor.entry.depth >= event.depth
444
+ })
445
+ .min_by(|left, right| {
446
+ left.entry
447
+ .depth
448
+ .cmp(&right.entry.depth)
449
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
450
+ })
451
+ }
452
+
453
+ fn resolve_directory_history_path(
454
+ directory_id: &str,
455
+ start_commit_id: &str,
456
+ target_depth: u32,
457
+ directories: &[DirectoryHistoryRecord],
458
+ cache: &mut BTreeMap<String, Option<String>>,
459
+ visiting: &mut BTreeSet<String>,
460
+ ) -> Option<String> {
461
+ if let Some(path) = cache.get(directory_id) {
462
+ return path.clone();
463
+ }
464
+ if !visiting.insert(directory_id.to_string()) {
465
+ cache.insert(directory_id.to_string(), None);
466
+ return None;
467
+ }
468
+ let directory = directories
469
+ .iter()
470
+ .filter(|directory| {
471
+ directory.name.is_some()
472
+ && directory.id == directory_id
473
+ && directory.entry.start_commit_id == start_commit_id
474
+ && directory.entry.depth >= target_depth
475
+ })
476
+ .min_by(|left, right| {
477
+ left.entry
478
+ .depth
479
+ .cmp(&right.entry.depth)
480
+ .then(left.entry.change.id.cmp(&right.entry.change.id))
481
+ })?;
482
+ let name = directory.name.as_ref()?;
483
+ let path = match directory.parent_id.as_deref() {
484
+ Some(parent_id) => {
485
+ let parent_path = resolve_directory_history_path(
486
+ parent_id,
487
+ start_commit_id,
488
+ target_depth,
489
+ directories,
490
+ cache,
491
+ visiting,
492
+ )?;
493
+ format!("{parent_path}{name}/")
494
+ }
495
+ None => format!("/{name}/"),
496
+ };
497
+ visiting.remove(directory_id);
498
+ cache.insert(directory_id.to_string(), Some(path.clone()));
499
+ Some(path)
500
+ }
501
+
502
+ fn directory_history_record_batch(
503
+ schema: &SchemaRef,
504
+ rows: &[DirectoryHistoryOutputRow],
505
+ ) -> Result<RecordBatch, LixError> {
506
+ let columns = schema
507
+ .fields()
508
+ .iter()
509
+ .map(|field| directory_history_column_array(field.name(), rows))
510
+ .collect::<Result<Vec<_>, _>>()?;
511
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
512
+ RecordBatch::try_new_with_options(Arc::clone(schema), columns, &options).map_err(|error| {
513
+ LixError::new(
514
+ "LIX_ERROR_UNKNOWN",
515
+ format!("sql2 failed to build lix_directory_history record batch: {error}"),
516
+ )
517
+ })
518
+ }
519
+
520
+ fn directory_history_column_array(
521
+ column_name: &str,
522
+ rows: &[DirectoryHistoryOutputRow],
523
+ ) -> Result<ArrayRef, LixError> {
524
+ Ok(match column_name {
525
+ "id" => string_array(rows.iter().map(|row| Some(row.id.as_str()))),
526
+ "path" => string_array(rows.iter().map(|row| row.path.as_deref())),
527
+ "parent_id" => string_array(rows.iter().map(|row| row.parent_id.as_deref())),
528
+ "name" => string_array(rows.iter().map(|row| row.name.as_deref())),
529
+ "hidden" => Arc::new(BooleanArray::from(
530
+ rows.iter().map(|row| row.hidden).collect::<Vec<_>>(),
531
+ )) as ArrayRef,
532
+ HISTORY_COL_ENTITY_ID => Arc::new(StringArray::from(
533
+ rows.iter()
534
+ .map(|row| entity_id_json_array(&row.entity_id).map(Some))
535
+ .collect::<std::result::Result<Vec<_>, _>>()?,
536
+ )) as ArrayRef,
537
+ HISTORY_COL_SCHEMA_KEY => {
538
+ string_array(rows.iter().map(|_| Some(DIRECTORY_DESCRIPTOR_SCHEMA_KEY)))
539
+ }
540
+ HISTORY_COL_FILE_ID => string_array(rows.iter().map(|_| None)),
541
+ HISTORY_COL_CHANGE_ID => {
542
+ string_array(rows.iter().map(|row| Some(row.event.change.id.as_str())))
543
+ }
544
+ HISTORY_COL_SNAPSHOT_CONTENT => string_array(
545
+ rows.iter()
546
+ .map(|row| row.descriptor_change.snapshot_content.as_deref()),
547
+ ),
548
+ HISTORY_COL_METADATA => Arc::new(StringArray::from(
549
+ rows.iter()
550
+ .map(|row| {
551
+ row.descriptor_change
552
+ .metadata
553
+ .as_ref()
554
+ .map(serialize_row_metadata)
555
+ })
556
+ .collect::<Vec<_>>(),
557
+ )),
558
+ HISTORY_COL_OBSERVED_COMMIT_ID => string_array(
559
+ rows.iter()
560
+ .map(|row| Some(row.event.observed_commit_id.as_str())),
561
+ ),
562
+ HISTORY_COL_COMMIT_CREATED_AT => string_array(
563
+ rows.iter()
564
+ .map(|row| Some(row.event.commit_created_at.as_str())),
565
+ ),
566
+ HISTORY_COL_START_COMMIT_ID => string_array(
567
+ rows.iter()
568
+ .map(|row| Some(row.event.start_commit_id.as_str())),
569
+ ),
570
+ HISTORY_COL_DEPTH => Arc::new(Int64Array::from(
571
+ rows.iter()
572
+ .map(|row| i64::from(row.event.depth))
573
+ .collect::<Vec<_>>(),
574
+ )) as ArrayRef,
575
+ other => {
576
+ return Err(LixError::new(
577
+ "LIX_ERROR_UNKNOWN",
578
+ format!(
579
+ "sql2 lix_directory_history provider does not support projected column '{other}'"
580
+ ),
581
+ ))
582
+ }
583
+ })
584
+ }
585
+
586
+ fn lix_directory_history_schema() -> SchemaRef {
587
+ Arc::new(Schema::new(vec![
588
+ Field::new("id", DataType::Utf8, false),
589
+ Field::new("path", DataType::Utf8, true),
590
+ Field::new("parent_id", DataType::Utf8, true),
591
+ Field::new("name", DataType::Utf8, true),
592
+ Field::new("hidden", DataType::Boolean, true),
593
+ json_field(HISTORY_COL_ENTITY_ID, false),
594
+ Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
595
+ Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
596
+ json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
597
+ Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
598
+ json_field(HISTORY_COL_METADATA, true),
599
+ Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
600
+ Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
601
+ Field::new(HISTORY_COL_START_COMMIT_ID, DataType::Utf8, false),
602
+ Field::new(HISTORY_COL_DEPTH, DataType::Int64, false),
603
+ ]))
604
+ }
605
+
606
+ fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
607
+ let Some(projection) = projection else {
608
+ return Ok(Arc::clone(base_schema));
609
+ };
610
+ Ok(Arc::new(base_schema.project(projection)?))
611
+ }
612
+
613
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
614
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
615
+ }
616
+
617
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
618
+ super::error::datafusion_error_to_lix_error(error)
619
+ }
620
+
621
+ fn entity_id_json_array(entity_id: &str) -> Result<String, LixError> {
622
+ serde_json::to_string(&[entity_id]).map_err(|error| {
623
+ LixError::unknown(format!(
624
+ "failed to encode history entity id as JSON: {error}"
625
+ ))
626
+ })
627
+ }
628
+
629
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
630
+ super::error::lix_error_to_datafusion_error(error)
631
+ }