@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,412 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use async_trait::async_trait;
5
+ use datafusion::arrow::array::{ArrayRef, Int64Array, StringArray};
6
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
7
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
8
+ use datafusion::catalog::{Session, TableProvider};
9
+ use datafusion::common::{DataFusionError, Result};
10
+ use datafusion::datasource::TableType;
11
+ use datafusion::execution::TaskContext;
12
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
13
+ use datafusion::physical_expr::EquivalenceProperties;
14
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
15
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
16
+ use datafusion::physical_plan::{
17
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
18
+ };
19
+ use datafusion::prelude::SessionContext;
20
+ use futures_util::{stream, TryStreamExt};
21
+ use tokio::sync::Mutex;
22
+
23
+ use crate::commit_graph::CommitGraphReader;
24
+ use crate::{serialize_row_metadata, LixError};
25
+
26
+ use super::history_route::{
27
+ load_history_entries, parse_history_filter, HistoryColumnStyle, HistoryRoute,
28
+ HistoryViewDescriptor,
29
+ };
30
+ use super::result_metadata::json_field;
31
+ use super::SqlCommitStoreQuerySource;
32
+
33
+ pub(crate) async fn register_history_providers(
34
+ session: &SessionContext,
35
+ commit_graph: Box<dyn CommitGraphReader>,
36
+ query_source: SqlCommitStoreQuerySource,
37
+ ) -> Result<Arc<dyn TableProvider>, LixError> {
38
+ let provider: Arc<dyn TableProvider> = Arc::new(LixStateHistoryProvider::new(
39
+ Arc::new(Mutex::new(commit_graph)),
40
+ query_source,
41
+ ));
42
+ session
43
+ .register_table("lix_state_history", Arc::clone(&provider))
44
+ .map_err(datafusion_error_to_lix_error)?;
45
+ Ok(provider)
46
+ }
47
+
48
+ pub(crate) struct LixStateHistoryProvider {
49
+ schema: SchemaRef,
50
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
51
+ query_source: SqlCommitStoreQuerySource,
52
+ }
53
+
54
+ impl std::fmt::Debug for LixStateHistoryProvider {
55
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56
+ f.debug_struct("LixStateHistoryProvider").finish()
57
+ }
58
+ }
59
+
60
+ impl LixStateHistoryProvider {
61
+ pub(crate) fn new(
62
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
63
+ query_source: SqlCommitStoreQuerySource,
64
+ ) -> Self {
65
+ Self {
66
+ schema: lix_state_history_schema(),
67
+ commit_graph,
68
+ query_source,
69
+ }
70
+ }
71
+ }
72
+
73
+ #[async_trait]
74
+ impl TableProvider for LixStateHistoryProvider {
75
+ fn as_any(&self) -> &dyn Any {
76
+ self
77
+ }
78
+
79
+ fn schema(&self) -> SchemaRef {
80
+ Arc::clone(&self.schema)
81
+ }
82
+
83
+ fn table_type(&self) -> TableType {
84
+ TableType::View
85
+ }
86
+
87
+ fn supports_filters_pushdown(
88
+ &self,
89
+ filters: &[&Expr],
90
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
91
+ Ok(filters
92
+ .iter()
93
+ .map(|filter| {
94
+ if parse_history_filter(filter, HistoryColumnStyle::Bare).is_some() {
95
+ TableProviderFilterPushDown::Exact
96
+ } else {
97
+ TableProviderFilterPushDown::Unsupported
98
+ }
99
+ })
100
+ .collect())
101
+ }
102
+
103
+ async fn scan(
104
+ &self,
105
+ _state: &dyn Session,
106
+ projection: Option<&Vec<usize>>,
107
+ filters: &[Expr],
108
+ limit: Option<usize>,
109
+ ) -> Result<Arc<dyn ExecutionPlan>> {
110
+ let projected_schema = projected_schema(&self.schema, projection)?;
111
+ Ok(Arc::new(LixStateHistoryScanExec::new(
112
+ Arc::clone(&self.commit_graph),
113
+ self.query_source.clone(),
114
+ projected_schema,
115
+ projection.cloned(),
116
+ HistoryRoute::from_filters(filters, HistoryColumnStyle::Bare),
117
+ limit,
118
+ )))
119
+ }
120
+ }
121
+
122
+ struct LixStateHistoryScanExec {
123
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
124
+ query_source: SqlCommitStoreQuerySource,
125
+ schema: SchemaRef,
126
+ projection: Option<Vec<usize>>,
127
+ route: HistoryRoute,
128
+ limit: Option<usize>,
129
+ properties: Arc<PlanProperties>,
130
+ }
131
+
132
+ impl std::fmt::Debug for LixStateHistoryScanExec {
133
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
134
+ f.debug_struct("LixStateHistoryScanExec")
135
+ .field("limit", &self.limit)
136
+ .field("route", &self.route)
137
+ .finish()
138
+ }
139
+ }
140
+
141
+ impl LixStateHistoryScanExec {
142
+ fn new(
143
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
144
+ query_source: SqlCommitStoreQuerySource,
145
+ schema: SchemaRef,
146
+ projection: Option<Vec<usize>>,
147
+ route: HistoryRoute,
148
+ limit: Option<usize>,
149
+ ) -> Self {
150
+ let properties = PlanProperties::new(
151
+ EquivalenceProperties::new(Arc::clone(&schema)),
152
+ Partitioning::UnknownPartitioning(1),
153
+ EmissionType::Incremental,
154
+ Boundedness::Bounded,
155
+ );
156
+ Self {
157
+ commit_graph,
158
+ query_source,
159
+ schema,
160
+ projection,
161
+ route,
162
+ limit,
163
+ properties: Arc::new(properties),
164
+ }
165
+ }
166
+ }
167
+
168
+ impl DisplayAs for LixStateHistoryScanExec {
169
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170
+ match t {
171
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
172
+ write!(
173
+ f,
174
+ "LixStateHistoryScanExec(limit={:?}, route={:?})",
175
+ self.limit, self.route
176
+ )
177
+ }
178
+ DisplayFormatType::TreeRender => write!(f, "LixStateHistoryScanExec"),
179
+ }
180
+ }
181
+ }
182
+
183
+ impl ExecutionPlan for LixStateHistoryScanExec {
184
+ fn name(&self) -> &str {
185
+ "LixStateHistoryScanExec"
186
+ }
187
+
188
+ fn as_any(&self) -> &dyn Any {
189
+ self
190
+ }
191
+
192
+ fn properties(&self) -> &Arc<PlanProperties> {
193
+ &self.properties
194
+ }
195
+
196
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
197
+ Vec::new()
198
+ }
199
+
200
+ fn with_new_children(
201
+ self: Arc<Self>,
202
+ children: Vec<Arc<dyn ExecutionPlan>>,
203
+ ) -> Result<Arc<dyn ExecutionPlan>> {
204
+ if !children.is_empty() {
205
+ return Err(DataFusionError::Execution(
206
+ "LixStateHistoryScanExec does not accept children".to_string(),
207
+ ));
208
+ }
209
+ Ok(self)
210
+ }
211
+
212
+ fn execute(
213
+ &self,
214
+ partition: usize,
215
+ _context: Arc<TaskContext>,
216
+ ) -> Result<SendableRecordBatchStream> {
217
+ if partition != 0 {
218
+ return Err(DataFusionError::Execution(format!(
219
+ "LixStateHistoryScanExec only exposes one partition, got {partition}"
220
+ )));
221
+ }
222
+
223
+ let commit_graph = Arc::clone(&self.commit_graph);
224
+ let query_source = self.query_source.clone();
225
+ let route = self.route.clone();
226
+ let schema = Arc::clone(&self.schema);
227
+ let stream_schema = Arc::clone(&schema);
228
+ let limit = self.limit;
229
+ let zero_column_projection = self
230
+ .projection
231
+ .as_ref()
232
+ .is_some_and(|projection| projection.is_empty());
233
+
234
+ let stream = stream::once(async move {
235
+ let rows = if route.is_contradictory() {
236
+ Vec::new()
237
+ } else {
238
+ load_state_history_rows(commit_graph, query_source, &route)
239
+ .await
240
+ .map_err(lix_error_to_datafusion_error)?
241
+ };
242
+ let rows = if let Some(limit) = limit {
243
+ rows.into_iter().take(limit).collect::<Vec<_>>()
244
+ } else {
245
+ rows
246
+ };
247
+
248
+ let batch = if zero_column_projection {
249
+ let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
250
+ RecordBatch::try_new_with_options(Arc::clone(&stream_schema), vec![], &options)
251
+ .map_err(|error| {
252
+ DataFusionError::Execution(format!(
253
+ "failed to build zero-column lix_state_history batch: {error}"
254
+ ))
255
+ })?
256
+ } else {
257
+ state_history_record_batch(Arc::clone(&stream_schema), &rows)?
258
+ };
259
+ Ok::<_, DataFusionError>(stream::iter(vec![Ok::<RecordBatch, DataFusionError>(
260
+ batch,
261
+ )]))
262
+ })
263
+ .try_flatten();
264
+
265
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
266
+ }
267
+ }
268
+
269
+ fn lix_state_history_schema() -> SchemaRef {
270
+ Arc::new(Schema::new(vec![
271
+ json_field("entity_id", false),
272
+ Field::new("schema_key", DataType::Utf8, false),
273
+ Field::new("file_id", DataType::Utf8, true),
274
+ json_field("snapshot_content", true),
275
+ json_field("metadata", true),
276
+ Field::new("change_id", DataType::Utf8, false),
277
+ Field::new("observed_commit_id", DataType::Utf8, false),
278
+ Field::new("commit_created_at", DataType::Utf8, false),
279
+ Field::new("start_commit_id", DataType::Utf8, false),
280
+ Field::new("depth", DataType::Int64, false),
281
+ ]))
282
+ }
283
+
284
+ fn projected_schema(base_schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
285
+ let fields = match projection {
286
+ Some(indices) => indices
287
+ .iter()
288
+ .map(|index| base_schema.field(*index).as_ref().clone())
289
+ .collect::<Vec<_>>(),
290
+ None => base_schema
291
+ .fields()
292
+ .iter()
293
+ .map(|field| field.as_ref().clone())
294
+ .collect::<Vec<_>>(),
295
+ };
296
+ Ok(Arc::new(Schema::new(fields)))
297
+ }
298
+
299
/// One `lix_state_history` row in owned form, before conversion to Arrow arrays.
///
/// Field names match the column names of the table schema.
#[derive(Debug, Clone)]
struct StateHistorySqlRow {
    /// Entity id as the text produced by `as_json_array_text`.
    entity_id: String,
    schema_key: String,
    /// Nullable column.
    file_id: Option<String>,
    /// Nullable column.
    snapshot_content: Option<String>,
    /// Nullable column; passed through `serialize_row_metadata` when emitted.
    metadata: Option<String>,
    change_id: String,
    observed_commit_id: String,
    commit_created_at: String,
    start_commit_id: String,
    depth: i64,
}
312
+
313
+ fn state_history_record_batch(
314
+ schema: SchemaRef,
315
+ rows: &[StateHistorySqlRow],
316
+ ) -> Result<RecordBatch> {
317
+ let arrays = schema
318
+ .fields()
319
+ .iter()
320
+ .map(|field| {
321
+ Ok(match field.name().as_str() {
322
+ "entity_id" => string_array(rows.iter().map(|row| Some(row.entity_id.as_str()))),
323
+ "schema_key" => string_array(rows.iter().map(|row| Some(row.schema_key.as_str()))),
324
+ "file_id" => string_array(rows.iter().map(|row| row.file_id.as_deref())),
325
+ "snapshot_content" => {
326
+ string_array(rows.iter().map(|row| row.snapshot_content.as_deref()))
327
+ }
328
+ "metadata" => Arc::new(StringArray::from(
329
+ rows.iter()
330
+ .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
331
+ .collect::<Vec<_>>(),
332
+ )),
333
+ "change_id" => string_array(rows.iter().map(|row| Some(row.change_id.as_str()))),
334
+ "observed_commit_id" => {
335
+ string_array(rows.iter().map(|row| Some(row.observed_commit_id.as_str())))
336
+ }
337
+ "commit_created_at" => {
338
+ string_array(rows.iter().map(|row| Some(row.commit_created_at.as_str())))
339
+ }
340
+ "start_commit_id" => {
341
+ string_array(rows.iter().map(|row| Some(row.start_commit_id.as_str())))
342
+ }
343
+ "depth" => Arc::new(Int64Array::from(
344
+ rows.iter().map(|row| row.depth).collect::<Vec<_>>(),
345
+ )) as ArrayRef,
346
+ other => {
347
+ return Err(DataFusionError::Execution(format!(
348
+ "lix_state_history provider does not support projected column '{other}'"
349
+ )))
350
+ }
351
+ })
352
+ })
353
+ .collect::<Result<Vec<_>>>()?;
354
+ RecordBatch::try_new(schema, arrays).map_err(DataFusionError::from)
355
+ }
356
+
357
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
358
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
359
+ }
360
+
361
+ async fn load_state_history_rows(
362
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
363
+ query_source: SqlCommitStoreQuerySource,
364
+ route: &HistoryRoute,
365
+ ) -> Result<Vec<StateHistorySqlRow>, LixError> {
366
+ let entries = load_history_entries(
367
+ HistoryViewDescriptor {
368
+ view_name: "lix_state_history",
369
+ start_commit_column: "start_commit_id",
370
+ },
371
+ commit_graph,
372
+ query_source.json_reader,
373
+ route,
374
+ Vec::new(),
375
+ )
376
+ .await?;
377
+ let mut rows = entries
378
+ .into_iter()
379
+ .map(|entry| -> Result<StateHistorySqlRow, LixError> {
380
+ Ok(StateHistorySqlRow {
381
+ entity_id: entry.change.entity_id.as_json_array_text()?,
382
+ schema_key: entry.change.schema_key,
383
+ file_id: entry.change.file_id,
384
+ snapshot_content: entry.change.snapshot_content,
385
+ metadata: entry.change.metadata,
386
+ change_id: entry.change.id,
387
+ observed_commit_id: entry.observed_commit_id,
388
+ commit_created_at: entry.commit_created_at,
389
+ start_commit_id: entry.start_commit_id,
390
+ depth: i64::from(entry.depth),
391
+ })
392
+ })
393
+ .collect::<Result<Vec<_>, _>>()?;
394
+
395
+ rows.sort_by(|left, right| {
396
+ left.entity_id
397
+ .cmp(&right.entity_id)
398
+ .then(left.file_id.cmp(&right.file_id))
399
+ .then(left.schema_key.cmp(&right.schema_key))
400
+ .then(left.depth.cmp(&right.depth))
401
+ .then(left.change_id.cmp(&right.change_id))
402
+ });
403
+ Ok(rows)
404
+ }
405
+
406
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
407
+ super::error::datafusion_error_to_lix_error(error)
408
+ }
409
+
410
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
411
+ super::error::lix_error_to_datafusion_error(error)
412
+ }