@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,148 @@
1
+ use std::any::Any;
2
+ use std::fmt::Debug;
3
+ use std::sync::Arc;
4
+
5
+ use async_trait::async_trait;
6
+ use datafusion::arrow::array::{ArrayRef, UInt64Array};
7
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
8
+ use datafusion::arrow::record_batch::RecordBatch;
9
+ use datafusion::common::{DataFusionError, Result};
10
+ use datafusion::execution::TaskContext;
11
+ use datafusion::physical_expr::EquivalenceProperties;
12
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
13
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
14
+ use datafusion::physical_plan::{
15
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
16
+ };
17
+ use futures_util::stream;
18
+
19
+ use super::runtime;
20
+
21
+ #[async_trait]
22
+ pub(crate) trait InsertSink: Debug + DisplayAs + Send + Sync {
23
+ async fn write_batches(
24
+ &self,
25
+ batches: Vec<RecordBatch>,
26
+ context: &Arc<TaskContext>,
27
+ ) -> Result<u64>;
28
+ }
29
+
30
+ pub(crate) struct InsertExec {
31
+ input: Arc<dyn ExecutionPlan>,
32
+ sink: Arc<dyn InsertSink>,
33
+ result_schema: SchemaRef,
34
+ properties: Arc<PlanProperties>,
35
+ }
36
+
37
+ impl InsertExec {
38
+ pub(crate) fn new(input: Arc<dyn ExecutionPlan>, sink: Arc<dyn InsertSink>) -> Self {
39
+ let result_schema = dml_count_schema();
40
+ let properties = PlanProperties::new(
41
+ EquivalenceProperties::new(Arc::clone(&result_schema)),
42
+ Partitioning::UnknownPartitioning(1),
43
+ EmissionType::Final,
44
+ Boundedness::Bounded,
45
+ );
46
+ Self {
47
+ input,
48
+ sink,
49
+ result_schema,
50
+ properties: Arc::new(properties),
51
+ }
52
+ }
53
+ }
54
+
55
+ impl Debug for InsertExec {
56
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57
+ f.debug_struct("InsertExec").finish()
58
+ }
59
+ }
60
+
61
+ impl DisplayAs for InsertExec {
62
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63
+ match t {
64
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
65
+ write!(f, "InsertExec: sink=")?;
66
+ self.sink.fmt_as(t, f)
67
+ }
68
+ DisplayFormatType::TreeRender => write!(f, "InsertExec"),
69
+ }
70
+ }
71
+ }
72
+
73
+ impl ExecutionPlan for InsertExec {
74
+ fn name(&self) -> &str {
75
+ "InsertExec"
76
+ }
77
+
78
+ fn as_any(&self) -> &dyn Any {
79
+ self
80
+ }
81
+
82
+ fn properties(&self) -> &Arc<PlanProperties> {
83
+ &self.properties
84
+ }
85
+
86
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
87
+ vec![&self.input]
88
+ }
89
+
90
+ fn with_new_children(
91
+ self: Arc<Self>,
92
+ mut children: Vec<Arc<dyn ExecutionPlan>>,
93
+ ) -> Result<Arc<dyn ExecutionPlan>> {
94
+ if children.len() != 1 {
95
+ return Err(DataFusionError::Execution(format!(
96
+ "InsertExec expects one input child, got {}",
97
+ children.len()
98
+ )));
99
+ }
100
+ Ok(Arc::new(Self::new(
101
+ children.swap_remove(0),
102
+ Arc::clone(&self.sink),
103
+ )))
104
+ }
105
+
106
+ fn execute(
107
+ &self,
108
+ partition: usize,
109
+ context: Arc<TaskContext>,
110
+ ) -> Result<SendableRecordBatchStream> {
111
+ if partition != 0 {
112
+ return Err(DataFusionError::Execution(format!(
113
+ "InsertExec only exposes one partition, got {partition}"
114
+ )));
115
+ }
116
+
117
+ let input = Arc::clone(&self.input);
118
+ let sink = Arc::clone(&self.sink);
119
+ let stream_schema = Arc::clone(&self.result_schema);
120
+ let result_schema = Arc::clone(&self.result_schema);
121
+ let stream = stream::once(async move {
122
+ let batches = runtime::collect_input_plan(input, Arc::clone(&context)).await?;
123
+ let count = sink.write_batches(batches, &context).await?;
124
+ dml_count_batch(stream_schema, count)
125
+ });
126
+
127
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
128
+ result_schema,
129
+ stream,
130
+ )))
131
+ }
132
+ }
133
+
134
+ fn dml_count_schema() -> SchemaRef {
135
+ Arc::new(Schema::new(vec![Field::new(
136
+ "count",
137
+ DataType::UInt64,
138
+ false,
139
+ )]))
140
+ }
141
+
142
+ fn dml_count_batch(schema: SchemaRef, count: u64) -> Result<RecordBatch> {
143
+ RecordBatch::try_new(
144
+ schema,
145
+ vec![Arc::new(UInt64Array::from(vec![count])) as ArrayRef],
146
+ )
147
+ .map_err(DataFusionError::from)
148
+ }
@@ -0,0 +1,440 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use async_trait::async_trait;
5
+ use datafusion::arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray};
6
+ use datafusion::arrow::datatypes::SchemaRef;
7
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
8
+ use datafusion::catalog::{Session, TableProvider};
9
+ use datafusion::common::{DataFusionError, Result};
10
+ use datafusion::datasource::TableType;
11
+ use datafusion::execution::TaskContext;
12
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
13
+ use datafusion::physical_expr::EquivalenceProperties;
14
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
15
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
16
+ use datafusion::physical_plan::{
17
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
18
+ };
19
+ use futures_util::stream;
20
+ use serde_json::Value as JsonValue;
21
+ use tokio::sync::Mutex;
22
+
23
+ use crate::commit_graph::CommitGraphReader;
24
+ use crate::serialize_row_metadata;
25
+ use crate::LixError;
26
+
27
+ use super::entity_provider::{
28
+ entity_f64_value, entity_i64_value, entity_json_text_value, entity_surface_schema,
29
+ parse_snapshot, string_array, EntityColumnType, EntityProviderVariant, EntitySurfaceSpec,
30
+ };
31
+ use super::history_projection::{tombstone_identity_column_value, HistoryIdentityProjection};
32
+ use super::history_route::{
33
+ load_history_entries, parse_history_filter, HistoryColumnStyle, HistoryRoute,
34
+ HistoryViewDescriptor, HISTORY_COL_START_COMMIT_ID,
35
+ };
36
+ use super::SqlCommitStoreQuerySource;
37
+ use crate::commit_store::MaterializedChange;
38
+
39
+ /// Schema-specific history surface backed directly by the commit graph.
40
+ ///
41
+ /// The provider does not query `lix_state_history` through SQL. It uses the same
42
+ /// commit graph primitive as the generic history surface, then shapes canonical
43
+ /// changes into the typed entity columns for one registered schema.
44
+ pub(crate) struct EntityHistoryProvider {
45
+ spec: Arc<EntitySurfaceSpec>,
46
+ schema: SchemaRef,
47
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
48
+ query_source: SqlCommitStoreQuerySource,
49
+ }
50
+
51
+ impl std::fmt::Debug for EntityHistoryProvider {
52
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53
+ f.debug_struct("EntityHistoryProvider")
54
+ .field("schema_key", &self.spec.schema_key)
55
+ .finish()
56
+ }
57
+ }
58
+
59
+ impl EntityHistoryProvider {
60
+ pub(crate) fn new(
61
+ spec: Arc<EntitySurfaceSpec>,
62
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
63
+ query_source: SqlCommitStoreQuerySource,
64
+ ) -> Self {
65
+ Self {
66
+ schema: entity_surface_schema(&spec, EntityProviderVariant::History),
67
+ spec,
68
+ commit_graph,
69
+ query_source,
70
+ }
71
+ }
72
+ }
73
+
74
+ #[async_trait]
75
+ impl TableProvider for EntityHistoryProvider {
76
+ fn as_any(&self) -> &dyn Any {
77
+ self
78
+ }
79
+
80
+ fn schema(&self) -> SchemaRef {
81
+ Arc::clone(&self.schema)
82
+ }
83
+
84
+ fn table_type(&self) -> TableType {
85
+ TableType::View
86
+ }
87
+
88
+ fn supports_filters_pushdown(
89
+ &self,
90
+ filters: &[&Expr],
91
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
92
+ Ok(filters
93
+ .iter()
94
+ .map(|filter| {
95
+ if parse_history_filter(filter, HistoryColumnStyle::Prefixed).is_some() {
96
+ TableProviderFilterPushDown::Exact
97
+ } else {
98
+ TableProviderFilterPushDown::Unsupported
99
+ }
100
+ })
101
+ .collect())
102
+ }
103
+
104
+ async fn scan(
105
+ &self,
106
+ _state: &dyn Session,
107
+ projection: Option<&Vec<usize>>,
108
+ filters: &[Expr],
109
+ limit: Option<usize>,
110
+ ) -> Result<Arc<dyn ExecutionPlan>> {
111
+ let route = HistoryRoute::from_filters(filters, HistoryColumnStyle::Prefixed);
112
+ let schema = projected_schema(&self.schema, projection)?;
113
+ Ok(Arc::new(EntityHistoryScanExec::new(
114
+ Arc::clone(&self.spec),
115
+ Arc::clone(&self.commit_graph),
116
+ self.query_source.clone(),
117
+ schema,
118
+ route,
119
+ limit,
120
+ )))
121
+ }
122
+ }
123
+
124
+ struct EntityHistoryScanExec {
125
+ spec: Arc<EntitySurfaceSpec>,
126
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
127
+ query_source: SqlCommitStoreQuerySource,
128
+ schema: SchemaRef,
129
+ route: HistoryRoute,
130
+ limit: Option<usize>,
131
+ properties: Arc<PlanProperties>,
132
+ }
133
+
134
+ impl std::fmt::Debug for EntityHistoryScanExec {
135
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
136
+ f.debug_struct("EntityHistoryScanExec")
137
+ .field("schema_key", &self.spec.schema_key)
138
+ .field("route", &self.route)
139
+ .field("limit", &self.limit)
140
+ .finish()
141
+ }
142
+ }
143
+
144
+ impl EntityHistoryScanExec {
145
+ fn new(
146
+ spec: Arc<EntitySurfaceSpec>,
147
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
148
+ query_source: SqlCommitStoreQuerySource,
149
+ schema: SchemaRef,
150
+ route: HistoryRoute,
151
+ limit: Option<usize>,
152
+ ) -> Self {
153
+ let properties = PlanProperties::new(
154
+ EquivalenceProperties::new(Arc::clone(&schema)),
155
+ Partitioning::UnknownPartitioning(1),
156
+ EmissionType::Incremental,
157
+ Boundedness::Bounded,
158
+ );
159
+ Self {
160
+ spec,
161
+ commit_graph,
162
+ query_source,
163
+ schema,
164
+ route,
165
+ limit,
166
+ properties: Arc::new(properties),
167
+ }
168
+ }
169
+ }
170
+
171
+ impl DisplayAs for EntityHistoryScanExec {
172
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
173
+ match t {
174
+ DisplayFormatType::Default | DisplayFormatType::Verbose => write!(
175
+ f,
176
+ "EntityHistoryScanExec(schema_key={}, route={:?}, limit={:?})",
177
+ self.spec.schema_key, self.route, self.limit
178
+ ),
179
+ DisplayFormatType::TreeRender => write!(f, "EntityHistoryScanExec"),
180
+ }
181
+ }
182
+ }
183
+
184
+ impl ExecutionPlan for EntityHistoryScanExec {
185
+ fn name(&self) -> &str {
186
+ "EntityHistoryScanExec"
187
+ }
188
+
189
+ fn as_any(&self) -> &dyn Any {
190
+ self
191
+ }
192
+
193
+ fn properties(&self) -> &Arc<PlanProperties> {
194
+ &self.properties
195
+ }
196
+
197
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
198
+ Vec::new()
199
+ }
200
+
201
+ fn with_new_children(
202
+ self: Arc<Self>,
203
+ children: Vec<Arc<dyn ExecutionPlan>>,
204
+ ) -> Result<Arc<dyn ExecutionPlan>> {
205
+ if !children.is_empty() {
206
+ return Err(DataFusionError::Internal(
207
+ "EntityHistoryScanExec does not accept children".to_string(),
208
+ ));
209
+ }
210
+ Ok(self)
211
+ }
212
+
213
+ fn execute(
214
+ &self,
215
+ partition: usize,
216
+ _context: Arc<TaskContext>,
217
+ ) -> Result<SendableRecordBatchStream> {
218
+ if partition != 0 {
219
+ return Err(DataFusionError::Execution(format!(
220
+ "EntityHistoryScanExec only exposes one partition, got {partition}"
221
+ )));
222
+ }
223
+
224
+ let spec = Arc::clone(&self.spec);
225
+ let commit_graph = Arc::clone(&self.commit_graph);
226
+ let query_source = self.query_source.clone();
227
+ let schema = Arc::clone(&self.schema);
228
+ let route = self.route.clone();
229
+ let limit = self.limit;
230
+ let stream_schema = Arc::clone(&schema);
231
+ let fut = async move {
232
+ let rows = load_entity_history_rows(&spec, commit_graph, query_source, &route, limit)
233
+ .await
234
+ .map_err(lix_error_to_datafusion_error)?;
235
+ entity_history_record_batch(&stream_schema, &spec, &rows)
236
+ };
237
+
238
+ Ok(Box::pin(RecordBatchStreamAdapter::new(
239
+ schema,
240
+ stream::once(fut),
241
+ )))
242
+ }
243
+ }
244
+
245
+ #[derive(Debug, Clone)]
246
+ struct EntityHistoryRow {
247
+ change: MaterializedChange,
248
+ observed_commit_id: String,
249
+ commit_created_at: String,
250
+ start_commit_id: String,
251
+ depth: u32,
252
+ }
253
+
254
+ async fn load_entity_history_rows(
255
+ spec: &EntitySurfaceSpec,
256
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
257
+ query_source: SqlCommitStoreQuerySource,
258
+ route: &HistoryRoute,
259
+ limit: Option<usize>,
260
+ ) -> Result<Vec<EntityHistoryRow>, LixError> {
261
+ let history_view_name = format!("{}_history", spec.schema_key);
262
+ let entries = load_history_entries(
263
+ HistoryViewDescriptor {
264
+ view_name: history_view_name.as_str(),
265
+ start_commit_column: HISTORY_COL_START_COMMIT_ID,
266
+ },
267
+ commit_graph,
268
+ query_source.json_reader,
269
+ route,
270
+ vec![spec.schema_key.clone()],
271
+ )
272
+ .await?;
273
+ let mut rows = entries
274
+ .into_iter()
275
+ .map(|entry| EntityHistoryRow {
276
+ change: entry.change,
277
+ observed_commit_id: entry.observed_commit_id,
278
+ commit_created_at: entry.commit_created_at,
279
+ start_commit_id: entry.start_commit_id,
280
+ depth: entry.depth,
281
+ })
282
+ .collect::<Vec<_>>();
283
+ if let Some(limit) = limit {
284
+ rows.truncate(limit);
285
+ }
286
+ Ok(rows)
287
+ }
288
+
289
+ fn entity_history_record_batch(
290
+ schema: &SchemaRef,
291
+ spec: &EntitySurfaceSpec,
292
+ rows: &[EntityHistoryRow],
293
+ ) -> Result<RecordBatch> {
294
+ let columns = schema
295
+ .fields()
296
+ .iter()
297
+ .map(|field| entity_history_column_array(field.name(), spec, rows))
298
+ .collect::<Result<Vec<_>>>()?;
299
+ Ok(RecordBatch::try_new_with_options(
300
+ Arc::clone(schema),
301
+ columns,
302
+ &RecordBatchOptions::new().with_row_count(Some(rows.len())),
303
+ )?)
304
+ }
305
+
306
+ fn entity_history_column_array(
307
+ column_name: &str,
308
+ spec: &EntitySurfaceSpec,
309
+ rows: &[EntityHistoryRow],
310
+ ) -> Result<ArrayRef> {
311
+ if let Some(system_column) = column_name.strip_prefix("lixcol_") {
312
+ return entity_history_system_column_array(system_column, rows);
313
+ }
314
+
315
+ let column_type = spec
316
+ .visible_column(column_name)
317
+ .ok_or_else(|| {
318
+ DataFusionError::Execution(format!(
319
+ "sql2 entity history provider '{}' does not expose column '{}'",
320
+ spec.schema_key, column_name
321
+ ))
322
+ })?
323
+ .column_type;
324
+ let projected_values = rows
325
+ .iter()
326
+ .map(|row| entity_history_column_value(row, spec, column_name))
327
+ .collect::<Result<Vec<_>>>()?;
328
+
329
+ Ok(match column_type {
330
+ EntityColumnType::String | EntityColumnType::Json => Arc::new(StringArray::from(
331
+ projected_values
332
+ .iter()
333
+ .map(|snapshot| entity_json_text_value(snapshot.as_ref(), column_type))
334
+ .collect::<Result<Vec<_>>>()?,
335
+ )) as ArrayRef,
336
+ EntityColumnType::Integer => Arc::new(Int64Array::from(
337
+ projected_values
338
+ .iter()
339
+ .map(|snapshot| entity_i64_value(snapshot.as_ref()))
340
+ .collect::<Vec<_>>(),
341
+ )) as ArrayRef,
342
+ EntityColumnType::Number => Arc::new(Float64Array::from(
343
+ projected_values
344
+ .iter()
345
+ .map(|snapshot| entity_f64_value(snapshot.as_ref()))
346
+ .collect::<Vec<_>>(),
347
+ )) as ArrayRef,
348
+ EntityColumnType::Boolean => Arc::new(BooleanArray::from(
349
+ projected_values
350
+ .iter()
351
+ .map(|snapshot| snapshot.as_ref().and_then(JsonValue::as_bool))
352
+ .collect::<Vec<_>>(),
353
+ )) as ArrayRef,
354
+ })
355
+ }
356
+
357
+ fn entity_history_column_value(
358
+ row: &EntityHistoryRow,
359
+ spec: &EntitySurfaceSpec,
360
+ column_name: &str,
361
+ ) -> Result<Option<JsonValue>> {
362
+ let snapshot = parse_snapshot(row.change.snapshot_content.as_deref())?;
363
+ if let Some(snapshot) = snapshot {
364
+ return Ok(snapshot.get(column_name).cloned());
365
+ }
366
+
367
+ let entity_id = row.change.entity_id.as_json_array_text().map_err(|error| {
368
+ DataFusionError::Execution(format!(
369
+ "sql2 entity history provider failed to project entity id: {error}"
370
+ ))
371
+ })?;
372
+ tombstone_identity_column_value(
373
+ column_name,
374
+ &entity_id,
375
+ HistoryIdentityProjection::PrimaryKeyPaths(&spec.primary_key_paths),
376
+ )
377
+ .map_err(|error| DataFusionError::Execution(error.to_string()))
378
+ }
379
+
380
+ fn entity_history_system_column_array(
381
+ column_name: &str,
382
+ rows: &[EntityHistoryRow],
383
+ ) -> Result<ArrayRef> {
384
+ Ok(match column_name {
385
+ "entity_id" => Arc::new(StringArray::from(
386
+ rows.iter()
387
+ .map(|row| {
388
+ Some(
389
+ row.change
390
+ .entity_id
391
+ .as_json_array_text()
392
+ .expect("canonical change entity identity should project"),
393
+ )
394
+ })
395
+ .collect::<Vec<_>>(),
396
+ )) as ArrayRef,
397
+ "schema_key" => string_array(rows.iter().map(|row| Some(row.change.schema_key.as_str()))),
398
+ "file_id" => string_array(rows.iter().map(|row| row.change.file_id.as_deref())),
399
+ "snapshot_content" => string_array(
400
+ rows.iter()
401
+ .map(|row| row.change.snapshot_content.as_deref()),
402
+ ),
403
+ "metadata" => Arc::new(StringArray::from(
404
+ rows.iter()
405
+ .map(|row| row.change.metadata.as_ref().map(serialize_row_metadata))
406
+ .collect::<Vec<_>>(),
407
+ )) as ArrayRef,
408
+ "change_id" => string_array(rows.iter().map(|row| Some(row.change.id.as_str()))),
409
+ "observed_commit_id" => {
410
+ string_array(rows.iter().map(|row| Some(row.observed_commit_id.as_str())))
411
+ }
412
+ "commit_created_at" => {
413
+ string_array(rows.iter().map(|row| Some(row.commit_created_at.as_str())))
414
+ }
415
+ "start_commit_id" => {
416
+ string_array(rows.iter().map(|row| Some(row.start_commit_id.as_str())))
417
+ }
418
+ "depth" => Arc::new(Int64Array::from(
419
+ rows.iter()
420
+ .map(|row| i64::from(row.depth))
421
+ .collect::<Vec<_>>(),
422
+ )) as ArrayRef,
423
+ other => {
424
+ return Err(DataFusionError::Execution(format!(
425
+ "sql2 entity history provider does not support system column 'lixcol_{other}'"
426
+ )))
427
+ }
428
+ })
429
+ }
430
+
431
+ fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> Result<SchemaRef> {
432
+ let Some(projection) = projection else {
433
+ return Ok(Arc::clone(schema));
434
+ };
435
+ Ok(Arc::new(schema.project(projection)?))
436
+ }
437
+
438
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
439
+ super::error::lix_error_to_datafusion_error(error)
440
+ }