@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,109 @@
1
+ use std::sync::Arc;
2
+
3
+ use serde_json::json;
4
+
5
+ use crate::transaction::types::{TransactionJson, TransactionWriteRow};
6
+ use crate::version::{VersionLifecycle, VersionOperation, VersionReferenceRole};
7
+ use crate::LixError;
8
+ use crate::GLOBAL_VERSION_ID;
9
+
10
+ use super::context::{SessionContext, SessionMode, WORKSPACE_VERSION_KEY};
11
+
12
/// Schema key used for the staged workspace version selector row.
const KEY_VALUE_SCHEMA_KEY: &str = "lix_key_value";
13
+
14
/// Input for [`SessionContext::switch_version`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SwitchVersionOptions {
    /// Identifier of the version the session should switch to.
    pub version_id: String,
}
19
+
20
/// Result handed back by [`SessionContext::switch_version`] once the switch succeeded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SwitchVersionReceipt {
    /// Identifier of the version that was requested and switched to.
    pub version_id: String,
}
25
+
26
+ impl SessionContext {
27
+ /// Switches the session's active version selector.
28
+ ///
29
+ /// Pinned sessions switch in memory and return a new pinned session.
30
+ /// Workspace sessions update the shared workspace selector so other
31
+ /// workspace sessions observe the new active version on their next use.
32
+ pub async fn switch_version(
33
+ &self,
34
+ options: SwitchVersionOptions,
35
+ ) -> Result<(SessionContext, SwitchVersionReceipt), LixError> {
36
+ let version_id = options.version_id;
37
+ let receipt_version_id = version_id.clone();
38
+ let current_mode = self.mode.clone();
39
+ let next_mode = self
40
+ .with_write_transaction(|transaction| {
41
+ Box::pin(async move {
42
+ {
43
+ let reader = transaction.version_ref_reader();
44
+ VersionLifecycle::new(&reader)
45
+ .require_existing_commit_id(
46
+ &version_id,
47
+ VersionOperation::SwitchVersion,
48
+ VersionReferenceRole::Target,
49
+ )
50
+ .await?
51
+ };
52
+
53
+ match current_mode {
54
+ SessionMode::Pinned { .. } => Ok(SessionMode::Pinned {
55
+ version_id: version_id.clone(),
56
+ }),
57
+ SessionMode::Workspace => {
58
+ transaction
59
+ .stage_rows(vec![workspace_version_stage_row(&version_id)?])
60
+ .await?;
61
+ Ok(SessionMode::Workspace)
62
+ }
63
+ }
64
+ })
65
+ })
66
+ .await?;
67
+
68
+ let session = SessionContext::new_with_closed(
69
+ next_mode,
70
+ self.storage.clone(),
71
+ Arc::clone(&self.live_state),
72
+ Arc::clone(&self.tracked_state),
73
+ Arc::clone(&self.binary_cas),
74
+ Arc::clone(&self.commit_store),
75
+ Arc::clone(&self.version_ctx),
76
+ Arc::clone(&self.catalog_context),
77
+ self.closed_flag(),
78
+ );
79
+ Ok((
80
+ session,
81
+ SwitchVersionReceipt {
82
+ version_id: receipt_version_id,
83
+ },
84
+ ))
85
+ }
86
+ }
87
+
88
+ fn workspace_version_stage_row(version_id: &str) -> Result<TransactionWriteRow, LixError> {
89
+ Ok(TransactionWriteRow {
90
+ entity_id: Some(crate::entity_identity::EntityIdentity::single(
91
+ WORKSPACE_VERSION_KEY,
92
+ )),
93
+ schema_key: KEY_VALUE_SCHEMA_KEY.to_string(),
94
+ file_id: None,
95
+ snapshot: Some(TransactionJson::from_value_unchecked(json!({
96
+ "key": WORKSPACE_VERSION_KEY,
97
+ "value": version_id,
98
+ }))),
99
+ metadata: None,
100
+ origin: None,
101
+ created_at: None,
102
+ updated_at: None,
103
+ global: true,
104
+ change_id: None,
105
+ commit_id: None,
106
+ untracked: true,
107
+ version_id: GLOBAL_VERSION_ID.to_string(),
108
+ })
109
+ }
@@ -0,0 +1,331 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use async_trait::async_trait;
5
+ use datafusion::arrow::array::{ArrayRef, StringArray};
6
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
7
+ use datafusion::arrow::record_batch::RecordBatch;
8
+ use datafusion::catalog::{Session, TableProvider};
9
+ use datafusion::common::{DataFusionError, Result};
10
+ use datafusion::datasource::TableType;
11
+ use datafusion::execution::TaskContext;
12
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
13
+ use datafusion::physical_expr::EquivalenceProperties;
14
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
15
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
16
+ use datafusion::physical_plan::{
17
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
18
+ };
19
+ use futures_util::stream;
20
+
21
+ use crate::commit_store::ChangeScanRequest;
22
+ use crate::serialize_row_metadata;
23
+ use crate::LixError;
24
+
25
+ use super::record_batch::record_batch_with_row_count;
26
+ use super::result_metadata::json_field;
27
+ use super::SqlCommitStoreQuerySource;
28
+ use crate::commit_store::{materialize_change, MaterializedChange};
29
+
30
+ pub(crate) async fn register_lix_change_provider(
31
+ session: &datafusion::prelude::SessionContext,
32
+ query_source: SqlCommitStoreQuerySource,
33
+ ) -> Result<(), LixError> {
34
+ session
35
+ .register_table("lix_change", Arc::new(LixChangeProvider::new(query_source)))
36
+ .map_err(datafusion_error_to_lix_error)?;
37
+ Ok(())
38
+ }
39
+
40
+ struct LixChangeProvider {
41
+ schema: SchemaRef,
42
+ query_source: SqlCommitStoreQuerySource,
43
+ }
44
+
45
+ impl std::fmt::Debug for LixChangeProvider {
46
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47
+ f.debug_struct("LixChangeProvider").finish()
48
+ }
49
+ }
50
+
51
+ impl LixChangeProvider {
52
+ fn new(query_source: SqlCommitStoreQuerySource) -> Self {
53
+ Self {
54
+ schema: lix_change_schema(),
55
+ query_source,
56
+ }
57
+ }
58
+ }
59
+
60
+ #[async_trait]
61
+ impl TableProvider for LixChangeProvider {
62
+ fn as_any(&self) -> &dyn Any {
63
+ self
64
+ }
65
+
66
+ fn schema(&self) -> SchemaRef {
67
+ Arc::clone(&self.schema)
68
+ }
69
+
70
+ fn table_type(&self) -> TableType {
71
+ TableType::Base
72
+ }
73
+
74
+ fn supports_filters_pushdown(
75
+ &self,
76
+ filters: &[&Expr],
77
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
78
+ Ok(filters
79
+ .iter()
80
+ .map(|_| TableProviderFilterPushDown::Unsupported)
81
+ .collect())
82
+ }
83
+
84
+ async fn scan(
85
+ &self,
86
+ _state: &dyn Session,
87
+ projection: Option<&Vec<usize>>,
88
+ _filters: &[Expr],
89
+ limit: Option<usize>,
90
+ ) -> Result<Arc<dyn ExecutionPlan>> {
91
+ Ok(Arc::new(LixChangeScanExec::new(
92
+ self.query_source.clone(),
93
+ projected_schema(&self.schema, projection),
94
+ projection.cloned(),
95
+ limit,
96
+ )))
97
+ }
98
+ }
99
+
100
+ struct LixChangeScanExec {
101
+ query_source: SqlCommitStoreQuerySource,
102
+ schema: SchemaRef,
103
+ projection: Option<Vec<usize>>,
104
+ limit: Option<usize>,
105
+ properties: Arc<PlanProperties>,
106
+ }
107
+
108
+ impl std::fmt::Debug for LixChangeScanExec {
109
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110
+ f.debug_struct("LixChangeScanExec").finish()
111
+ }
112
+ }
113
+
114
+ impl LixChangeScanExec {
115
+ fn new(
116
+ query_source: SqlCommitStoreQuerySource,
117
+ schema: SchemaRef,
118
+ projection: Option<Vec<usize>>,
119
+ limit: Option<usize>,
120
+ ) -> Self {
121
+ let properties = PlanProperties::new(
122
+ EquivalenceProperties::new(schema.clone()),
123
+ Partitioning::UnknownPartitioning(1),
124
+ EmissionType::Incremental,
125
+ Boundedness::Bounded,
126
+ );
127
+ Self {
128
+ query_source,
129
+ schema,
130
+ projection,
131
+ limit,
132
+ properties: Arc::new(properties),
133
+ }
134
+ }
135
+ }
136
+
137
+ impl DisplayAs for LixChangeScanExec {
138
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139
+ match t {
140
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
141
+ write!(f, "LixChangeScanExec")
142
+ }
143
+ DisplayFormatType::TreeRender => write!(f, "LixChangeScanExec"),
144
+ }
145
+ }
146
+ }
147
+
148
+ impl ExecutionPlan for LixChangeScanExec {
149
+ fn name(&self) -> &str {
150
+ "LixChangeScanExec"
151
+ }
152
+
153
+ fn as_any(&self) -> &dyn Any {
154
+ self
155
+ }
156
+
157
+ fn properties(&self) -> &Arc<PlanProperties> {
158
+ &self.properties
159
+ }
160
+
161
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
162
+ Vec::new()
163
+ }
164
+
165
+ fn with_new_children(
166
+ self: Arc<Self>,
167
+ children: Vec<Arc<dyn ExecutionPlan>>,
168
+ ) -> Result<Arc<dyn ExecutionPlan>> {
169
+ if !children.is_empty() {
170
+ return Err(DataFusionError::Execution(
171
+ "LixChangeScanExec does not accept children".to_string(),
172
+ ));
173
+ }
174
+ Ok(self)
175
+ }
176
+
177
+ fn execute(
178
+ &self,
179
+ partition: usize,
180
+ _context: Arc<TaskContext>,
181
+ ) -> Result<SendableRecordBatchStream> {
182
+ if partition != 0 {
183
+ return Err(DataFusionError::Execution(format!(
184
+ "LixChangeScanExec only exposes one partition, got {partition}"
185
+ )));
186
+ }
187
+
188
+ let query_source = self.query_source.clone();
189
+ let projection = change_projection_for_scan(self.projection.as_ref());
190
+ let limit = self.limit;
191
+ let schema = Arc::clone(&self.schema);
192
+ let stream = stream::once(async move {
193
+ let mut json_reader = query_source.json_reader;
194
+ let canonical_changes = query_source
195
+ .commit_store_reader
196
+ .scan_changes(&ChangeScanRequest { limit })
197
+ .await
198
+ .map_err(lix_error_to_datafusion_error)?;
199
+ let mut changes = Vec::with_capacity(canonical_changes.len());
200
+ for change in canonical_changes {
201
+ changes.push(
202
+ materialize_change(&mut json_reader, change)
203
+ .await
204
+ .map_err(lix_error_to_datafusion_error)?,
205
+ );
206
+ }
207
+ change_record_batch(&projection, &changes)
208
+ });
209
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
210
+ }
211
+ }
212
+
213
/// Columns of the `lix_change` table, in schema order.
#[derive(Debug, Clone, Copy)]
enum ChangeColumn {
    /// `id` column.
    Id,
    /// `entity_id` column.
    EntityId,
    /// `schema_key` column.
    SchemaKey,
    /// `file_id` column.
    FileId,
    /// `metadata` column.
    Metadata,
    /// `created_at` column.
    CreatedAt,
    /// `snapshot_content` column.
    SnapshotContent,
}
223
+
224
+ fn lix_change_schema() -> SchemaRef {
225
+ Arc::new(Schema::new(vec![
226
+ Field::new("id", DataType::Utf8, false),
227
+ json_field("entity_id", false),
228
+ Field::new("schema_key", DataType::Utf8, false),
229
+ Field::new("file_id", DataType::Utf8, true),
230
+ json_field("metadata", true),
231
+ Field::new("created_at", DataType::Utf8, false),
232
+ json_field("snapshot_content", true),
233
+ ]))
234
+ }
235
+
236
+ fn change_projection_for_scan(projection: Option<&Vec<usize>>) -> Vec<ChangeColumn> {
237
+ let all_columns = vec![
238
+ ChangeColumn::Id,
239
+ ChangeColumn::EntityId,
240
+ ChangeColumn::SchemaKey,
241
+ ChangeColumn::FileId,
242
+ ChangeColumn::Metadata,
243
+ ChangeColumn::CreatedAt,
244
+ ChangeColumn::SnapshotContent,
245
+ ];
246
+ projection.map_or(all_columns.clone(), |indices| {
247
+ indices
248
+ .iter()
249
+ .filter_map(|index| all_columns.get(*index).copied())
250
+ .collect()
251
+ })
252
+ }
253
+
254
+ fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> SchemaRef {
255
+ match projection {
256
+ Some(projection) => Arc::new(schema.project(projection).expect("projection is valid")),
257
+ None => Arc::clone(schema),
258
+ }
259
+ }
260
+
261
+ fn change_record_batch(
262
+ projection: &[ChangeColumn],
263
+ changes: &[MaterializedChange],
264
+ ) -> Result<RecordBatch> {
265
+ let arrays = projection
266
+ .iter()
267
+ .map(|column| match column {
268
+ ChangeColumn::Id => string_array(changes.iter().map(|row| Some(row.id.as_str()))),
269
+ ChangeColumn::EntityId => Arc::new(StringArray::from(
270
+ changes
271
+ .iter()
272
+ .map(|row| {
273
+ Some(
274
+ row.entity_id
275
+ .as_json_array_text()
276
+ .expect("canonical change entity identity should project"),
277
+ )
278
+ })
279
+ .collect::<Vec<_>>(),
280
+ )) as ArrayRef,
281
+ ChangeColumn::SchemaKey => {
282
+ string_array(changes.iter().map(|row| Some(row.schema_key.as_str())))
283
+ }
284
+ ChangeColumn::FileId => string_array(changes.iter().map(|row| row.file_id.as_deref())),
285
+ ChangeColumn::Metadata => Arc::new(StringArray::from(
286
+ changes
287
+ .iter()
288
+ .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
289
+ .collect::<Vec<_>>(),
290
+ )),
291
+ ChangeColumn::CreatedAt => {
292
+ string_array(changes.iter().map(|row| Some(row.created_at.as_str())))
293
+ }
294
+ ChangeColumn::SnapshotContent => {
295
+ string_array(changes.iter().map(|row| row.snapshot_content.as_deref()))
296
+ }
297
+ })
298
+ .collect::<Vec<_>>();
299
+ record_batch_with_row_count(change_schema(projection), arrays, changes.len()).map_err(|error| {
300
+ DataFusionError::Execution(format!("failed to build lix_change batch: {error}"))
301
+ })
302
+ }
303
+
304
+ fn change_schema(projection: &[ChangeColumn]) -> SchemaRef {
305
+ Arc::new(Schema::new(
306
+ projection
307
+ .iter()
308
+ .map(|column| match column {
309
+ ChangeColumn::Id => Field::new("id", DataType::Utf8, false),
310
+ ChangeColumn::EntityId => json_field("entity_id", false),
311
+ ChangeColumn::SchemaKey => Field::new("schema_key", DataType::Utf8, false),
312
+ ChangeColumn::FileId => Field::new("file_id", DataType::Utf8, true),
313
+ ChangeColumn::Metadata => json_field("metadata", true),
314
+ ChangeColumn::CreatedAt => Field::new("created_at", DataType::Utf8, false),
315
+ ChangeColumn::SnapshotContent => json_field("snapshot_content", true),
316
+ })
317
+ .collect::<Vec<_>>(),
318
+ ))
319
+ }
320
+
321
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
322
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
323
+ }
324
+
325
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
326
+ super::error::datafusion_error_to_lix_error(error)
327
+ }
328
+
329
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
330
+ super::error::lix_error_to_datafusion_error(error)
331
+ }
@@ -0,0 +1,182 @@
1
+ use datafusion::sql::parser::Statement as DataFusionStatement;
2
+ use datafusion::sql::sqlparser::ast::{
3
+ FromTable, ObjectName, Query, SetExpr, Statement as SqlStatement, TableFactor, TableObject,
4
+ TableWithJoins,
5
+ };
6
+ use datafusion::sql::sqlparser::dialect::GenericDialect;
7
+ use datafusion::sql::sqlparser::parser::Parser;
8
+
9
+ use crate::LixError;
10
+
11
/// Coarse classification of a SQL statement.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum SqlStatementKind {
    /// A query statement.
    Read,
    /// An `INSERT`, `UPDATE` or `DELETE` statement.
    Write,
    /// Anything else.
    Other,
}
17
+
18
+ pub(crate) fn classify_statement(sql: &str) -> Result<SqlStatementKind, LixError> {
19
+ let statements = parse_sql_statements(sql)?;
20
+ let [statement] = statements.as_slice() else {
21
+ return Ok(SqlStatementKind::Other);
22
+ };
23
+ Ok(classify_ast_statement(statement))
24
+ }
25
+
26
+ pub(crate) fn validate_supported_statement_ast(sql: &str) -> Result<(), LixError> {
27
+ let statements = parse_sql_statements(sql)?;
28
+ let [statement] = statements.as_slice() else {
29
+ return Err(unsupported_sql_error(
30
+ "Lix SQL only supports one statement per execute() call",
31
+ ));
32
+ };
33
+ validate_supported_ast_statement(statement)
34
+ }
35
+
36
+ pub(crate) fn validate_supported_datafusion_statement_ast(
37
+ statement: &DataFusionStatement,
38
+ ) -> Result<(), LixError> {
39
+ match statement {
40
+ DataFusionStatement::Statement(statement) => validate_supported_ast_statement(statement),
41
+ DataFusionStatement::Explain(explain) => {
42
+ validate_supported_datafusion_statement_ast(explain.statement.as_ref())
43
+ }
44
+ _ => Err(unsupported_sql_error(format!(
45
+ "SQL statement is not supported by Lix SQL: {statement}"
46
+ ))),
47
+ }
48
+ }
49
+
50
+ pub(crate) fn datafusion_statement_dml_target_table_names(
51
+ statement: &DataFusionStatement,
52
+ ) -> Vec<String> {
53
+ let mut targets = Vec::new();
54
+ collect_datafusion_statement_dml_target_table_names(statement, &mut targets);
55
+ targets
56
+ }
57
+
58
+ fn parse_sql_statements(sql: &str) -> Result<Vec<SqlStatement>, LixError> {
59
+ Parser::parse_sql(&GenericDialect {}, sql).map_err(|error| {
60
+ LixError::new(
61
+ LixError::CODE_PARSE_ERROR,
62
+ format!("sql2 SQL parse error: {error}"),
63
+ )
64
+ })
65
+ }
66
+
67
+ fn collect_datafusion_statement_dml_target_table_names(
68
+ statement: &DataFusionStatement,
69
+ targets: &mut Vec<String>,
70
+ ) {
71
+ match statement {
72
+ DataFusionStatement::Statement(statement) => {
73
+ collect_dml_target_table_names(statement, targets);
74
+ }
75
+ DataFusionStatement::Explain(explain) => {
76
+ collect_datafusion_statement_dml_target_table_names(
77
+ explain.statement.as_ref(),
78
+ targets,
79
+ );
80
+ }
81
+ _ => {}
82
+ }
83
+ }
84
+
85
+ fn collect_dml_target_table_names(statement: &SqlStatement, targets: &mut Vec<String>) {
86
+ match statement {
87
+ SqlStatement::Insert(insert) => {
88
+ if let TableObject::TableName(name) = &insert.table {
89
+ if let Some(table_name) = object_name_table_part(name) {
90
+ targets.push(table_name);
91
+ }
92
+ }
93
+ }
94
+ SqlStatement::Update(update) => {
95
+ collect_table_with_joins_target(&update.table, targets);
96
+ }
97
+ SqlStatement::Delete(delete) => {
98
+ let tables = match &delete.from {
99
+ FromTable::WithFromKeyword(tables) | FromTable::WithoutKeyword(tables) => tables,
100
+ };
101
+ for table in tables {
102
+ collect_table_with_joins_target(table, targets);
103
+ }
104
+ }
105
+ SqlStatement::Explain { statement, .. } => {
106
+ collect_dml_target_table_names(statement.as_ref(), targets);
107
+ }
108
+ _ => {}
109
+ }
110
+ }
111
+
112
+ fn collect_table_with_joins_target(table: &TableWithJoins, targets: &mut Vec<String>) {
113
+ if let TableFactor::Table { name, .. } = &table.relation {
114
+ if let Some(table_name) = object_name_table_part(name) {
115
+ targets.push(table_name);
116
+ }
117
+ }
118
+ }
119
+
120
+ fn object_name_table_part(name: &ObjectName) -> Option<String> {
121
+ name.0.last().and_then(|part| part.as_ident()).map(|ident| {
122
+ if ident.quote_style.is_some() {
123
+ ident.value.clone()
124
+ } else {
125
+ ident.value.to_ascii_lowercase()
126
+ }
127
+ })
128
+ }
129
+
130
+ fn classify_ast_statement(statement: &SqlStatement) -> SqlStatementKind {
131
+ match statement {
132
+ SqlStatement::Insert(_) | SqlStatement::Update(_) | SqlStatement::Delete(_) => {
133
+ SqlStatementKind::Write
134
+ }
135
+ SqlStatement::Query(_) => SqlStatementKind::Read,
136
+ SqlStatement::Explain { statement, .. } => classify_ast_statement(statement.as_ref()),
137
+ _ => SqlStatementKind::Other,
138
+ }
139
+ }
140
+
141
+ fn validate_supported_ast_statement(statement: &SqlStatement) -> Result<(), LixError> {
142
+ match statement {
143
+ SqlStatement::Query(query) => validate_supported_query(query),
144
+ SqlStatement::Insert(_) | SqlStatement::Update(_) | SqlStatement::Delete(_) => Ok(()),
145
+ SqlStatement::Explain { statement, .. } => validate_supported_ast_statement(statement),
146
+ _ => Err(unsupported_sql_error(format!(
147
+ "SQL statement is not supported by Lix SQL: {statement}"
148
+ ))),
149
+ }
150
+ }
151
+
152
+ fn validate_supported_query(query: &Query) -> Result<(), LixError> {
153
+ if query.with.as_ref().is_some_and(|with| with.recursive) {
154
+ return Err(
155
+ unsupported_sql_error("recursive CTEs are not supported by Lix SQL").with_hint(
156
+ "Use explicit commit graph surfaces such as lix_commit, lix_commit_edge, and lix_state_history instead of WITH RECURSIVE.",
157
+ ),
158
+ );
159
+ }
160
+
161
+ if let Some(with) = &query.with {
162
+ for cte in &with.cte_tables {
163
+ validate_supported_query(&cte.query)?;
164
+ }
165
+ }
166
+ validate_supported_set_expr(&query.body)
167
+ }
168
+
169
+ fn validate_supported_set_expr(expr: &SetExpr) -> Result<(), LixError> {
170
+ match expr {
171
+ SetExpr::Query(query) => validate_supported_query(query),
172
+ SetExpr::SetOperation { left, right, .. } => {
173
+ validate_supported_set_expr(left)?;
174
+ validate_supported_set_expr(right)
175
+ }
176
+ _ => Ok(()),
177
+ }
178
+ }
179
+
180
+ fn unsupported_sql_error(message: impl Into<String>) -> LixError {
181
+ LixError::new(LixError::CODE_UNSUPPORTED_SQL, message)
182
+ }