@lix-js/sdk 0.6.0-preview.0 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/README.md +9 -0
  2. package/SKILL.md +468 -0
  3. package/dist/engine-wasm/index.d.ts +15 -11
  4. package/dist/engine-wasm/index.js +105 -38
  5. package/dist/engine-wasm/wasm/lix_engine.d.ts +14 -2
  6. package/dist/engine-wasm/wasm/lix_engine.js +18 -17
  7. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  8. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +2 -1
  9. package/dist/generated/builtin-schemas.d.ts +31 -41
  10. package/dist/generated/builtin-schemas.js +52 -56
  11. package/dist/open-lix.d.ts +141 -24
  12. package/dist/open-lix.js +199 -35
  13. package/dist/sqlite/index.js +99 -22
  14. package/dist-engine-src/README.md +18 -0
  15. package/dist-engine-src/src/backend/kv.rs +358 -0
  16. package/dist-engine-src/src/backend/mod.rs +12 -0
  17. package/dist-engine-src/src/backend/testing.rs +658 -0
  18. package/dist-engine-src/src/backend/types.rs +96 -0
  19. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  20. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  21. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  22. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  23. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  24. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  25. package/dist-engine-src/src/cel/context.rs +86 -0
  26. package/dist-engine-src/src/cel/error.rs +19 -0
  27. package/dist-engine-src/src/cel/mod.rs +8 -0
  28. package/dist-engine-src/src/cel/provider.rs +9 -0
  29. package/dist-engine-src/src/cel/runtime.rs +167 -0
  30. package/dist-engine-src/src/cel/value.rs +50 -0
  31. package/dist-engine-src/src/changelog/codec.rs +321 -0
  32. package/dist-engine-src/src/changelog/context.rs +92 -0
  33. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  34. package/dist-engine-src/src/changelog/mod.rs +13 -0
  35. package/dist-engine-src/src/changelog/reader.rs +20 -0
  36. package/dist-engine-src/src/changelog/storage.rs +220 -0
  37. package/dist-engine-src/src/changelog/types.rs +38 -0
  38. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  39. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  40. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  41. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +135 -0
  46. package/dist-engine-src/src/common/metadata.rs +35 -0
  47. package/dist-engine-src/src/common/mod.rs +23 -0
  48. package/dist-engine-src/src/common/types.rs +105 -0
  49. package/dist-engine-src/src/common/wire.rs +222 -0
  50. package/dist-engine-src/src/engine.rs +239 -0
  51. package/dist-engine-src/src/entity_identity.rs +285 -0
  52. package/dist-engine-src/src/functions/context.rs +327 -0
  53. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  54. package/dist-engine-src/src/functions/mod.rs +18 -0
  55. package/dist-engine-src/src/functions/provider.rs +130 -0
  56. package/dist-engine-src/src/functions/state.rs +363 -0
  57. package/dist-engine-src/src/functions/types.rs +37 -0
  58. package/dist-engine-src/src/init.rs +505 -0
  59. package/dist-engine-src/src/json_store/compression.rs +77 -0
  60. package/dist-engine-src/src/json_store/context.rs +129 -0
  61. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  62. package/dist-engine-src/src/json_store/mod.rs +9 -0
  63. package/dist-engine-src/src/json_store/store.rs +236 -0
  64. package/dist-engine-src/src/json_store/types.rs +52 -0
  65. package/dist-engine-src/src/lib.rs +61 -0
  66. package/dist-engine-src/src/live_state/context.rs +2241 -0
  67. package/dist-engine-src/src/live_state/mod.rs +15 -0
  68. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  69. package/dist-engine-src/src/live_state/reader.rs +23 -0
  70. package/dist-engine-src/src/live_state/types.rs +239 -0
  71. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  72. package/dist-engine-src/src/plugin/archive.rs +441 -0
  73. package/dist-engine-src/src/plugin/component.rs +183 -0
  74. package/dist-engine-src/src/plugin/install.rs +637 -0
  75. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  76. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  77. package/dist-engine-src/src/plugin/mod.rs +33 -0
  78. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  79. package/dist-engine-src/src/plugin/storage.rs +74 -0
  80. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  81. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  82. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  83. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  84. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  85. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  86. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  92. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  93. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  94. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  100. package/dist-engine-src/src/schema/definition.json +157 -0
  101. package/dist-engine-src/src/schema/definition.rs +636 -0
  102. package/dist-engine-src/src/schema/key.rs +206 -0
  103. package/dist-engine-src/src/schema/mod.rs +20 -0
  104. package/dist-engine-src/src/schema/seed.rs +14 -0
  105. package/dist-engine-src/src/schema/tests.rs +739 -0
  106. package/dist-engine-src/src/schema_registry.rs +294 -0
  107. package/dist-engine-src/src/session/context.rs +366 -0
  108. package/dist-engine-src/src/session/create_version.rs +80 -0
  109. package/dist-engine-src/src/session/execute.rs +447 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +437 -0
  116. package/dist-engine-src/src/session/mod.rs +25 -0
  117. package/dist-engine-src/src/session/switch_version.rs +121 -0
  118. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  119. package/dist-engine-src/src/sql2/classify.rs +147 -0
  120. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  121. package/dist-engine-src/src/sql2/context.rs +307 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  127. package/dist-engine-src/src/sql2/error.rs +196 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  138. package/dist-engine-src/src/sql2/mod.rs +43 -0
  139. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  140. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  141. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  142. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  143. package/dist-engine-src/src/sql2/session.rs +135 -0
  144. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  148. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  149. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  150. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  153. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  154. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  155. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  156. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  157. package/dist-engine-src/src/storage/context.rs +356 -0
  158. package/dist-engine-src/src/storage/mod.rs +14 -0
  159. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  160. package/dist-engine-src/src/storage/types.rs +501 -0
  161. package/dist-engine-src/src/storage_bench.rs +3406 -0
  162. package/dist-engine-src/src/test_support.rs +81 -0
  163. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  164. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  165. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  166. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  167. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  168. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  169. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  170. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  171. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  172. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  173. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  174. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  175. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  176. package/dist-engine-src/src/transaction/context.rs +1307 -0
  177. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  178. package/dist-engine-src/src/transaction/mod.rs +11 -0
  179. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  180. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  181. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  182. package/dist-engine-src/src/transaction/types.rs +351 -0
  183. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  184. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  185. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  186. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  187. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  188. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  189. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  190. package/dist-engine-src/src/version/context.rs +52 -0
  191. package/dist-engine-src/src/version/mod.rs +12 -0
  192. package/dist-engine-src/src/version/refs.rs +421 -0
  193. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  194. package/dist-engine-src/src/version/types.rs +21 -0
  195. package/dist-engine-src/src/wasm/mod.rs +60 -0
  196. package/package.json +68 -63
@@ -0,0 +1,121 @@
1
+ use std::sync::Arc;
2
+
3
+ use serde_json::json;
4
+
5
+ use crate::transaction::types::StageRow;
6
+ use crate::version::VersionRefReader;
7
+ use crate::LixError;
8
+ use crate::GLOBAL_VERSION_ID;
9
+
10
+ use super::context::{SessionContext, SessionMode, WORKSPACE_VERSION_KEY};
11
+
12
/// Schema key written on the staged row that stores the workspace version selector.
const KEY_VALUE_SCHEMA_KEY: &str = "lix_key_value";
/// Schema version written alongside `KEY_VALUE_SCHEMA_KEY` on that staged row.
const KEY_VALUE_SCHEMA_VERSION: &str = "1";
14
+
15
/// Input accepted by `SessionContext::switch_version`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SwitchVersionOptions {
    /// Identifier of the version the session should switch to.
    pub version_id: String,
}
20
+
21
/// Result handed back by `SessionContext::switch_version` on success.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SwitchVersionReceipt {
    /// Identifier of the version that was requested and switched to.
    pub version_id: String,
}
26
+
27
+ impl SessionContext {
28
+ /// Switches the session's active version selector.
29
+ ///
30
+ /// Pinned sessions switch in memory and return a new pinned session.
31
+ /// Workspace sessions update the shared workspace selector so other
32
+ /// workspace sessions observe the new active version on their next use.
33
+ pub async fn switch_version(
34
+ &self,
35
+ options: SwitchVersionOptions,
36
+ ) -> Result<(SessionContext, SwitchVersionReceipt), LixError> {
37
+ let version_id = options.version_id;
38
+ let receipt_version_id = version_id.clone();
39
+ let current_mode = self.mode.clone();
40
+ let next_mode = self
41
+ .with_write_transaction(|transaction| {
42
+ Box::pin(async move {
43
+ let head = {
44
+ let reader = transaction.version_ref_reader();
45
+ reader.load_head_commit_id(&version_id).await?
46
+ };
47
+ if head.is_none() {
48
+ return Err(LixError::version_not_found(
49
+ version_id.clone(),
50
+ "switch_version",
51
+ "target",
52
+ ));
53
+ }
54
+
55
+ match current_mode {
56
+ SessionMode::Pinned { .. } => Ok(SessionMode::Pinned {
57
+ version_id: version_id.clone(),
58
+ }),
59
+ SessionMode::Workspace => {
60
+ transaction
61
+ .stage_rows(vec![workspace_version_stage_row(&version_id)?])
62
+ .await?;
63
+ Ok(SessionMode::Workspace)
64
+ }
65
+ }
66
+ })
67
+ })
68
+ .await?;
69
+
70
+ let session = SessionContext::new_with_closed(
71
+ next_mode,
72
+ self.storage.clone(),
73
+ Arc::clone(&self.live_state),
74
+ Arc::clone(&self.tracked_state),
75
+ Arc::clone(&self.binary_cas),
76
+ Arc::clone(&self.changelog),
77
+ Arc::clone(&self.version_ctx),
78
+ Arc::clone(&self.schema_registry),
79
+ self.closed_flag(),
80
+ );
81
+ Ok((
82
+ session,
83
+ SwitchVersionReceipt {
84
+ version_id: receipt_version_id,
85
+ },
86
+ ))
87
+ }
88
+ }
89
+
90
+ fn workspace_version_stage_row(version_id: &str) -> Result<StageRow, LixError> {
91
+ Ok(StageRow {
92
+ entity_id: Some(crate::entity_identity::EntityIdentity::single(
93
+ WORKSPACE_VERSION_KEY,
94
+ )),
95
+ schema_key: KEY_VALUE_SCHEMA_KEY.to_string(),
96
+ file_id: None,
97
+ snapshot_content: Some(encode_snapshot(json!({
98
+ "key": WORKSPACE_VERSION_KEY,
99
+ "value": version_id,
100
+ }))?),
101
+ metadata: None,
102
+ origin: None,
103
+ schema_version: KEY_VALUE_SCHEMA_VERSION.to_string(),
104
+ created_at: None,
105
+ updated_at: None,
106
+ global: true,
107
+ change_id: None,
108
+ commit_id: None,
109
+ untracked: true,
110
+ version_id: GLOBAL_VERSION_ID.to_string(),
111
+ })
112
+ }
113
+
114
+ fn encode_snapshot(value: serde_json::Value) -> Result<String, LixError> {
115
+ serde_json::to_string(&value).map_err(|error| {
116
+ LixError::new(
117
+ "LIX_ERROR_UNKNOWN",
118
+ format!("engine2 switch_version snapshot serialization failed: {error}"),
119
+ )
120
+ })
121
+ }
@@ -0,0 +1,337 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use async_trait::async_trait;
5
+ use datafusion::arrow::array::{ArrayRef, StringArray};
6
+ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
7
+ use datafusion::arrow::record_batch::RecordBatch;
8
+ use datafusion::catalog::{Session, TableProvider};
9
+ use datafusion::common::{DataFusionError, Result};
10
+ use datafusion::datasource::TableType;
11
+ use datafusion::execution::TaskContext;
12
+ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
13
+ use datafusion::physical_expr::EquivalenceProperties;
14
+ use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
15
+ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
16
+ use datafusion::physical_plan::{
17
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
18
+ };
19
+ use futures_util::stream;
20
+
21
+ use crate::changelog::{materialize_change, ChangelogScanRequest, MaterializedCanonicalChange};
22
+ use crate::serialize_row_metadata;
23
+ use crate::LixError;
24
+
25
+ use super::record_batch::record_batch_with_row_count;
26
+ use super::result_metadata::json_field;
27
+ use super::SqlChangelogQuerySource;
28
+
29
+ pub(crate) async fn register_lix_change_provider(
30
+ session: &datafusion::prelude::SessionContext,
31
+ query_source: SqlChangelogQuerySource,
32
+ ) -> Result<(), LixError> {
33
+ session
34
+ .register_table("lix_change", Arc::new(LixChangeProvider::new(query_source)))
35
+ .map_err(datafusion_error_to_lix_error)?;
36
+ Ok(())
37
+ }
38
+
39
+ struct LixChangeProvider {
40
+ schema: SchemaRef,
41
+ query_source: SqlChangelogQuerySource,
42
+ }
43
+
44
+ impl std::fmt::Debug for LixChangeProvider {
45
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46
+ f.debug_struct("LixChangeProvider").finish()
47
+ }
48
+ }
49
+
50
+ impl LixChangeProvider {
51
+ fn new(query_source: SqlChangelogQuerySource) -> Self {
52
+ Self {
53
+ schema: lix_change_schema(),
54
+ query_source,
55
+ }
56
+ }
57
+ }
58
+
59
+ #[async_trait]
60
+ impl TableProvider for LixChangeProvider {
61
+ fn as_any(&self) -> &dyn Any {
62
+ self
63
+ }
64
+
65
+ fn schema(&self) -> SchemaRef {
66
+ Arc::clone(&self.schema)
67
+ }
68
+
69
+ fn table_type(&self) -> TableType {
70
+ TableType::Base
71
+ }
72
+
73
+ fn supports_filters_pushdown(
74
+ &self,
75
+ filters: &[&Expr],
76
+ ) -> Result<Vec<TableProviderFilterPushDown>> {
77
+ Ok(filters
78
+ .iter()
79
+ .map(|_| TableProviderFilterPushDown::Unsupported)
80
+ .collect())
81
+ }
82
+
83
+ async fn scan(
84
+ &self,
85
+ _state: &dyn Session,
86
+ projection: Option<&Vec<usize>>,
87
+ _filters: &[Expr],
88
+ limit: Option<usize>,
89
+ ) -> Result<Arc<dyn ExecutionPlan>> {
90
+ Ok(Arc::new(LixChangeScanExec::new(
91
+ self.query_source.clone(),
92
+ projected_schema(&self.schema, projection),
93
+ projection.cloned(),
94
+ limit,
95
+ )))
96
+ }
97
+ }
98
+
99
+ struct LixChangeScanExec {
100
+ query_source: SqlChangelogQuerySource,
101
+ schema: SchemaRef,
102
+ projection: Option<Vec<usize>>,
103
+ limit: Option<usize>,
104
+ properties: Arc<PlanProperties>,
105
+ }
106
+
107
+ impl std::fmt::Debug for LixChangeScanExec {
108
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
109
+ f.debug_struct("LixChangeScanExec").finish()
110
+ }
111
+ }
112
+
113
+ impl LixChangeScanExec {
114
+ fn new(
115
+ query_source: SqlChangelogQuerySource,
116
+ schema: SchemaRef,
117
+ projection: Option<Vec<usize>>,
118
+ limit: Option<usize>,
119
+ ) -> Self {
120
+ let properties = PlanProperties::new(
121
+ EquivalenceProperties::new(schema.clone()),
122
+ Partitioning::UnknownPartitioning(1),
123
+ EmissionType::Incremental,
124
+ Boundedness::Bounded,
125
+ );
126
+ Self {
127
+ query_source,
128
+ schema,
129
+ projection,
130
+ limit,
131
+ properties: Arc::new(properties),
132
+ }
133
+ }
134
+ }
135
+
136
+ impl DisplayAs for LixChangeScanExec {
137
+ fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
138
+ match t {
139
+ DisplayFormatType::Default | DisplayFormatType::Verbose => {
140
+ write!(f, "LixChangeScanExec")
141
+ }
142
+ DisplayFormatType::TreeRender => write!(f, "LixChangeScanExec"),
143
+ }
144
+ }
145
+ }
146
+
147
+ impl ExecutionPlan for LixChangeScanExec {
148
+ fn name(&self) -> &str {
149
+ "LixChangeScanExec"
150
+ }
151
+
152
+ fn as_any(&self) -> &dyn Any {
153
+ self
154
+ }
155
+
156
+ fn properties(&self) -> &Arc<PlanProperties> {
157
+ &self.properties
158
+ }
159
+
160
+ fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
161
+ Vec::new()
162
+ }
163
+
164
+ fn with_new_children(
165
+ self: Arc<Self>,
166
+ children: Vec<Arc<dyn ExecutionPlan>>,
167
+ ) -> Result<Arc<dyn ExecutionPlan>> {
168
+ if !children.is_empty() {
169
+ return Err(DataFusionError::Execution(
170
+ "LixChangeScanExec does not accept children".to_string(),
171
+ ));
172
+ }
173
+ Ok(self)
174
+ }
175
+
176
+ fn execute(
177
+ &self,
178
+ partition: usize,
179
+ _context: Arc<TaskContext>,
180
+ ) -> Result<SendableRecordBatchStream> {
181
+ if partition != 0 {
182
+ return Err(DataFusionError::Execution(format!(
183
+ "LixChangeScanExec only exposes one partition, got {partition}"
184
+ )));
185
+ }
186
+
187
+ let query_source = self.query_source.clone();
188
+ let projection = change_projection_for_scan(self.projection.as_ref());
189
+ let limit = self.limit;
190
+ let schema = Arc::clone(&self.schema);
191
+ let stream = stream::once(async move {
192
+ let mut json_reader = query_source.json_reader;
193
+ let canonical_changes = query_source
194
+ .changelog_reader
195
+ .scan_changes(&ChangelogScanRequest { limit })
196
+ .await
197
+ .map_err(lix_error_to_datafusion_error)?;
198
+ let mut changes = Vec::with_capacity(canonical_changes.len());
199
+ for change in canonical_changes {
200
+ changes.push(
201
+ materialize_change(&mut json_reader, change)
202
+ .await
203
+ .map_err(lix_error_to_datafusion_error)?,
204
+ );
205
+ }
206
+ change_record_batch(&projection, &changes)
207
+ });
208
+ Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
209
+ }
210
+ }
211
+
212
/// Columns of the `lix_change` table, declared in table-schema order.
#[derive(Clone, Copy, Debug)]
enum ChangeColumn {
    /// `id`
    Id,
    /// `entity_id`
    EntityId,
    /// `schema_key`
    SchemaKey,
    /// `schema_version`
    SchemaVersion,
    /// `file_id` (nullable)
    FileId,
    /// `metadata` (nullable JSON)
    Metadata,
    /// `created_at`
    CreatedAt,
    /// `snapshot_content` (nullable JSON)
    SnapshotContent,
}
223
+
224
+ fn lix_change_schema() -> SchemaRef {
225
+ Arc::new(Schema::new(vec![
226
+ Field::new("id", DataType::Utf8, false),
227
+ Field::new("entity_id", DataType::Utf8, false),
228
+ Field::new("schema_key", DataType::Utf8, false),
229
+ Field::new("schema_version", DataType::Utf8, false),
230
+ Field::new("file_id", DataType::Utf8, true),
231
+ json_field("metadata", true),
232
+ Field::new("created_at", DataType::Utf8, false),
233
+ json_field("snapshot_content", true),
234
+ ]))
235
+ }
236
+
237
+ fn change_projection_for_scan(projection: Option<&Vec<usize>>) -> Vec<ChangeColumn> {
238
+ let all_columns = vec![
239
+ ChangeColumn::Id,
240
+ ChangeColumn::EntityId,
241
+ ChangeColumn::SchemaKey,
242
+ ChangeColumn::SchemaVersion,
243
+ ChangeColumn::FileId,
244
+ ChangeColumn::Metadata,
245
+ ChangeColumn::CreatedAt,
246
+ ChangeColumn::SnapshotContent,
247
+ ];
248
+ projection.map_or(all_columns.clone(), |indices| {
249
+ indices
250
+ .iter()
251
+ .filter_map(|index| all_columns.get(*index).copied())
252
+ .collect()
253
+ })
254
+ }
255
+
256
+ fn projected_schema(schema: &SchemaRef, projection: Option<&Vec<usize>>) -> SchemaRef {
257
+ match projection {
258
+ Some(projection) => Arc::new(schema.project(projection).expect("projection is valid")),
259
+ None => Arc::clone(schema),
260
+ }
261
+ }
262
+
263
+ fn change_record_batch(
264
+ projection: &[ChangeColumn],
265
+ changes: &[MaterializedCanonicalChange],
266
+ ) -> Result<RecordBatch> {
267
+ let arrays = projection
268
+ .iter()
269
+ .map(|column| match column {
270
+ ChangeColumn::Id => string_array(changes.iter().map(|row| Some(row.id.as_str()))),
271
+ ChangeColumn::EntityId => Arc::new(StringArray::from(
272
+ changes
273
+ .iter()
274
+ .map(|row| {
275
+ Some(
276
+ row.entity_id
277
+ .as_string()
278
+ .expect("canonical change entity identity should project"),
279
+ )
280
+ })
281
+ .collect::<Vec<_>>(),
282
+ )) as ArrayRef,
283
+ ChangeColumn::SchemaKey => {
284
+ string_array(changes.iter().map(|row| Some(row.schema_key.as_str())))
285
+ }
286
+ ChangeColumn::SchemaVersion => {
287
+ string_array(changes.iter().map(|row| Some(row.schema_version.as_str())))
288
+ }
289
+ ChangeColumn::FileId => string_array(changes.iter().map(|row| row.file_id.as_deref())),
290
+ ChangeColumn::Metadata => Arc::new(StringArray::from(
291
+ changes
292
+ .iter()
293
+ .map(|row| row.metadata.as_ref().map(serialize_row_metadata))
294
+ .collect::<Vec<_>>(),
295
+ )),
296
+ ChangeColumn::CreatedAt => {
297
+ string_array(changes.iter().map(|row| Some(row.created_at.as_str())))
298
+ }
299
+ ChangeColumn::SnapshotContent => {
300
+ string_array(changes.iter().map(|row| row.snapshot_content.as_deref()))
301
+ }
302
+ })
303
+ .collect::<Vec<_>>();
304
+ record_batch_with_row_count(change_schema(projection), arrays, changes.len()).map_err(|error| {
305
+ DataFusionError::Execution(format!("failed to build lix_change batch: {error}"))
306
+ })
307
+ }
308
+
309
+ fn change_schema(projection: &[ChangeColumn]) -> SchemaRef {
310
+ Arc::new(Schema::new(
311
+ projection
312
+ .iter()
313
+ .map(|column| match column {
314
+ ChangeColumn::Id => Field::new("id", DataType::Utf8, false),
315
+ ChangeColumn::EntityId => Field::new("entity_id", DataType::Utf8, false),
316
+ ChangeColumn::SchemaKey => Field::new("schema_key", DataType::Utf8, false),
317
+ ChangeColumn::SchemaVersion => Field::new("schema_version", DataType::Utf8, false),
318
+ ChangeColumn::FileId => Field::new("file_id", DataType::Utf8, true),
319
+ ChangeColumn::Metadata => json_field("metadata", true),
320
+ ChangeColumn::CreatedAt => Field::new("created_at", DataType::Utf8, false),
321
+ ChangeColumn::SnapshotContent => json_field("snapshot_content", true),
322
+ })
323
+ .collect::<Vec<_>>(),
324
+ ))
325
+ }
326
+
327
+ fn string_array<'a>(values: impl Iterator<Item = Option<&'a str>>) -> ArrayRef {
328
+ Arc::new(StringArray::from(values.collect::<Vec<_>>())) as ArrayRef
329
+ }
330
+
331
+ fn datafusion_error_to_lix_error(error: DataFusionError) -> LixError {
332
+ super::error::datafusion_error_to_lix_error(error)
333
+ }
334
+
335
+ fn lix_error_to_datafusion_error(error: LixError) -> DataFusionError {
336
+ super::error::lix_error_to_datafusion_error(error)
337
+ }
@@ -0,0 +1,147 @@
1
+ use datafusion::sql::sqlparser::ast::{
2
+ FromTable, ObjectName, Query, SetExpr, Statement, TableFactor, TableObject, TableWithJoins,
3
+ };
4
+ use datafusion::sql::sqlparser::dialect::GenericDialect;
5
+ use datafusion::sql::sqlparser::parser::Parser;
6
+
7
+ use crate::LixError;
8
+
9
/// Coarse classification of a single SQL statement.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum SqlStatementKind {
    /// A query statement.
    Read,
    /// An `INSERT`, `UPDATE` or `DELETE` statement.
    Write,
    /// Anything else, including input that is not exactly one statement.
    Other,
}
15
+
16
+ pub(crate) fn classify_statement(sql: &str) -> Result<SqlStatementKind, LixError> {
17
+ let statements = parse_sql_statements(sql)?;
18
+ let [statement] = statements.as_slice() else {
19
+ return Ok(SqlStatementKind::Other);
20
+ };
21
+ Ok(classify_ast_statement(statement))
22
+ }
23
+
24
+ pub(crate) fn validate_supported_statement_ast(sql: &str) -> Result<(), LixError> {
25
+ let statements = parse_sql_statements(sql)?;
26
+ let [statement] = statements.as_slice() else {
27
+ return Err(unsupported_sql_error(
28
+ "Lix SQL only supports one statement per execute() call",
29
+ ));
30
+ };
31
+ validate_supported_ast_statement(statement)
32
+ }
33
+
34
+ pub(crate) fn dml_target_table_names(sql: &str) -> Result<Vec<String>, LixError> {
35
+ let statements = parse_sql_statements(sql)?;
36
+ let [statement] = statements.as_slice() else {
37
+ return Ok(Vec::new());
38
+ };
39
+ let mut targets = Vec::new();
40
+ collect_dml_target_table_names(statement, &mut targets);
41
+ Ok(targets)
42
+ }
43
+
44
+ fn parse_sql_statements(sql: &str) -> Result<Vec<Statement>, LixError> {
45
+ Parser::parse_sql(&GenericDialect {}, sql).map_err(|error| {
46
+ LixError::new(
47
+ LixError::CODE_PARSE_ERROR,
48
+ format!("sql2 SQL parse error: {error}"),
49
+ )
50
+ })
51
+ }
52
+
53
+ fn collect_dml_target_table_names(statement: &Statement, targets: &mut Vec<String>) {
54
+ match statement {
55
+ Statement::Insert(insert) => {
56
+ if let TableObject::TableName(name) = &insert.table {
57
+ if let Some(table_name) = object_name_table_part(name) {
58
+ targets.push(table_name);
59
+ }
60
+ }
61
+ }
62
+ Statement::Update(update) => {
63
+ collect_table_with_joins_target(&update.table, targets);
64
+ }
65
+ Statement::Delete(delete) => {
66
+ let tables = match &delete.from {
67
+ FromTable::WithFromKeyword(tables) | FromTable::WithoutKeyword(tables) => tables,
68
+ };
69
+ for table in tables {
70
+ collect_table_with_joins_target(table, targets);
71
+ }
72
+ }
73
+ Statement::Explain { statement, .. } => {
74
+ collect_dml_target_table_names(statement.as_ref(), targets);
75
+ }
76
+ _ => {}
77
+ }
78
+ }
79
+
80
+ fn collect_table_with_joins_target(table: &TableWithJoins, targets: &mut Vec<String>) {
81
+ if let TableFactor::Table { name, .. } = &table.relation {
82
+ if let Some(table_name) = object_name_table_part(name) {
83
+ targets.push(table_name);
84
+ }
85
+ }
86
+ }
87
+
88
+ fn object_name_table_part(name: &ObjectName) -> Option<String> {
89
+ name.0
90
+ .last()
91
+ .and_then(|part| part.as_ident())
92
+ .map(|ident| ident.value.clone())
93
+ }
94
+
95
+ fn classify_ast_statement(statement: &Statement) -> SqlStatementKind {
96
+ match statement {
97
+ Statement::Insert(_) | Statement::Update(_) | Statement::Delete(_) => {
98
+ SqlStatementKind::Write
99
+ }
100
+ Statement::Query(_) => SqlStatementKind::Read,
101
+ Statement::Explain { statement, .. } => classify_ast_statement(statement.as_ref()),
102
+ _ => SqlStatementKind::Other,
103
+ }
104
+ }
105
+
106
+ fn validate_supported_ast_statement(statement: &Statement) -> Result<(), LixError> {
107
+ match statement {
108
+ Statement::Query(query) => validate_supported_query(query),
109
+ Statement::Insert(_) | Statement::Update(_) | Statement::Delete(_) => Ok(()),
110
+ Statement::Explain { statement, .. } => validate_supported_ast_statement(statement),
111
+ _ => Err(unsupported_sql_error(format!(
112
+ "SQL statement is not supported by Lix SQL: {statement}"
113
+ ))),
114
+ }
115
+ }
116
+
117
+ fn validate_supported_query(query: &Query) -> Result<(), LixError> {
118
+ if query.with.as_ref().is_some_and(|with| with.recursive) {
119
+ return Err(
120
+ unsupported_sql_error("recursive CTEs are not supported by Lix SQL").with_hint(
121
+ "Use explicit commit graph surfaces such as lix_commit, lix_commit_edge, and lix_state_history instead of WITH RECURSIVE.",
122
+ ),
123
+ );
124
+ }
125
+
126
+ if let Some(with) = &query.with {
127
+ for cte in &with.cte_tables {
128
+ validate_supported_query(&cte.query)?;
129
+ }
130
+ }
131
+ validate_supported_set_expr(&query.body)
132
+ }
133
+
134
+ fn validate_supported_set_expr(expr: &SetExpr) -> Result<(), LixError> {
135
+ match expr {
136
+ SetExpr::Query(query) => validate_supported_query(query),
137
+ SetExpr::SetOperation { left, right, .. } => {
138
+ validate_supported_set_expr(left)?;
139
+ validate_supported_set_expr(right)
140
+ }
141
+ _ => Ok(()),
142
+ }
143
+ }
144
+
145
+ fn unsupported_sql_error(message: impl Into<String>) -> LixError {
146
+ LixError::new(LixError::CODE_UNSUPPORTED_SQL, message)
147
+ }