@lix-js/sdk 0.6.0-preview.0 → 0.6.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/README.md +9 -0
  2. package/SKILL.md +468 -0
  3. package/dist/engine-wasm/index.d.ts +15 -11
  4. package/dist/engine-wasm/index.js +105 -38
  5. package/dist/engine-wasm/wasm/lix_engine.d.ts +14 -2
  6. package/dist/engine-wasm/wasm/lix_engine.js +18 -17
  7. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  8. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +2 -1
  9. package/dist/generated/builtin-schemas.d.ts +31 -41
  10. package/dist/generated/builtin-schemas.js +52 -56
  11. package/dist/open-lix.d.ts +141 -24
  12. package/dist/open-lix.js +199 -35
  13. package/dist/sqlite/index.js +99 -22
  14. package/dist-engine-src/README.md +18 -0
  15. package/dist-engine-src/src/backend/kv.rs +358 -0
  16. package/dist-engine-src/src/backend/mod.rs +12 -0
  17. package/dist-engine-src/src/backend/testing.rs +658 -0
  18. package/dist-engine-src/src/backend/types.rs +96 -0
  19. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  20. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  21. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  22. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  23. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  24. package/dist-engine-src/src/binary_cas/types.rs +127 -0
  25. package/dist-engine-src/src/cel/context.rs +86 -0
  26. package/dist-engine-src/src/cel/error.rs +19 -0
  27. package/dist-engine-src/src/cel/mod.rs +8 -0
  28. package/dist-engine-src/src/cel/provider.rs +9 -0
  29. package/dist-engine-src/src/cel/runtime.rs +167 -0
  30. package/dist-engine-src/src/cel/value.rs +50 -0
  31. package/dist-engine-src/src/changelog/codec.rs +321 -0
  32. package/dist-engine-src/src/changelog/context.rs +92 -0
  33. package/dist-engine-src/src/changelog/materialization.rs +121 -0
  34. package/dist-engine-src/src/changelog/mod.rs +13 -0
  35. package/dist-engine-src/src/changelog/reader.rs +20 -0
  36. package/dist-engine-src/src/changelog/storage.rs +220 -0
  37. package/dist-engine-src/src/changelog/types.rs +38 -0
  38. package/dist-engine-src/src/commit_graph/context.rs +1588 -0
  39. package/dist-engine-src/src/commit_graph/mod.rs +12 -0
  40. package/dist-engine-src/src/commit_graph/types.rs +145 -0
  41. package/dist-engine-src/src/commit_graph/walker.rs +780 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +135 -0
  46. package/dist-engine-src/src/common/metadata.rs +35 -0
  47. package/dist-engine-src/src/common/mod.rs +23 -0
  48. package/dist-engine-src/src/common/types.rs +105 -0
  49. package/dist-engine-src/src/common/wire.rs +222 -0
  50. package/dist-engine-src/src/engine.rs +239 -0
  51. package/dist-engine-src/src/entity_identity.rs +285 -0
  52. package/dist-engine-src/src/functions/context.rs +327 -0
  53. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  54. package/dist-engine-src/src/functions/mod.rs +18 -0
  55. package/dist-engine-src/src/functions/provider.rs +130 -0
  56. package/dist-engine-src/src/functions/state.rs +363 -0
  57. package/dist-engine-src/src/functions/types.rs +37 -0
  58. package/dist-engine-src/src/init.rs +505 -0
  59. package/dist-engine-src/src/json_store/compression.rs +77 -0
  60. package/dist-engine-src/src/json_store/context.rs +129 -0
  61. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  62. package/dist-engine-src/src/json_store/mod.rs +9 -0
  63. package/dist-engine-src/src/json_store/store.rs +236 -0
  64. package/dist-engine-src/src/json_store/types.rs +52 -0
  65. package/dist-engine-src/src/lib.rs +61 -0
  66. package/dist-engine-src/src/live_state/context.rs +2241 -0
  67. package/dist-engine-src/src/live_state/mod.rs +15 -0
  68. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  69. package/dist-engine-src/src/live_state/reader.rs +23 -0
  70. package/dist-engine-src/src/live_state/types.rs +239 -0
  71. package/dist-engine-src/src/live_state/visibility.rs +218 -0
  72. package/dist-engine-src/src/plugin/archive.rs +441 -0
  73. package/dist-engine-src/src/plugin/component.rs +183 -0
  74. package/dist-engine-src/src/plugin/install.rs +637 -0
  75. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  76. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  77. package/dist-engine-src/src/plugin/mod.rs +33 -0
  78. package/dist-engine-src/src/plugin/plugin_manifest.json +119 -0
  79. package/dist-engine-src/src/plugin/storage.rs +74 -0
  80. package/dist-engine-src/src/schema/annotations/defaults.rs +280 -0
  81. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  82. package/dist-engine-src/src/schema/builtin/lix_account.json +22 -0
  83. package/dist-engine-src/src/schema/builtin/lix_active_account.json +30 -0
  84. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +30 -0
  85. package/dist-engine-src/src/schema/builtin/lix_change.json +62 -0
  86. package/dist-engine-src/src/schema/builtin/lix_change_author.json +46 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change_set.json +18 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +75 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +62 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +46 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +53 -0
  92. package/dist-engine-src/src/schema/builtin/lix_entity_label.json +63 -0
  93. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +53 -0
  94. package/dist-engine-src/src/schema/builtin/lix_key_value.json +41 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label.json +22 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +31 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +35 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +49 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +271 -0
  100. package/dist-engine-src/src/schema/definition.json +157 -0
  101. package/dist-engine-src/src/schema/definition.rs +636 -0
  102. package/dist-engine-src/src/schema/key.rs +206 -0
  103. package/dist-engine-src/src/schema/mod.rs +20 -0
  104. package/dist-engine-src/src/schema/seed.rs +14 -0
  105. package/dist-engine-src/src/schema/tests.rs +739 -0
  106. package/dist-engine-src/src/schema_registry.rs +294 -0
  107. package/dist-engine-src/src/session/context.rs +366 -0
  108. package/dist-engine-src/src/session/create_version.rs +80 -0
  109. package/dist-engine-src/src/session/execute.rs +447 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +62 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +437 -0
  116. package/dist-engine-src/src/session/mod.rs +25 -0
  117. package/dist-engine-src/src/session/switch_version.rs +121 -0
  118. package/dist-engine-src/src/sql2/change_provider.rs +337 -0
  119. package/dist-engine-src/src/sql2/classify.rs +147 -0
  120. package/dist-engine-src/src/sql2/commit_derived_provider.rs +591 -0
  121. package/dist-engine-src/src/sql2/context.rs +307 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +623 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2405 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +444 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +2700 -0
  127. package/dist-engine-src/src/sql2/error.rs +196 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3379 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +902 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3254 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1526 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +369 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +80 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +418 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +643 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2430 -0
  138. package/dist-engine-src/src/sql2/mod.rs +43 -0
  139. package/dist-engine-src/src/sql2/read_only.rs +65 -0
  140. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  141. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  142. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  143. package/dist-engine-src/src/sql2/session.rs +135 -0
  144. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  145. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  146. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  147. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  148. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  149. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  150. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  153. package/dist-engine-src/src/sql2/udfs/mod.rs +82 -0
  154. package/dist-engine-src/src/sql2/version_provider.rs +1187 -0
  155. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  156. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  157. package/dist-engine-src/src/storage/context.rs +356 -0
  158. package/dist-engine-src/src/storage/mod.rs +14 -0
  159. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  160. package/dist-engine-src/src/storage/types.rs +501 -0
  161. package/dist-engine-src/src/storage_bench.rs +3406 -0
  162. package/dist-engine-src/src/test_support.rs +81 -0
  163. package/dist-engine-src/src/tracked_state/by_file_index.rs +102 -0
  164. package/dist-engine-src/src/tracked_state/codec.rs +747 -0
  165. package/dist-engine-src/src/tracked_state/context.rs +983 -0
  166. package/dist-engine-src/src/tracked_state/diff.rs +494 -0
  167. package/dist-engine-src/src/tracked_state/materialization.rs +141 -0
  168. package/dist-engine-src/src/tracked_state/merge.rs +474 -0
  169. package/dist-engine-src/src/tracked_state/mod.rs +31 -0
  170. package/dist-engine-src/src/tracked_state/rebuild.rs +771 -0
  171. package/dist-engine-src/src/tracked_state/storage.rs +243 -0
  172. package/dist-engine-src/src/tracked_state/tree.rs +2744 -0
  173. package/dist-engine-src/src/tracked_state/tree_types.rs +176 -0
  174. package/dist-engine-src/src/tracked_state/types.rs +61 -0
  175. package/dist-engine-src/src/transaction/commit.rs +1224 -0
  176. package/dist-engine-src/src/transaction/context.rs +1307 -0
  177. package/dist-engine-src/src/transaction/live_state_overlay.rs +34 -0
  178. package/dist-engine-src/src/transaction/mod.rs +11 -0
  179. package/dist-engine-src/src/transaction/normalization.rs +1026 -0
  180. package/dist-engine-src/src/transaction/schema_resolver.rs +127 -0
  181. package/dist-engine-src/src/transaction/staging.rs +1436 -0
  182. package/dist-engine-src/src/transaction/types.rs +351 -0
  183. package/dist-engine-src/src/transaction/validation.rs +4811 -0
  184. package/dist-engine-src/src/untracked_state/codec.rs +363 -0
  185. package/dist-engine-src/src/untracked_state/context.rs +82 -0
  186. package/dist-engine-src/src/untracked_state/materialization.rs +157 -0
  187. package/dist-engine-src/src/untracked_state/mod.rs +17 -0
  188. package/dist-engine-src/src/untracked_state/storage.rs +348 -0
  189. package/dist-engine-src/src/untracked_state/types.rs +96 -0
  190. package/dist-engine-src/src/version/context.rs +52 -0
  191. package/dist-engine-src/src/version/mod.rs +12 -0
  192. package/dist-engine-src/src/version/refs.rs +421 -0
  193. package/dist-engine-src/src/version/stage_rows.rs +71 -0
  194. package/dist-engine-src/src/version/types.rs +21 -0
  195. package/dist-engine-src/src/wasm/mod.rs +60 -0
  196. package/package.json +68 -63
@@ -0,0 +1,43 @@
1
+ mod change_provider;
2
+ mod classify;
3
+ mod commit_derived_provider;
4
+ mod context;
5
+ mod directory_history_provider;
6
+ mod directory_provider;
7
+ mod dml;
8
+ mod entity_history_provider;
9
+ mod entity_provider;
10
+ mod error;
11
+ mod execute;
12
+ mod file_history_provider;
13
+ mod file_provider;
14
+ mod filesystem_planner;
15
+ mod filesystem_predicates;
16
+ mod filesystem_visibility;
17
+ mod history_projection;
18
+ mod history_provider;
19
+ mod history_route;
20
+ mod lix_state_provider;
21
+ mod read_only;
22
+ mod record_batch;
23
+ mod result_metadata;
24
+ mod runtime;
25
+ mod session;
26
+ mod udfs;
27
+ mod version_provider;
28
+ mod version_scope;
29
+ mod write_normalization;
30
+
31
+ pub(crate) use classify::{
32
+ classify_statement, dml_target_table_names, validate_supported_statement_ast, SqlStatementKind,
33
+ };
34
+ pub(crate) use context::{
35
+ ChangelogQuerySource, SqlChangelogQuerySource, SqlExecutionContext, SqlJsonReader,
36
+ SqlWriteContext, SqlWriteExecutionContext, WriteAccess, WriteContextLiveStateReader,
37
+ WriteContextVersionRefReader,
38
+ };
39
+ #[allow(unused_imports)]
40
+ pub(crate) use execute::{
41
+ create_logical_plan, create_write_logical_plan, execute_logical_plan, execute_sql,
42
+ SqlLogicalPlan,
43
+ };
@@ -0,0 +1,65 @@
1
+ use datafusion::error::DataFusionError;
2
+
3
+ use crate::transaction::types::StageRow;
4
+ use crate::LixError;
5
+
6
+ pub(crate) fn reject_read_only_entity_surface(
7
+ schema_key: &str,
8
+ operation: &str,
9
+ ) -> Result<(), DataFusionError> {
10
+ if schema_key == "lix_directory_descriptor" {
11
+ return Err(read_only_error(
12
+ operation,
13
+ schema_key,
14
+ "Use the writable lix_directory surface to create, update, or delete directories.",
15
+ ));
16
+ }
17
+ if let Some(message) = read_only_schema_message(schema_key) {
18
+ return Err(read_only_error(operation, schema_key, message));
19
+ }
20
+ Ok(())
21
+ }
22
+
23
+ pub(crate) fn reject_read_only_stage_rows(
24
+ rows: &[StageRow],
25
+ operation: &str,
26
+ ) -> Result<(), DataFusionError> {
27
+ for row in rows {
28
+ if let Some(message) = read_only_schema_message(&row.schema_key) {
29
+ return Err(read_only_error(operation, &row.schema_key, message));
30
+ }
31
+ }
32
+ Ok(())
33
+ }
34
+
35
+ fn read_only_error(operation: &str, schema_key: &str, message: &'static str) -> DataFusionError {
36
+ super::error::lix_error_to_datafusion_error(
37
+ LixError::new(
38
+ LixError::CODE_READ_ONLY,
39
+ format!("{operation} cannot write read-only surface '{schema_key}'"),
40
+ )
41
+ .with_hint(message),
42
+ )
43
+ }
44
+
45
/// Returns the user-facing hint for a read-only schema key, or `None` when the
/// key is not in the read-only list handled here.
fn read_only_schema_message(schema_key: &str) -> Option<&'static str> {
    const VERSION_HINT: &str =
        "Use the writable lix_version surface to create, update, or delete versions.";
    const FILE_HINT: &str = "Use the writable lix_file surface to create, update, or delete files.";
    const BLOB_HINT: &str =
        "Use the writable lix_file data column to create, update, or delete file contents.";
    const HISTORY_HINT: &str =
        "Commit graph and changelog surfaces are read-only; Lix creates them when transactions commit.";

    let hint = match schema_key {
        "lix_version_descriptor" | "lix_version_ref" => VERSION_HINT,
        "lix_file_descriptor" => FILE_HINT,
        "lix_binary_blob_ref" => BLOB_HINT,
        "lix_commit" | "lix_commit_edge" | "lix_change" | "lix_change_set"
        | "lix_change_set_element" => HISTORY_HINT,
        _ => return None,
    };
    Some(hint)
}
@@ -0,0 +1,17 @@
1
+ use datafusion::arrow::array::ArrayRef;
2
+ use datafusion::arrow::datatypes::SchemaRef;
3
+ use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions};
4
+ use datafusion::common::{DataFusionError, Result};
5
+
6
+ pub(crate) fn record_batch_with_row_count(
7
+ schema: SchemaRef,
8
+ columns: Vec<ArrayRef>,
9
+ row_count: usize,
10
+ ) -> Result<RecordBatch> {
11
+ if schema.fields().is_empty() {
12
+ let options = RecordBatchOptions::new().with_row_count(Some(row_count));
13
+ return RecordBatch::try_new_with_options(schema, columns, &options)
14
+ .map_err(DataFusionError::from);
15
+ }
16
+ RecordBatch::try_new(schema, columns).map_err(DataFusionError::from)
17
+ }
@@ -0,0 +1,29 @@
1
+ use std::collections::HashMap;
2
+
3
+ use datafusion::arrow::datatypes::Field;
4
+
5
/// Arrow field-metadata key under which the Lix value type of a column is stored.
pub(crate) const LIX_VALUE_TYPE_METADATA_KEY: &str = "lix.value_type";
/// Value stored under `LIX_VALUE_TYPE_METADATA_KEY` for columns that carry JSON text.
pub(crate) const LIX_VALUE_TYPE_JSON: &str = "json";
7
+
8
+ pub(crate) fn json_field(name: impl Into<String>, nullable: bool) -> Field {
9
+ Field::new(name, datafusion::arrow::datatypes::DataType::Utf8, nullable)
10
+ .with_metadata(json_field_metadata_map())
11
+ }
12
+
13
+ pub(crate) fn mark_json_field(field: Field) -> Field {
14
+ field.with_metadata(json_field_metadata_map())
15
+ }
16
+
17
+ pub(crate) fn field_is_json(field: &Field) -> bool {
18
+ field
19
+ .metadata()
20
+ .get(LIX_VALUE_TYPE_METADATA_KEY)
21
+ .is_some_and(|value| value == LIX_VALUE_TYPE_JSON)
22
+ }
23
+
24
+ fn json_field_metadata_map() -> HashMap<String, String> {
25
+ HashMap::from([(
26
+ LIX_VALUE_TYPE_METADATA_KEY.to_string(),
27
+ LIX_VALUE_TYPE_JSON.to_string(),
28
+ )])
29
+ }
@@ -0,0 +1,60 @@
1
+ use std::sync::Arc;
2
+
3
+ use datafusion::arrow::record_batch::RecordBatch;
4
+ use datafusion::dataframe::DataFrame;
5
+ use datafusion::error::Result;
6
+ use datafusion::execution::TaskContext;
7
+ use datafusion::physical_plan::{ExecutionPlan, ExecutionPlanProperties};
8
+ use futures_util::TryStreamExt;
9
+
10
+ pub(crate) async fn collect_dataframe(dataframe: DataFrame) -> Result<Vec<RecordBatch>> {
11
+ let task_ctx = Arc::new(dataframe.task_ctx());
12
+ let plan = dataframe.create_physical_plan().await?;
13
+ collect_input_plan(plan, task_ctx).await
14
+ }
15
+
16
+ pub(crate) async fn collect_input_plan(
17
+ plan: Arc<dyn ExecutionPlan>,
18
+ task_ctx: Arc<TaskContext>,
19
+ ) -> Result<Vec<RecordBatch>> {
20
+ validate_physical_plan(&plan)?;
21
+ let partition_count = plan.output_partitioning().partition_count();
22
+ let mut batches = Vec::new();
23
+ for partition in 0..partition_count {
24
+ let partition_batches = plan
25
+ .execute(partition, Arc::clone(&task_ctx))?
26
+ .try_collect::<Vec<_>>()
27
+ .await?;
28
+ batches.extend(partition_batches);
29
+ }
30
+ Ok(batches)
31
+ }
32
+
33
+ #[cfg(not(target_arch = "wasm32"))]
34
+ fn validate_physical_plan(_plan: &Arc<dyn ExecutionPlan>) -> Result<()> {
35
+ Ok(())
36
+ }
37
+
38
/// Walks the physical plan depth-first and fails on the first operator that
/// `is_wasm_unsafe_operator` flags.
///
/// A node is checked before its children.
///
/// # Errors
/// Returns `DataFusionError::Plan` naming the offending operator.
#[cfg(target_arch = "wasm32")]
fn validate_physical_plan(plan: &Arc<dyn ExecutionPlan>) -> Result<()> {
    let operator_name = plan.name();
    if !is_wasm_unsafe_operator(operator_name) {
        return plan
            .children()
            .into_iter()
            .try_for_each(|child| validate_physical_plan(child));
    }

    Err(datafusion::error::DataFusionError::Plan(format!(
        "SQL physical operator '{operator_name}' is not supported by the WebAssembly runtime yet"
    )))
}
53
+
54
+ #[cfg(target_arch = "wasm32")]
55
/// Returns `true` for the physical operator names that the wasm32 plan
/// validation rejects. The comparison is exact and case-sensitive.
fn is_wasm_unsafe_operator(operator_name: &str) -> bool {
    const REJECTED_OPERATORS: [&str; 3] = [
        "CoalescePartitionsExec",
        "RepartitionExec",
        "SortPreservingMergeExec",
    ];
    REJECTED_OPERATORS.contains(&operator_name)
}
@@ -0,0 +1,135 @@
1
+ use std::sync::Arc;
2
+
3
+ use datafusion::prelude::{SessionConfig, SessionContext};
4
+
5
+ use crate::LixError;
6
+
7
+ use super::change_provider::register_lix_change_provider;
8
+ use super::commit_derived_provider::register_commit_derived_providers;
9
+ use super::directory_history_provider::register_lix_directory_history_provider;
10
+ use super::directory_provider::{
11
+ register_lix_directory_providers, register_lix_directory_write_providers,
12
+ };
13
+ use super::entity_provider::{register_entity_providers, register_entity_write_providers};
14
+ use super::file_history_provider::register_lix_file_history_provider;
15
+ use super::file_provider::{register_lix_file_providers, register_lix_file_write_providers};
16
+ use super::history_provider::register_history_providers;
17
+ use super::lix_state_provider::{register_lix_state_providers, register_lix_state_write_providers};
18
+ use super::udfs::register_sql2_functions;
19
+ use super::version_provider::{register_lix_version_provider, register_lix_version_write_provider};
20
+ use super::{SqlExecutionContext, SqlWriteContext, SqlWriteExecutionContext};
21
+
22
+ pub(crate) async fn build_read_session(
23
+ ctx: &dyn SqlExecutionContext,
24
+ ) -> Result<SessionContext, LixError> {
25
+ let session = new_sql_session_context();
26
+ let version_ref = ctx.version_ref();
27
+ let active_version_commit_id = version_ref
28
+ .load_head(ctx.active_version_id())
29
+ .await?
30
+ .map(|head| head.commit_id);
31
+ register_sql2_functions(&session, ctx.functions(), active_version_commit_id);
32
+ register_lix_state_providers(
33
+ &session,
34
+ ctx.active_version_id(),
35
+ ctx.live_state(),
36
+ Arc::clone(&version_ref),
37
+ )
38
+ .await?;
39
+ register_lix_version_provider(&session, ctx.live_state(), Arc::clone(&version_ref)).await?;
40
+ let changelog_query_source = ctx.changelog_query_source();
41
+ register_lix_change_provider(&session, changelog_query_source.clone()).await?;
42
+ let commit_graph = ctx.commit_graph();
43
+ register_commit_derived_providers(&session, commit_graph, Arc::clone(&version_ref)).await?;
44
+ let state_history_commit_graph = ctx.commit_graph();
45
+ register_history_providers(
46
+ &session,
47
+ state_history_commit_graph,
48
+ changelog_query_source.clone(),
49
+ )
50
+ .await?;
51
+ let file_history_commit_graph = ctx.commit_graph();
52
+ register_lix_file_history_provider(
53
+ &session,
54
+ file_history_commit_graph,
55
+ changelog_query_source.clone(),
56
+ ctx.blob_reader(),
57
+ )
58
+ .await?;
59
+ let directory_history_commit_graph = ctx.commit_graph();
60
+ register_lix_directory_history_provider(
61
+ &session,
62
+ directory_history_commit_graph,
63
+ changelog_query_source.clone(),
64
+ )
65
+ .await?;
66
+ let entity_commit_graph = Arc::new(tokio::sync::Mutex::new(ctx.commit_graph()));
67
+ register_lix_directory_providers(
68
+ &session,
69
+ ctx.active_version_id(),
70
+ ctx.live_state(),
71
+ Arc::clone(&version_ref),
72
+ ctx.functions(),
73
+ )
74
+ .await?;
75
+ register_lix_file_providers(
76
+ &session,
77
+ ctx.active_version_id(),
78
+ ctx.live_state(),
79
+ Arc::clone(&version_ref),
80
+ ctx.blob_reader(),
81
+ ctx.functions(),
82
+ )
83
+ .await?;
84
+ register_entity_providers(
85
+ &session,
86
+ ctx.active_version_id(),
87
+ ctx.live_state(),
88
+ Arc::clone(&version_ref),
89
+ entity_commit_graph,
90
+ changelog_query_source,
91
+ &ctx.list_visible_schemas()?,
92
+ )
93
+ .await?;
94
+
95
+ Ok(session)
96
+ }
97
+
98
+ pub(crate) async fn build_write_session(
99
+ ctx: &mut dyn SqlWriteExecutionContext,
100
+ ) -> Result<SessionContext, LixError> {
101
+ let session = new_sql_session_context();
102
+ let write_ctx = SqlWriteContext::new(ctx);
103
+ let active_version_commit_id = write_ctx
104
+ .load_version_head(&write_ctx.active_version_id())
105
+ .await?;
106
+ register_sql2_functions(&session, write_ctx.functions(), active_version_commit_id);
107
+
108
+ register_lix_state_write_providers(&session, write_ctx.clone()).await?;
109
+ register_lix_version_write_provider(&session, write_ctx.clone()).await?;
110
+
111
+ register_lix_directory_write_providers(&session, write_ctx.clone()).await?;
112
+ register_lix_file_write_providers(&session, write_ctx.clone()).await?;
113
+ register_entity_write_providers(
114
+ &session,
115
+ write_ctx.clone(),
116
+ &write_ctx.list_visible_schemas()?,
117
+ )
118
+ .await?;
119
+
120
+ Ok(session)
121
+ }
122
+
123
+ fn new_sql_session_context() -> SessionContext {
124
+ SessionContext::new_with_config(
125
+ SessionConfig::new()
126
+ .with_information_schema(true)
127
+ .with_target_partitions(1)
128
+ .set_bool("datafusion.optimizer.repartition_aggregations", false)
129
+ .set_bool("datafusion.optimizer.repartition_joins", false)
130
+ .set_bool("datafusion.optimizer.repartition_sorts", false)
131
+ .set_bool("datafusion.optimizer.repartition_windows", false)
132
+ .set_bool("datafusion.optimizer.repartition_file_scans", false)
133
+ .set_bool("datafusion.optimizer.enable_round_robin_repartition", false),
134
+ )
135
+ }
@@ -0,0 +1,295 @@
1
+ use std::sync::Arc;
2
+
3
+ use datafusion::arrow::array::{
4
+ Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array,
5
+ Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, StringArray, UInt16Array,
6
+ UInt32Array, UInt64Array, UInt8Array,
7
+ };
8
+ use datafusion::common::{plan_err, DataFusionError, Result};
9
+ use datafusion::logical_expr::ColumnarValue;
10
+ use serde_json::Value as JsonValue;
11
+
12
+ pub(super) fn scalar_inputs(args: &[ColumnarValue]) -> bool {
13
+ args.iter()
14
+ .all(|value| matches!(value, ColumnarValue::Scalar(_)))
15
+ }
16
+
17
+ pub(super) fn json_value_to_serde(array: &dyn Array, row: usize) -> Result<Option<JsonValue>> {
18
+ let Some(raw) = text_like_value(array, row)? else {
19
+ return Ok(None);
20
+ };
21
+ serde_json::from_str::<JsonValue>(&raw)
22
+ .map(Some)
23
+ .map_err(|error| {
24
+ DataFusionError::Execution(format!(
25
+ "JSON function expected valid JSON text in its first argument, got error: {error}"
26
+ ))
27
+ })
28
+ }
29
+
30
+ pub(super) fn text_like_value(array: &dyn Array, row: usize) -> Result<Option<String>> {
31
+ if let Some(array) = array.as_any().downcast_ref::<StringArray>() {
32
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
33
+ }
34
+ if let Some(array) = array.as_any().downcast_ref::<LargeStringArray>() {
35
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
36
+ }
37
+ if let Some(value) = numeric_value(array, row)? {
38
+ return Ok(Some(value));
39
+ }
40
+ if let Some(array) = array.as_any().downcast_ref::<BooleanArray>() {
41
+ return Ok((!array.is_null(row)).then(|| {
42
+ if array.value(row) {
43
+ "true".to_string()
44
+ } else {
45
+ "false".to_string()
46
+ }
47
+ }));
48
+ }
49
+ if let Some(array) = array.as_any().downcast_ref::<BinaryArray>() {
50
+ return Ok(
51
+ (!array.is_null(row)).then(|| String::from_utf8_lossy(array.value(row)).to_string())
52
+ );
53
+ }
54
+ if let Some(array) = array.as_any().downcast_ref::<LargeBinaryArray>() {
55
+ return Ok(
56
+ (!array.is_null(row)).then(|| String::from_utf8_lossy(array.value(row)).to_string())
57
+ );
58
+ }
59
+ Err(DataFusionError::Execution(format!(
60
+ "unsupported argument type for JSON/text function: {:?}",
61
+ array.data_type()
62
+ )))
63
+ }
64
+
65
+ pub(super) fn numeric_value(array: &dyn Array, row: usize) -> Result<Option<String>> {
66
+ macro_rules! numeric_array {
67
+ ($ty:ty) => {
68
+ if let Some(array) = array.as_any().downcast_ref::<$ty>() {
69
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
70
+ }
71
+ };
72
+ }
73
+
74
+ numeric_array!(Int8Array);
75
+ numeric_array!(Int16Array);
76
+ numeric_array!(Int32Array);
77
+ numeric_array!(Int64Array);
78
+ numeric_array!(UInt8Array);
79
+ numeric_array!(UInt16Array);
80
+ numeric_array!(UInt32Array);
81
+ numeric_array!(UInt64Array);
82
+ numeric_array!(Float32Array);
83
+ numeric_array!(Float64Array);
84
+ Ok(None)
85
+ }
86
+
87
+ pub(super) fn decode_utf8_value(array: &dyn Array, row: usize) -> Result<Option<String>> {
88
+ if let Some(array) = array.as_any().downcast_ref::<BinaryArray>() {
89
+ return (!array.is_null(row))
90
+ .then(|| String::from_utf8(array.value(row).to_vec()))
91
+ .transpose()
92
+ .map_err(|error| {
93
+ DataFusionError::Execution(format!(
94
+ "lix_text_decode() expected valid UTF8 bytes: {error}"
95
+ ))
96
+ });
97
+ }
98
+ if let Some(array) = array.as_any().downcast_ref::<LargeBinaryArray>() {
99
+ return (!array.is_null(row))
100
+ .then(|| String::from_utf8(array.value(row).to_vec()))
101
+ .transpose()
102
+ .map_err(|error| {
103
+ DataFusionError::Execution(format!(
104
+ "lix_text_decode() expected valid UTF8 bytes: {error}"
105
+ ))
106
+ });
107
+ }
108
+ if let Some(array) = array.as_any().downcast_ref::<StringArray>() {
109
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
110
+ }
111
+ if let Some(array) = array.as_any().downcast_ref::<LargeStringArray>() {
112
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
113
+ }
114
+ Err(DataFusionError::Execution(format!(
115
+ "lix_text_decode() expected Binary or Utf8, got {:?}",
116
+ array.data_type()
117
+ )))
118
+ }
119
+
120
+ pub(super) fn encode_utf8_value(array: &dyn Array, row: usize) -> Result<Option<Vec<u8>>> {
121
+ if let Some(array) = array.as_any().downcast_ref::<StringArray>() {
122
+ return Ok((!array.is_null(row)).then(|| array.value(row).as_bytes().to_vec()));
123
+ }
124
+ if let Some(array) = array.as_any().downcast_ref::<LargeStringArray>() {
125
+ return Ok((!array.is_null(row)).then(|| array.value(row).as_bytes().to_vec()));
126
+ }
127
+ if let Some(array) = array.as_any().downcast_ref::<BinaryArray>() {
128
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_vec()));
129
+ }
130
+ if let Some(array) = array.as_any().downcast_ref::<LargeBinaryArray>() {
131
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_vec()));
132
+ }
133
+ Err(DataFusionError::Execution(format!(
134
+ "lix_text_encode() expected Utf8 or Binary, got {:?}",
135
+ array.data_type()
136
+ )))
137
+ }
138
+
139
+ pub(super) fn validate_utf8_encoding_arg(
140
+ fn_name: &str,
141
+ encoding: Option<&ColumnarValue>,
142
+ ) -> Result<()> {
143
+ let Some(encoding) = encoding else {
144
+ return Ok(());
145
+ };
146
+ let arrays = ColumnarValue::values_to_arrays(std::slice::from_ref(encoding))?;
147
+ let array = &arrays[0];
148
+ if array.len() == 0 {
149
+ return Ok(());
150
+ }
151
+ let Some(value) = text_like_value(array.as_ref(), 0)? else {
152
+ return Ok(());
153
+ };
154
+ let normalized = value.trim().to_ascii_uppercase().replace('-', "");
155
+ if normalized == "UTF8" {
156
+ Ok(())
157
+ } else {
158
+ plan_err!("{fn_name}() only supports UTF8 encoding, got '{value}'")
159
+ }
160
+ }
161
+
162
+ pub(super) fn extract_json_path(
163
+ fn_name: &str,
164
+ arrays: &[ArrayRef],
165
+ row: usize,
166
+ ) -> Result<Option<JsonValue>> {
167
+ let Some(mut current) = json_value_to_serde(arrays[0].as_ref(), row)? else {
168
+ return Ok(None);
169
+ };
170
+
171
+ for path in &arrays[1..] {
172
+ let Some(segment) = json_path_segment(fn_name, path.as_ref(), row)? else {
173
+ return Ok(None);
174
+ };
175
+ let next = match segment {
176
+ JsonPathSegment::Key(key) => current.get(&key).cloned(),
177
+ JsonPathSegment::Index(index) => current
178
+ .as_array()
179
+ .and_then(|values| values.get(index))
180
+ .cloned(),
181
+ };
182
+ let Some(value) = next else {
183
+ return Ok(None);
184
+ };
185
+ current = value;
186
+ }
187
+
188
+ Ok(Some(current))
189
+ }
190
+
191
+ pub(super) fn json_text_value(value: &JsonValue) -> Result<String> {
192
+ match value {
193
+ JsonValue::String(text) => Ok(text.clone()),
194
+ JsonValue::Number(number) => Ok(number.to_string()),
195
+ JsonValue::Bool(boolean) => Ok(if *boolean {
196
+ "true".to_string()
197
+ } else {
198
+ "false".to_string()
199
+ }),
200
+ JsonValue::Array(_) | JsonValue::Object(_) => {
201
+ serde_json::to_string(value).map_err(|error| {
202
+ DataFusionError::Execution(format!(
203
+ "lix_json_get_text() could not render JSON value: {error}"
204
+ ))
205
+ })
206
+ }
207
+ JsonValue::Null => Ok("null".to_string()),
208
+ }
209
+ }
210
+
211
+ pub(super) fn json_json_value(value: &JsonValue) -> Result<String> {
212
+ serde_json::to_string(value).map_err(|error| {
213
+ DataFusionError::Execution(format!(
214
+ "lix_json_get() could not render JSON value: {error}"
215
+ ))
216
+ })
217
+ }
218
+
219
/// One step of a variadic JSON path.
enum JsonPathSegment {
    /// Look up a member by name (produced from string arguments).
    Key(String),
    /// Look up an array element by position (produced from integer arguments).
    Index(usize),
}
223
+
224
+ fn json_path_segment(
225
+ fn_name: &str,
226
+ array: &dyn Array,
227
+ row: usize,
228
+ ) -> Result<Option<JsonPathSegment>> {
229
+ if let Some(array) = array.as_any().downcast_ref::<StringArray>() {
230
+ if array.is_null(row) {
231
+ return Ok(None);
232
+ }
233
+ let value = array.value(row).to_string();
234
+ validate_json_path_key_segment(fn_name, &value)?;
235
+ return Ok(Some(JsonPathSegment::Key(value)));
236
+ }
237
+ if let Some(array) = array.as_any().downcast_ref::<LargeStringArray>() {
238
+ if array.is_null(row) {
239
+ return Ok(None);
240
+ }
241
+ let value = array.value(row).to_string();
242
+ validate_json_path_key_segment(fn_name, &value)?;
243
+ return Ok(Some(JsonPathSegment::Key(value)));
244
+ }
245
+ macro_rules! index_array {
246
+ ($ty:ty) => {
247
+ if let Some(array) = array.as_any().downcast_ref::<$ty>() {
248
+ if array.is_null(row) {
249
+ return Ok(None);
250
+ }
251
+ let value = array.value(row);
252
+ let index = usize::try_from(value).map_err(|_| {
253
+ DataFusionError::Execution(format!(
254
+ "{fn_name}() path indexes must be non-negative integers"
255
+ ))
256
+ })?;
257
+ return Ok(Some(JsonPathSegment::Index(index)));
258
+ }
259
+ };
260
+ }
261
+ index_array!(UInt8Array);
262
+ index_array!(UInt16Array);
263
+ index_array!(UInt32Array);
264
+ index_array!(UInt64Array);
265
+ index_array!(Int8Array);
266
+ index_array!(Int16Array);
267
+ index_array!(Int32Array);
268
+ index_array!(Int64Array);
269
+ Err(DataFusionError::Execution(format!(
270
+ "{fn_name}() path arguments must be strings or non-negative integers, got {:?}",
271
+ array.data_type()
272
+ )))
273
+ }
274
+
275
+ fn validate_json_path_key_segment(fn_name: &str, value: &str) -> Result<()> {
276
+ if value == "$" || value.starts_with("$.") || value.starts_with("$[") || value.starts_with('/')
277
+ {
278
+ return Err(DataFusionError::Execution(format!(
279
+ "{fn_name}() uses variadic path segments, not JSONPath or JSON Pointer; got '{value}'"
280
+ )));
281
+ }
282
+ Ok(())
283
+ }
284
+
285
+ pub(super) fn binary_array_from_owned(values: &[Option<Vec<u8>>]) -> BinaryArray {
286
+ let refs = values
287
+ .iter()
288
+ .map(|value| value.as_deref())
289
+ .collect::<Vec<_>>();
290
+ BinaryArray::from(refs)
291
+ }
292
+
293
+ pub(super) fn array_ref<T: Array + 'static>(array: T) -> ArrayRef {
294
+ Arc::new(array)
295
+ }
@@ -0,0 +1,53 @@
1
+ use std::any::Any;
2
+
3
+ use datafusion::arrow::datatypes::DataType;
4
+ use datafusion::common::{plan_err, Result, ScalarValue};
5
+ use datafusion::logical_expr::{
6
+ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
7
+ };
8
+
9
+ #[derive(Clone, PartialEq, Eq, Hash)]
10
+ pub(super) struct LixActiveVersionCommitId {
11
+ commit_id: Option<String>,
12
+ }
13
+
14
+ impl LixActiveVersionCommitId {
15
+ pub(super) fn new(commit_id: Option<String>) -> Self {
16
+ Self { commit_id }
17
+ }
18
+ }
19
+
20
+ impl std::fmt::Debug for LixActiveVersionCommitId {
21
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
22
+ f.debug_struct("LixActiveVersionCommitId").finish()
23
+ }
24
+ }
25
+
26
+ impl ScalarUDFImpl for LixActiveVersionCommitId {
27
+ fn as_any(&self) -> &dyn Any {
28
+ self
29
+ }
30
+
31
+ fn name(&self) -> &str {
32
+ "lix_active_version_commit_id"
33
+ }
34
+
35
+ fn signature(&self) -> &Signature {
36
+ static SIGNATURE: std::sync::LazyLock<Signature> =
37
+ std::sync::LazyLock::new(|| Signature::nullary(Volatility::Stable));
38
+ &SIGNATURE
39
+ }
40
+
41
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
42
+ Ok(DataType::Utf8)
43
+ }
44
+
45
+ fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
46
+ if !args.args.is_empty() {
47
+ return plan_err!("lix_active_version_commit_id requires no arguments");
48
+ }
49
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
50
+ self.commit_id.clone(),
51
+ )))
52
+ }
53
+ }