@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,295 @@
1
+ use std::sync::Arc;
2
+
3
+ use datafusion::arrow::array::{
4
+ Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array,
5
+ Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, StringArray, UInt16Array,
6
+ UInt32Array, UInt64Array, UInt8Array,
7
+ };
8
+ use datafusion::common::{plan_err, DataFusionError, Result};
9
+ use datafusion::logical_expr::ColumnarValue;
10
+ use serde_json::Value as JsonValue;
11
+
12
+ pub(super) fn scalar_inputs(args: &[ColumnarValue]) -> bool {
13
+ args.iter()
14
+ .all(|value| matches!(value, ColumnarValue::Scalar(_)))
15
+ }
16
+
17
+ pub(super) fn json_value_to_serde(array: &dyn Array, row: usize) -> Result<Option<JsonValue>> {
18
+ let Some(raw) = text_like_value(array, row)? else {
19
+ return Ok(None);
20
+ };
21
+ serde_json::from_str::<JsonValue>(&raw)
22
+ .map(Some)
23
+ .map_err(|error| {
24
+ DataFusionError::Execution(format!(
25
+ "JSON function expected valid JSON text in its first argument, got error: {error}"
26
+ ))
27
+ })
28
+ }
29
+
30
+ pub(super) fn text_like_value(array: &dyn Array, row: usize) -> Result<Option<String>> {
31
+ if let Some(array) = array.as_any().downcast_ref::<StringArray>() {
32
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
33
+ }
34
+ if let Some(array) = array.as_any().downcast_ref::<LargeStringArray>() {
35
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
36
+ }
37
+ if let Some(value) = numeric_value(array, row)? {
38
+ return Ok(Some(value));
39
+ }
40
+ if let Some(array) = array.as_any().downcast_ref::<BooleanArray>() {
41
+ return Ok((!array.is_null(row)).then(|| {
42
+ if array.value(row) {
43
+ "true".to_string()
44
+ } else {
45
+ "false".to_string()
46
+ }
47
+ }));
48
+ }
49
+ if let Some(array) = array.as_any().downcast_ref::<BinaryArray>() {
50
+ return Ok(
51
+ (!array.is_null(row)).then(|| String::from_utf8_lossy(array.value(row)).to_string())
52
+ );
53
+ }
54
+ if let Some(array) = array.as_any().downcast_ref::<LargeBinaryArray>() {
55
+ return Ok(
56
+ (!array.is_null(row)).then(|| String::from_utf8_lossy(array.value(row)).to_string())
57
+ );
58
+ }
59
+ Err(DataFusionError::Execution(format!(
60
+ "unsupported argument type for JSON/text function: {:?}",
61
+ array.data_type()
62
+ )))
63
+ }
64
+
65
+ pub(super) fn numeric_value(array: &dyn Array, row: usize) -> Result<Option<String>> {
66
+ macro_rules! numeric_array {
67
+ ($ty:ty) => {
68
+ if let Some(array) = array.as_any().downcast_ref::<$ty>() {
69
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
70
+ }
71
+ };
72
+ }
73
+
74
+ numeric_array!(Int8Array);
75
+ numeric_array!(Int16Array);
76
+ numeric_array!(Int32Array);
77
+ numeric_array!(Int64Array);
78
+ numeric_array!(UInt8Array);
79
+ numeric_array!(UInt16Array);
80
+ numeric_array!(UInt32Array);
81
+ numeric_array!(UInt64Array);
82
+ numeric_array!(Float32Array);
83
+ numeric_array!(Float64Array);
84
+ Ok(None)
85
+ }
86
+
87
+ pub(super) fn decode_utf8_value(array: &dyn Array, row: usize) -> Result<Option<String>> {
88
+ if let Some(array) = array.as_any().downcast_ref::<BinaryArray>() {
89
+ return (!array.is_null(row))
90
+ .then(|| String::from_utf8(array.value(row).to_vec()))
91
+ .transpose()
92
+ .map_err(|error| {
93
+ DataFusionError::Execution(format!(
94
+ "lix_text_decode() expected valid UTF8 bytes: {error}"
95
+ ))
96
+ });
97
+ }
98
+ if let Some(array) = array.as_any().downcast_ref::<LargeBinaryArray>() {
99
+ return (!array.is_null(row))
100
+ .then(|| String::from_utf8(array.value(row).to_vec()))
101
+ .transpose()
102
+ .map_err(|error| {
103
+ DataFusionError::Execution(format!(
104
+ "lix_text_decode() expected valid UTF8 bytes: {error}"
105
+ ))
106
+ });
107
+ }
108
+ if let Some(array) = array.as_any().downcast_ref::<StringArray>() {
109
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
110
+ }
111
+ if let Some(array) = array.as_any().downcast_ref::<LargeStringArray>() {
112
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_string()));
113
+ }
114
+ Err(DataFusionError::Execution(format!(
115
+ "lix_text_decode() expected Binary or Utf8, got {:?}",
116
+ array.data_type()
117
+ )))
118
+ }
119
+
120
+ pub(super) fn encode_utf8_value(array: &dyn Array, row: usize) -> Result<Option<Vec<u8>>> {
121
+ if let Some(array) = array.as_any().downcast_ref::<StringArray>() {
122
+ return Ok((!array.is_null(row)).then(|| array.value(row).as_bytes().to_vec()));
123
+ }
124
+ if let Some(array) = array.as_any().downcast_ref::<LargeStringArray>() {
125
+ return Ok((!array.is_null(row)).then(|| array.value(row).as_bytes().to_vec()));
126
+ }
127
+ if let Some(array) = array.as_any().downcast_ref::<BinaryArray>() {
128
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_vec()));
129
+ }
130
+ if let Some(array) = array.as_any().downcast_ref::<LargeBinaryArray>() {
131
+ return Ok((!array.is_null(row)).then(|| array.value(row).to_vec()));
132
+ }
133
+ Err(DataFusionError::Execution(format!(
134
+ "lix_text_encode() expected Utf8 or Binary, got {:?}",
135
+ array.data_type()
136
+ )))
137
+ }
138
+
139
+ pub(super) fn validate_utf8_encoding_arg(
140
+ fn_name: &str,
141
+ encoding: Option<&ColumnarValue>,
142
+ ) -> Result<()> {
143
+ let Some(encoding) = encoding else {
144
+ return Ok(());
145
+ };
146
+ let arrays = ColumnarValue::values_to_arrays(std::slice::from_ref(encoding))?;
147
+ let array = &arrays[0];
148
+ if array.len() == 0 {
149
+ return Ok(());
150
+ }
151
+ let Some(value) = text_like_value(array.as_ref(), 0)? else {
152
+ return Ok(());
153
+ };
154
+ let normalized = value.trim().to_ascii_uppercase().replace('-', "");
155
+ if normalized == "UTF8" {
156
+ Ok(())
157
+ } else {
158
+ plan_err!("{fn_name}() only supports UTF8 encoding, got '{value}'")
159
+ }
160
+ }
161
+
162
+ pub(super) fn extract_json_path(
163
+ fn_name: &str,
164
+ arrays: &[ArrayRef],
165
+ row: usize,
166
+ ) -> Result<Option<JsonValue>> {
167
+ let Some(mut current) = json_value_to_serde(arrays[0].as_ref(), row)? else {
168
+ return Ok(None);
169
+ };
170
+
171
+ for path in &arrays[1..] {
172
+ let Some(segment) = json_path_segment(fn_name, path.as_ref(), row)? else {
173
+ return Ok(None);
174
+ };
175
+ let next = match segment {
176
+ JsonPathSegment::Key(key) => current.get(&key).cloned(),
177
+ JsonPathSegment::Index(index) => current
178
+ .as_array()
179
+ .and_then(|values| values.get(index))
180
+ .cloned(),
181
+ };
182
+ let Some(value) = next else {
183
+ return Ok(None);
184
+ };
185
+ current = value;
186
+ }
187
+
188
+ Ok(Some(current))
189
+ }
190
+
191
+ pub(super) fn json_text_value(value: &JsonValue) -> Result<String> {
192
+ match value {
193
+ JsonValue::String(text) => Ok(text.clone()),
194
+ JsonValue::Number(number) => Ok(number.to_string()),
195
+ JsonValue::Bool(boolean) => Ok(if *boolean {
196
+ "true".to_string()
197
+ } else {
198
+ "false".to_string()
199
+ }),
200
+ JsonValue::Array(_) | JsonValue::Object(_) => {
201
+ serde_json::to_string(value).map_err(|error| {
202
+ DataFusionError::Execution(format!(
203
+ "lix_json_get_text() could not render JSON value: {error}"
204
+ ))
205
+ })
206
+ }
207
+ JsonValue::Null => Ok("null".to_string()),
208
+ }
209
+ }
210
+
211
+ pub(super) fn json_json_value(value: &JsonValue) -> Result<String> {
212
+ serde_json::to_string(value).map_err(|error| {
213
+ DataFusionError::Execution(format!(
214
+ "lix_json_get() could not render JSON value: {error}"
215
+ ))
216
+ })
217
+ }
218
+
219
/// One step of a variadic JSON path, as produced by `json_path_segment`.
enum JsonPathSegment {
    /// Object member lookup; built from a string argument.
    Key(String),
    /// Array element lookup; built from an integer argument.
    Index(usize),
}
223
+
224
+ fn json_path_segment(
225
+ fn_name: &str,
226
+ array: &dyn Array,
227
+ row: usize,
228
+ ) -> Result<Option<JsonPathSegment>> {
229
+ if let Some(array) = array.as_any().downcast_ref::<StringArray>() {
230
+ if array.is_null(row) {
231
+ return Ok(None);
232
+ }
233
+ let value = array.value(row).to_string();
234
+ validate_json_path_key_segment(fn_name, &value)?;
235
+ return Ok(Some(JsonPathSegment::Key(value)));
236
+ }
237
+ if let Some(array) = array.as_any().downcast_ref::<LargeStringArray>() {
238
+ if array.is_null(row) {
239
+ return Ok(None);
240
+ }
241
+ let value = array.value(row).to_string();
242
+ validate_json_path_key_segment(fn_name, &value)?;
243
+ return Ok(Some(JsonPathSegment::Key(value)));
244
+ }
245
+ macro_rules! index_array {
246
+ ($ty:ty) => {
247
+ if let Some(array) = array.as_any().downcast_ref::<$ty>() {
248
+ if array.is_null(row) {
249
+ return Ok(None);
250
+ }
251
+ let value = array.value(row);
252
+ let index = usize::try_from(value).map_err(|_| {
253
+ DataFusionError::Execution(format!(
254
+ "{fn_name}() path indexes must be non-negative integers"
255
+ ))
256
+ })?;
257
+ return Ok(Some(JsonPathSegment::Index(index)));
258
+ }
259
+ };
260
+ }
261
+ index_array!(UInt8Array);
262
+ index_array!(UInt16Array);
263
+ index_array!(UInt32Array);
264
+ index_array!(UInt64Array);
265
+ index_array!(Int8Array);
266
+ index_array!(Int16Array);
267
+ index_array!(Int32Array);
268
+ index_array!(Int64Array);
269
+ Err(DataFusionError::Execution(format!(
270
+ "{fn_name}() path arguments must be strings or non-negative integers, got {:?}",
271
+ array.data_type()
272
+ )))
273
+ }
274
+
275
+ fn validate_json_path_key_segment(fn_name: &str, value: &str) -> Result<()> {
276
+ if value == "$" || value.starts_with("$.") || value.starts_with("$[") || value.starts_with('/')
277
+ {
278
+ return Err(DataFusionError::Execution(format!(
279
+ "{fn_name}() uses variadic path segments, not JSONPath or JSON Pointer; got '{value}'"
280
+ )));
281
+ }
282
+ Ok(())
283
+ }
284
+
285
+ pub(super) fn binary_array_from_owned(values: &[Option<Vec<u8>>]) -> BinaryArray {
286
+ let refs = values
287
+ .iter()
288
+ .map(|value| value.as_deref())
289
+ .collect::<Vec<_>>();
290
+ BinaryArray::from(refs)
291
+ }
292
+
293
+ pub(super) fn array_ref<T: Array + 'static>(array: T) -> ArrayRef {
294
+ Arc::new(array)
295
+ }
@@ -0,0 +1,53 @@
1
+ use std::any::Any;
2
+
3
+ use datafusion::arrow::datatypes::DataType;
4
+ use datafusion::common::{plan_err, Result, ScalarValue};
5
+ use datafusion::logical_expr::{
6
+ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
7
+ };
8
+
9
+ #[derive(Clone, PartialEq, Eq, Hash)]
10
+ pub(super) struct LixActiveVersionCommitId {
11
+ commit_id: Option<String>,
12
+ }
13
+
14
+ impl LixActiveVersionCommitId {
15
+ pub(super) fn new(commit_id: Option<String>) -> Self {
16
+ Self { commit_id }
17
+ }
18
+ }
19
+
20
+ impl std::fmt::Debug for LixActiveVersionCommitId {
21
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
22
+ f.debug_struct("LixActiveVersionCommitId").finish()
23
+ }
24
+ }
25
+
26
+ impl ScalarUDFImpl for LixActiveVersionCommitId {
27
+ fn as_any(&self) -> &dyn Any {
28
+ self
29
+ }
30
+
31
+ fn name(&self) -> &str {
32
+ "lix_active_version_commit_id"
33
+ }
34
+
35
+ fn signature(&self) -> &Signature {
36
+ static SIGNATURE: std::sync::LazyLock<Signature> =
37
+ std::sync::LazyLock::new(|| Signature::nullary(Volatility::Stable));
38
+ &SIGNATURE
39
+ }
40
+
41
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
42
+ Ok(DataType::Utf8)
43
+ }
44
+
45
+ fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
46
+ if !args.args.is_empty() {
47
+ return plan_err!("lix_active_version_commit_id requires no arguments");
48
+ }
49
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
50
+ self.commit_id.clone(),
51
+ )))
52
+ }
53
+ }
@@ -0,0 +1,47 @@
1
+ use std::any::Any;
2
+
3
+ use datafusion::arrow::datatypes::DataType;
4
+ use datafusion::common::{Result, ScalarValue};
5
+ use datafusion::logical_expr::{
6
+ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
7
+ };
8
+
9
+ #[derive(Debug, Clone, PartialEq, Eq, Hash)]
10
+ pub(super) struct LixEmptyBlob;
11
+
12
+ impl ScalarUDFImpl for LixEmptyBlob {
13
+ fn as_any(&self) -> &dyn Any {
14
+ self
15
+ }
16
+
17
+ fn name(&self) -> &str {
18
+ "lix_empty_blob"
19
+ }
20
+
21
+ fn signature(&self) -> &Signature {
22
+ static SIGNATURE: std::sync::LazyLock<Signature> =
23
+ std::sync::LazyLock::new(|| Signature::nullary(Volatility::Immutable));
24
+ &SIGNATURE
25
+ }
26
+
27
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
28
+ Ok(DataType::Binary)
29
+ }
30
+
31
+ fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
32
+ Ok(ColumnarValue::Scalar(ScalarValue::Binary(Some(Vec::new()))))
33
+ }
34
+ }
35
+
36
#[cfg(test)]
mod tests {
    use super::super::test_support::single_binary;

    /// `lix_empty_blob()` yields a non-null, zero-length binary value.
    #[tokio::test]
    async fn returns_empty_binary_value() {
        let blob = single_binary("SELECT lix_empty_blob()").await;
        assert_eq!(blob, Some(Vec::new()));
    }
}
@@ -0,0 +1,100 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use datafusion::arrow::array::{Array, StringArray};
5
+ use datafusion::arrow::datatypes::{DataType, FieldRef};
6
+ use datafusion::common::{plan_err, DataFusionError, Result, ScalarValue};
7
+ use datafusion::logical_expr::{
8
+ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
9
+ };
10
+ use serde_json::Value as JsonValue;
11
+
12
+ use crate::sql2::result_metadata::json_field;
13
+
14
+ use super::common::{scalar_inputs, text_like_value};
15
+
16
+ #[derive(Debug, Clone, PartialEq, Eq, Hash)]
17
+ pub(super) struct LixJson;
18
+
19
+ impl ScalarUDFImpl for LixJson {
20
+ fn as_any(&self) -> &dyn Any {
21
+ self
22
+ }
23
+
24
+ fn name(&self) -> &str {
25
+ "lix_json"
26
+ }
27
+
28
+ fn signature(&self) -> &Signature {
29
+ static SIGNATURE: std::sync::LazyLock<Signature> =
30
+ std::sync::LazyLock::new(|| Signature::any(1, Volatility::Immutable));
31
+ &SIGNATURE
32
+ }
33
+
34
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
35
+ Ok(DataType::Utf8)
36
+ }
37
+
38
+ fn return_field_from_args(&self, _args: ReturnFieldArgs) -> Result<FieldRef> {
39
+ Ok(Arc::new(json_field(self.name(), true)))
40
+ }
41
+
42
+ fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
43
+ if args.args.len() != 1 {
44
+ return plan_err!("lix_json requires exactly 1 argument");
45
+ }
46
+ let scalar_inputs = scalar_inputs(&args.args);
47
+ let arrays = ColumnarValue::values_to_arrays(&args.args)?;
48
+ let input = &arrays[0];
49
+ let len = input.len();
50
+ let mut values = Vec::with_capacity(len);
51
+ for row in 0..len {
52
+ values.push(json_value(input.as_ref(), row)?);
53
+ }
54
+ if scalar_inputs {
55
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
56
+ values.into_iter().next().flatten(),
57
+ )))
58
+ } else {
59
+ Ok(ColumnarValue::Array(Arc::new(StringArray::from(values))))
60
+ }
61
+ }
62
+ }
63
+
64
+ fn json_value(array: &dyn Array, row: usize) -> Result<Option<String>> {
65
+ if matches!(array.data_type(), DataType::Null) {
66
+ return Ok(Some("null".to_string()));
67
+ }
68
+ let Some(raw) = text_like_value(array, row)? else {
69
+ return Ok(Some("null".to_string()));
70
+ };
71
+ let parsed = serde_json::from_str::<JsonValue>(&raw).map_err(|error| {
72
+ DataFusionError::Execution(format!(
73
+ "lix_json() expected valid JSON text, got error: {error}"
74
+ ))
75
+ })?;
76
+ Ok(Some(serde_json::to_string(&parsed).map_err(|error| {
77
+ DataFusionError::Execution(format!("lix_json() could not render JSON: {error}"))
78
+ })?))
79
+ }
80
+
81
#[cfg(test)]
mod tests {
    use super::super::test_support::single_text;

    /// Insignificant whitespace in the input is dropped by re-serialization.
    #[tokio::test]
    async fn canonicalizes_json_text() {
        let canonical = single_text("SELECT lix_json('{ \"name\" : \"Ada\" }')").await;
        assert_eq!(canonical, Some("{\"name\":\"Ada\"}".to_string()));
    }

    /// SQL NULL maps to the JSON text `null`, not to SQL NULL.
    #[tokio::test]
    async fn null_input_returns_json_null() {
        let rendered = single_text("SELECT lix_json(NULL)").await;
        assert_eq!(rendered, Some("null".to_string()));
    }
}
@@ -0,0 +1,99 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use datafusion::arrow::array::StringArray;
5
+ use datafusion::arrow::datatypes::{DataType, FieldRef};
6
+ use datafusion::common::{plan_err, Result, ScalarValue};
7
+ use datafusion::logical_expr::{
8
+ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
9
+ };
10
+ use serde_json::Value as JsonValue;
11
+
12
+ use crate::sql2::result_metadata::json_field;
13
+
14
+ use super::common::{extract_json_path, json_json_value, scalar_inputs};
15
+
16
+ #[derive(Debug, Clone, PartialEq, Eq, Hash)]
17
+ pub(super) struct LixJsonGet {
18
+ signature: Signature,
19
+ }
20
+
21
+ impl LixJsonGet {
22
+ pub(super) fn new() -> Self {
23
+ Self {
24
+ signature: Signature::variadic_any(Volatility::Immutable),
25
+ }
26
+ }
27
+ }
28
+
29
+ impl ScalarUDFImpl for LixJsonGet {
30
+ fn as_any(&self) -> &dyn Any {
31
+ self
32
+ }
33
+
34
+ fn name(&self) -> &str {
35
+ "lix_json_get"
36
+ }
37
+
38
+ fn signature(&self) -> &Signature {
39
+ &self.signature
40
+ }
41
+
42
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
43
+ Ok(DataType::Utf8)
44
+ }
45
+
46
+ fn return_field_from_args(&self, _args: ReturnFieldArgs) -> Result<FieldRef> {
47
+ Ok(Arc::new(json_field(self.name(), true)))
48
+ }
49
+
50
+ fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
51
+ if args.args.len() < 2 {
52
+ return plan_err!("lix_json_get requires at least 2 arguments");
53
+ }
54
+
55
+ let scalar_inputs = scalar_inputs(&args.args);
56
+ let arrays = ColumnarValue::values_to_arrays(&args.args)?;
57
+ let len = arrays.first().map(|array| array.len()).unwrap_or(1);
58
+
59
+ let mut values = Vec::with_capacity(len);
60
+ for row in 0..len {
61
+ values.push(match extract_json_path(self.name(), &arrays, row)? {
62
+ None | Some(JsonValue::Null) => None,
63
+ Some(other) => Some(json_json_value(&other)?),
64
+ });
65
+ }
66
+ if scalar_inputs {
67
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
68
+ values.into_iter().next().flatten(),
69
+ )))
70
+ } else {
71
+ Ok(ColumnarValue::Array(Arc::new(StringArray::from(values))))
72
+ }
73
+ }
74
+ }
75
+
76
#[cfg(test)]
mod tests {
    use super::super::test_support::single_text;

    /// Matches are returned as JSON text, so strings keep their quotes.
    #[tokio::test]
    async fn returns_json_representation() {
        let name = single_text("SELECT lix_json_get('{\"name\":\"Ada\"}', 'name')").await;
        assert_eq!(name, Some("\"Ada\"".to_string()));

        let tags = single_text("SELECT lix_json_get('{\"tags\":[\"db\"]}', 'tags')").await;
        assert_eq!(tags, Some("[\"db\"]".to_string()));
    }

    /// A key that is absent from the document yields SQL NULL.
    #[tokio::test]
    async fn missing_path_returns_null() {
        let missing = single_text("SELECT lix_json_get('{\"name\":\"Ada\"}', 'missing')").await;
        assert_eq!(missing, None);
    }
}
@@ -0,0 +1,99 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use datafusion::arrow::array::StringArray;
5
+ use datafusion::arrow::datatypes::DataType;
6
+ use datafusion::common::{plan_err, Result, ScalarValue};
7
+ use datafusion::logical_expr::{
8
+ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
9
+ };
10
+ use serde_json::Value as JsonValue;
11
+
12
+ use super::common::{extract_json_path, json_text_value, scalar_inputs};
13
+
14
+ #[derive(Debug, Clone, PartialEq, Eq, Hash)]
15
+ pub(super) struct LixJsonGetText {
16
+ signature: Signature,
17
+ }
18
+
19
+ impl LixJsonGetText {
20
+ pub(super) fn new() -> Self {
21
+ Self {
22
+ signature: Signature::variadic_any(Volatility::Immutable),
23
+ }
24
+ }
25
+ }
26
+
27
+ impl ScalarUDFImpl for LixJsonGetText {
28
+ fn as_any(&self) -> &dyn Any {
29
+ self
30
+ }
31
+
32
+ fn name(&self) -> &str {
33
+ "lix_json_get_text"
34
+ }
35
+
36
+ fn signature(&self) -> &Signature {
37
+ &self.signature
38
+ }
39
+
40
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
41
+ Ok(DataType::Utf8)
42
+ }
43
+
44
+ fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
45
+ if args.args.len() < 2 {
46
+ return plan_err!("lix_json_get_text requires at least 2 arguments");
47
+ }
48
+
49
+ let scalar_inputs = scalar_inputs(&args.args);
50
+ let arrays = ColumnarValue::values_to_arrays(&args.args)?;
51
+ let len = arrays.first().map(|array| array.len()).unwrap_or(1);
52
+
53
+ let mut values = Vec::with_capacity(len);
54
+ for row in 0..len {
55
+ values.push(match extract_json_path(self.name(), &arrays, row)? {
56
+ None | Some(JsonValue::Null) => None,
57
+ Some(JsonValue::Bool(value)) => Some(if value {
58
+ "true".to_string()
59
+ } else {
60
+ "false".to_string()
61
+ }),
62
+ Some(JsonValue::String(value)) => Some(value),
63
+ Some(other) => Some(json_text_value(&other)?),
64
+ });
65
+ }
66
+ if scalar_inputs {
67
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
68
+ values.into_iter().next().flatten(),
69
+ )))
70
+ } else {
71
+ Ok(ColumnarValue::Array(Arc::new(StringArray::from(values))))
72
+ }
73
+ }
74
+ }
75
+
76
#[cfg(test)]
mod tests {
    use super::super::test_support::single_text;

    /// Strings come back without quotes; booleans use their literal spelling.
    #[tokio::test]
    async fn returns_unwrapped_text() {
        let name = single_text("SELECT lix_json_get_text('{\"name\":\"Ada\"}', 'name')").await;
        assert_eq!(name, Some("Ada".to_string()));

        let active =
            single_text("SELECT lix_json_get_text('{\"active\":true}', 'active')").await;
        assert_eq!(active, Some("true".to_string()));
    }

    /// A key that is absent from the document yields SQL NULL.
    #[tokio::test]
    async fn missing_path_returns_null() {
        let missing =
            single_text("SELECT lix_json_get_text('{\"name\":\"Ada\"}', 'missing')").await;
        assert_eq!(missing, None);
    }
}