@lix-js/sdk 0.6.0-preview.3 → 0.6.0-preview.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/README.md +1 -1
  2. package/SKILL.md +105 -65
  3. package/dist/engine-wasm/index.js +4 -4
  4. package/dist/engine-wasm/wasm/lix_engine.d.ts +30 -6
  5. package/dist/engine-wasm/wasm/lix_engine.js +187 -117
  6. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  7. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +14 -8
  8. package/dist/generated/builtin-schemas.d.ts +69 -69
  9. package/dist/generated/builtin-schemas.js +94 -94
  10. package/dist/open-lix.d.ts +42 -28
  11. package/dist/open-lix.js +49 -10
  12. package/dist/sqlite/index.js +86 -30
  13. package/dist-engine-src/README.md +3 -3
  14. package/dist-engine-src/src/backend/capabilities.rs +67 -0
  15. package/dist-engine-src/src/backend/conformance/baseline.rs +1127 -0
  16. package/dist-engine-src/src/backend/conformance/factory.rs +93 -0
  17. package/dist-engine-src/src/backend/conformance/failure_tests.rs +608 -0
  18. package/dist-engine-src/src/backend/conformance/fixtures.rs +26 -0
  19. package/dist-engine-src/src/backend/conformance/mod.rs +75 -0
  20. package/dist-engine-src/src/backend/conformance/model.rs +28 -0
  21. package/dist-engine-src/src/backend/conformance/model_based.rs +257 -0
  22. package/dist-engine-src/src/backend/conformance/persistence.rs +204 -0
  23. package/dist-engine-src/src/backend/conformance/projection.rs +21 -0
  24. package/dist-engine-src/src/backend/conformance/pushdown.rs +24 -0
  25. package/dist-engine-src/src/backend/conformance/runner.rs +90 -0
  26. package/dist-engine-src/src/backend/conformance/scan.rs +24 -0
  27. package/dist-engine-src/src/backend/conformance/write.rs +16 -0
  28. package/dist-engine-src/src/backend/error.rs +94 -0
  29. package/dist-engine-src/src/backend/in_memory.rs +670 -0
  30. package/dist-engine-src/src/backend/mod.rs +36 -9
  31. package/dist-engine-src/src/backend/predicate.rs +80 -0
  32. package/dist-engine-src/src/backend/traits.rs +260 -0
  33. package/dist-engine-src/src/backend/types.rs +224 -81
  34. package/dist-engine-src/src/binary_cas/context.rs +8 -8
  35. package/dist-engine-src/src/binary_cas/kv.rs +234 -259
  36. package/dist-engine-src/src/{version → branch}/context.rs +12 -12
  37. package/dist-engine-src/src/branch/lifecycle.rs +221 -0
  38. package/dist-engine-src/src/branch/mod.rs +13 -0
  39. package/dist-engine-src/src/branch/refs.rs +321 -0
  40. package/dist-engine-src/src/branch/stage_rows.rs +67 -0
  41. package/dist-engine-src/src/branch/types.rs +21 -0
  42. package/dist-engine-src/src/catalog/context.rs +18 -18
  43. package/dist-engine-src/src/catalog/snapshot.rs +8 -8
  44. package/dist-engine-src/src/changelog/bench_support.rs +785 -0
  45. package/dist-engine-src/src/changelog/change.rs +1 -0
  46. package/dist-engine-src/src/changelog/codec.rs +497 -0
  47. package/dist-engine-src/src/changelog/commit.rs +1 -0
  48. package/dist-engine-src/src/changelog/context.rs +1614 -0
  49. package/dist-engine-src/src/changelog/mod.rs +29 -0
  50. package/dist-engine-src/src/changelog/store.rs +163 -0
  51. package/dist-engine-src/src/changelog/test_support.rs +54 -0
  52. package/dist-engine-src/src/changelog/types.rs +213 -0
  53. package/dist-engine-src/src/commit_graph/context.rs +317 -274
  54. package/dist-engine-src/src/commit_graph/mod.rs +2 -4
  55. package/dist-engine-src/src/commit_graph/types.rs +22 -42
  56. package/dist-engine-src/src/commit_graph/walker.rs +133 -103
  57. package/dist-engine-src/src/common/error.rs +52 -18
  58. package/dist-engine-src/src/common/identity.rs +2 -2
  59. package/dist-engine-src/src/common/mod.rs +1 -1
  60. package/dist-engine-src/src/domain.rs +42 -46
  61. package/dist-engine-src/src/engine.rs +74 -96
  62. package/dist-engine-src/src/{entity_identity.rs → entity_pk.rs} +89 -92
  63. package/dist-engine-src/src/functions/context.rs +56 -52
  64. package/dist-engine-src/src/functions/state.rs +51 -52
  65. package/dist-engine-src/src/init.rs +288 -154
  66. package/dist-engine-src/src/json_store/context.rs +15 -266
  67. package/dist-engine-src/src/json_store/mod.rs +26 -0
  68. package/dist-engine-src/src/json_store/store.rs +103 -718
  69. package/dist-engine-src/src/json_store/types.rs +4 -9
  70. package/dist-engine-src/src/lib.rs +49 -19
  71. package/dist-engine-src/src/live_state/context.rs +654 -790
  72. package/dist-engine-src/src/live_state/mod.rs +9 -3
  73. package/dist-engine-src/src/live_state/overlay.rs +4 -4
  74. package/dist-engine-src/src/live_state/types.rs +30 -21
  75. package/dist-engine-src/src/live_state/visibility.rs +514 -71
  76. package/dist-engine-src/src/plugin/install.rs +48 -48
  77. package/dist-engine-src/src/plugin/manifest.rs +7 -7
  78. package/dist-engine-src/src/plugin/materializer.rs +0 -275
  79. package/dist-engine-src/src/plugin/plugin_manifest.json +4 -3
  80. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +2 -2
  81. package/dist-engine-src/src/schema/builtin/lix_branch_descriptor.json +34 -0
  82. package/dist-engine-src/src/schema/builtin/lix_branch_ref.json +48 -0
  83. package/dist-engine-src/src/schema/builtin/lix_change.json +3 -3
  84. package/dist-engine-src/src/schema/builtin/lix_commit.json +1 -1
  85. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +6 -6
  86. package/dist-engine-src/src/schema/builtin/mod.rs +18 -20
  87. package/dist-engine-src/src/schema/compatibility.rs +11 -11
  88. package/dist-engine-src/src/schema/definition.json +2 -2
  89. package/dist-engine-src/src/schema/definition.rs +5 -5
  90. package/dist-engine-src/src/schema/key.rs +3 -3
  91. package/dist-engine-src/src/schema/mod.rs +1 -1
  92. package/dist-engine-src/src/schema/tests.rs +18 -18
  93. package/dist-engine-src/src/session/context.rs +819 -124
  94. package/dist-engine-src/src/session/create_branch.rs +94 -0
  95. package/dist-engine-src/src/session/execute.rs +260 -57
  96. package/dist-engine-src/src/session/merge/analysis.rs +9 -3
  97. package/dist-engine-src/src/session/merge/{version.rs → branch.rs} +119 -129
  98. package/dist-engine-src/src/session/merge/conflicts.rs +2 -2
  99. package/dist-engine-src/src/session/merge/mod.rs +5 -6
  100. package/dist-engine-src/src/session/merge/stats.rs +7 -11
  101. package/dist-engine-src/src/session/mod.rs +19 -16
  102. package/dist-engine-src/src/session/switch_branch.rs +113 -0
  103. package/dist-engine-src/src/session/transaction.rs +557 -0
  104. package/dist-engine-src/src/sql2/bind/classify.rs +102 -0
  105. package/dist-engine-src/src/sql2/bind/error.rs +5 -0
  106. package/dist-engine-src/src/sql2/bind/expr.rs +29 -0
  107. package/dist-engine-src/src/sql2/bind/mod.rs +12 -0
  108. package/dist-engine-src/src/sql2/{udfs/public_call.rs → bind/public_udf.rs} +98 -3
  109. package/dist-engine-src/src/sql2/bind/read.rs +65 -0
  110. package/dist-engine-src/src/sql2/bind/statement.rs +2236 -0
  111. package/dist-engine-src/src/sql2/bind/table.rs +273 -0
  112. package/dist-engine-src/src/sql2/bind/write.rs +86 -0
  113. package/dist-engine-src/src/sql2/branch_scope.rs +436 -0
  114. package/dist-engine-src/src/sql2/catalog/capability.rs +20 -0
  115. package/dist-engine-src/src/sql2/catalog/entity_surface.rs +296 -0
  116. package/dist-engine-src/src/sql2/catalog/mod.rs +15 -0
  117. package/dist-engine-src/src/sql2/catalog/registry.rs +556 -0
  118. package/dist-engine-src/src/sql2/catalog/schema.rs +88 -0
  119. package/dist-engine-src/src/sql2/catalog/surface.rs +41 -0
  120. package/dist-engine-src/src/sql2/change_materialization.rs +122 -0
  121. package/dist-engine-src/src/sql2/context.rs +36 -30
  122. package/dist-engine-src/src/sql2/error.rs +4 -5
  123. package/dist-engine-src/src/sql2/exec/bound_public_write.rs +1593 -0
  124. package/dist-engine-src/src/sql2/exec/datafusion.rs +5266 -0
  125. package/dist-engine-src/src/sql2/exec/fast_write.rs +82 -0
  126. package/dist-engine-src/src/sql2/exec/mod.rs +24 -0
  127. package/dist-engine-src/src/sql2/exec/write.rs +661 -0
  128. package/dist-engine-src/src/sql2/filesystem_planner.rs +72 -77
  129. package/dist-engine-src/src/sql2/filesystem_visibility.rs +21 -21
  130. package/dist-engine-src/src/sql2/history_projection.rs +8 -8
  131. package/dist-engine-src/src/sql2/history_route.rs +35 -31
  132. package/dist-engine-src/src/sql2/mod.rs +30 -24
  133. package/dist-engine-src/src/sql2/optimize/datafusion.rs +1 -0
  134. package/dist-engine-src/src/sql2/optimize/mod.rs +2 -0
  135. package/dist-engine-src/src/sql2/optimize/simple_write.rs +116 -0
  136. package/dist-engine-src/src/sql2/parse/mod.rs +69 -0
  137. package/dist-engine-src/src/sql2/parse/normalize.rs +1 -0
  138. package/dist-engine-src/src/sql2/plan/branch_scope.rs +24 -0
  139. package/dist-engine-src/src/sql2/plan/mod.rs +5 -0
  140. package/dist-engine-src/src/sql2/plan/predicate.rs +22 -0
  141. package/dist-engine-src/src/sql2/plan/write.rs +147 -0
  142. package/dist-engine-src/src/sql2/predicate_typecheck.rs +258 -0
  143. package/dist-engine-src/src/sql2/{version_provider.rs → providers/branch.rs} +218 -214
  144. package/dist-engine-src/src/sql2/{change_provider.rs → providers/change.rs} +156 -42
  145. package/dist-engine-src/src/sql2/{directory_provider.rs → providers/directory.rs} +291 -322
  146. package/dist-engine-src/src/sql2/{directory_history_provider.rs → providers/directory_history.rs} +56 -42
  147. package/dist-engine-src/src/sql2/providers/entity.rs +1484 -0
  148. package/dist-engine-src/src/sql2/{entity_history_provider.rs → providers/entity_history.rs} +43 -31
  149. package/dist-engine-src/src/sql2/{file_provider.rs → providers/file.rs} +323 -316
  150. package/dist-engine-src/src/sql2/{file_history_provider.rs → providers/file_history.rs} +60 -46
  151. package/dist-engine-src/src/sql2/{history_provider.rs → providers/history.rs} +46 -32
  152. package/dist-engine-src/src/sql2/{lix_state_provider.rs → providers/lix_state.rs} +359 -329
  153. package/dist-engine-src/src/sql2/providers/mod.rs +508 -0
  154. package/dist-engine-src/src/sql2/read_only.rs +2 -2
  155. package/dist-engine-src/src/sql2/session.rs +47 -96
  156. package/dist-engine-src/src/sql2/storage/constraints.rs +1 -0
  157. package/dist-engine-src/src/sql2/storage/mod.rs +1 -0
  158. package/dist-engine-src/src/sql2/test_support/differential.rs +712 -0
  159. package/dist-engine-src/src/sql2/test_support/generators.rs +354 -0
  160. package/dist-engine-src/src/sql2/test_support/mod.rs +2 -0
  161. package/dist-engine-src/src/sql2/udfs/{lix_active_version_commit_id.rs → lix_active_branch_commit_id.rs} +7 -7
  162. package/dist-engine-src/src/sql2/udfs/mod.rs +3 -6
  163. package/dist-engine-src/src/sql2/write_normalization.rs +45 -22
  164. package/dist-engine-src/src/storage/conformance.rs +399 -0
  165. package/dist-engine-src/src/storage/context.rs +552 -288
  166. package/dist-engine-src/src/storage/mod.rs +48 -10
  167. package/dist-engine-src/src/storage/point.rs +440 -0
  168. package/dist-engine-src/src/storage/read_scope.rs +43 -64
  169. package/dist-engine-src/src/storage/reader.rs +867 -0
  170. package/dist-engine-src/src/storage/scan.rs +784 -0
  171. package/dist-engine-src/src/storage/spaces.rs +236 -0
  172. package/dist-engine-src/src/storage/stats.rs +80 -0
  173. package/dist-engine-src/src/storage/write_set.rs +962 -0
  174. package/dist-engine-src/src/storage_bench.rs +136 -4828
  175. package/dist-engine-src/src/test_support.rs +360 -138
  176. package/dist-engine-src/src/tracked_state/bench_support.rs +394 -0
  177. package/dist-engine-src/src/tracked_state/codec.rs +155 -1057
  178. package/dist-engine-src/src/tracked_state/commit_root_rebuild.rs +358 -0
  179. package/dist-engine-src/src/tracked_state/context.rs +1927 -993
  180. package/dist-engine-src/src/tracked_state/diff.rs +1715 -261
  181. package/dist-engine-src/src/tracked_state/merge.rs +74 -88
  182. package/dist-engine-src/src/tracked_state/mod.rs +19 -16
  183. package/dist-engine-src/src/tracked_state/{materialization.rs → row_materialization.rs} +50 -178
  184. package/dist-engine-src/src/tracked_state/storage.rs +243 -191
  185. package/dist-engine-src/src/tracked_state/tree.rs +247 -371
  186. package/dist-engine-src/src/tracked_state/types.rs +49 -42
  187. package/dist-engine-src/src/transaction/bench_support.rs +407 -0
  188. package/dist-engine-src/src/transaction/commit.rs +821 -713
  189. package/dist-engine-src/src/transaction/context.rs +705 -600
  190. package/dist-engine-src/src/transaction/mod.rs +13 -2
  191. package/dist-engine-src/src/transaction/normalization.rs +63 -76
  192. package/dist-engine-src/src/transaction/prep.rs +13 -13
  193. package/dist-engine-src/src/transaction/schema_resolver.rs +19 -5
  194. package/dist-engine-src/src/transaction/staging.rs +228 -434
  195. package/dist-engine-src/src/transaction/types.rs +41 -98
  196. package/dist-engine-src/src/transaction/validation.rs +382 -446
  197. package/dist-engine-src/src/untracked_state/codec.rs +337 -29
  198. package/dist-engine-src/src/untracked_state/context.rs +7 -7
  199. package/dist-engine-src/src/untracked_state/materialization.rs +2 -2
  200. package/dist-engine-src/src/untracked_state/mod.rs +1 -1
  201. package/dist-engine-src/src/untracked_state/storage.rs +659 -157
  202. package/dist-engine-src/src/untracked_state/types.rs +21 -21
  203. package/package.json +71 -68
  204. package/dist-engine-src/src/backend/kv.rs +0 -358
  205. package/dist-engine-src/src/backend/testing.rs +0 -658
  206. package/dist-engine-src/src/commit_store/codec.rs +0 -887
  207. package/dist-engine-src/src/commit_store/context.rs +0 -944
  208. package/dist-engine-src/src/commit_store/materialization.rs +0 -84
  209. package/dist-engine-src/src/commit_store/mod.rs +0 -16
  210. package/dist-engine-src/src/commit_store/storage.rs +0 -600
  211. package/dist-engine-src/src/commit_store/types.rs +0 -215
  212. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -34
  213. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -48
  214. package/dist-engine-src/src/session/create_version.rs +0 -88
  215. package/dist-engine-src/src/session/merge/apply.rs +0 -23
  216. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +0 -100
  217. package/dist-engine-src/src/session/switch_version.rs +0 -109
  218. package/dist-engine-src/src/sql2/classify.rs +0 -182
  219. package/dist-engine-src/src/sql2/entity_provider.rs +0 -3211
  220. package/dist-engine-src/src/sql2/execute.rs +0 -3440
  221. package/dist-engine-src/src/sql2/public_bind/assignment.rs +0 -46
  222. package/dist-engine-src/src/sql2/public_bind/capability.rs +0 -41
  223. package/dist-engine-src/src/sql2/public_bind/dml.rs +0 -166
  224. package/dist-engine-src/src/sql2/public_bind/mod.rs +0 -25
  225. package/dist-engine-src/src/sql2/public_bind/table.rs +0 -168
  226. package/dist-engine-src/src/sql2/version_scope.rs +0 -394
  227. package/dist-engine-src/src/storage/types.rs +0 -501
  228. package/dist-engine-src/src/tracked_state/by_file_index.rs +0 -98
  229. package/dist-engine-src/src/tracked_state/materializer.rs +0 -488
  230. package/dist-engine-src/src/transaction/live_state_overlay.rs +0 -35
  231. package/dist-engine-src/src/version/lifecycle.rs +0 -221
  232. package/dist-engine-src/src/version/mod.rs +0 -13
  233. package/dist-engine-src/src/version/refs.rs +0 -330
  234. package/dist-engine-src/src/version/stage_rows.rs +0 -67
  235. package/dist-engine-src/src/version/types.rs +0 -21
@@ -1,4 +1,8 @@
1
+ use std::collections::{BTreeMap, BTreeSet};
2
+
1
3
  use datafusion::arrow::datatypes::{Field, Schema};
4
+ use datafusion::common::metadata::FieldMetadata;
5
+ use datafusion::common::tree_node::{Transformed, TreeNode};
2
6
  use datafusion::common::{DFSchema, DataFusionError, ScalarValue};
3
7
  use datafusion::logical_expr::expr::{Between, InList};
4
8
  use datafusion::logical_expr::{BinaryExpr, Expr, Like, Operator};
@@ -19,6 +23,17 @@ pub(crate) fn validate_json_predicate_filters(
19
23
  Ok(())
20
24
  }
21
25
 
26
+ pub(crate) fn canonicalize_json_identity_text_filters(
27
+ schema: &Schema,
28
+ filters: &[Expr],
29
+ ) -> Result<Vec<Expr>, DataFusionError> {
30
+ filters
31
+ .iter()
32
+ .cloned()
33
+ .map(|filter| canonicalize_json_identity_text_filter(schema, filter))
34
+ .collect()
35
+ }
36
+
22
37
  pub(crate) fn validate_json_predicate_expr_with_dfschema(
23
38
  schema: &DFSchema,
24
39
  expr: &Expr,
@@ -31,6 +46,140 @@ pub(crate) fn validate_json_predicate_expr_with_dfschema(
31
46
  })
32
47
  }
33
48
 
49
+ pub(crate) fn json_predicate_placeholder_indexes_with_dfschema(
50
+ schema: &DFSchema,
51
+ expr: &Expr,
52
+ ) -> BTreeSet<usize> {
53
+ let mut indexes = BTreeSet::new();
54
+ collect_json_predicate_placeholder_indexes(expr, &mut indexes, &|column| {
55
+ schema
56
+ .field_with_name(column.relation.as_ref(), &column.name)
57
+ .ok()
58
+ .map(|field| field.as_ref())
59
+ });
60
+ indexes
61
+ }
62
+
63
+ fn canonicalize_json_identity_text_filter(
64
+ schema: &Schema,
65
+ expr: Expr,
66
+ ) -> Result<Expr, DataFusionError> {
67
+ expr.transform(|expr| canonicalize_json_identity_text_expr(schema, expr))
68
+ .map(|transformed| transformed.data)
69
+ }
70
+
71
+ fn canonicalize_json_identity_text_expr(
72
+ schema: &Schema,
73
+ expr: Expr,
74
+ ) -> Result<Transformed<Expr>, DataFusionError> {
75
+ match expr {
76
+ Expr::BinaryExpr(binary) if is_comparison_operator(binary.op) => {
77
+ canonicalize_json_identity_text_binary(schema, binary)
78
+ }
79
+ Expr::InList(in_list) => canonicalize_json_identity_text_in_list(schema, in_list),
80
+ _ => Ok(Transformed::no(expr)),
81
+ }
82
+ }
83
+
84
+ fn canonicalize_json_identity_text_binary(
85
+ schema: &Schema,
86
+ binary: BinaryExpr,
87
+ ) -> Result<Transformed<Expr>, DataFusionError> {
88
+ let BinaryExpr { left, op, right } = binary;
89
+ let left_identity_json = is_identity_json_expr_for_arrow_schema(schema, &left);
90
+ let right_identity_json = is_identity_json_expr_for_arrow_schema(schema, &right);
91
+ let left = if right_identity_json {
92
+ Box::new(canonicalize_json_text_literal(*left)?)
93
+ } else {
94
+ left
95
+ };
96
+ let right = if left_identity_json {
97
+ Box::new(canonicalize_json_text_literal(*right)?)
98
+ } else {
99
+ right
100
+ };
101
+ Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr::new(
102
+ left, op, right,
103
+ ))))
104
+ }
105
+
106
+ fn canonicalize_json_identity_text_in_list(
107
+ schema: &Schema,
108
+ in_list: InList,
109
+ ) -> Result<Transformed<Expr>, DataFusionError> {
110
+ let expr_identity_json = is_identity_json_expr_for_arrow_schema(schema, &in_list.expr);
111
+ let list_has_identity_json = in_list
112
+ .list
113
+ .iter()
114
+ .any(|item| is_identity_json_expr_for_arrow_schema(schema, item));
115
+
116
+ let expr = if list_has_identity_json {
117
+ Box::new(canonicalize_json_text_literal(*in_list.expr)?)
118
+ } else {
119
+ in_list.expr
120
+ };
121
+ let list = if expr_identity_json {
122
+ in_list
123
+ .list
124
+ .into_iter()
125
+ .map(canonicalize_json_text_literal)
126
+ .collect::<Result<Vec<_>, _>>()?
127
+ } else {
128
+ in_list.list
129
+ };
130
+
131
+ Ok(Transformed::yes(Expr::InList(InList::new(
132
+ expr,
133
+ list,
134
+ in_list.negated,
135
+ ))))
136
+ }
137
+
138
+ fn canonicalize_json_text_literal(expr: Expr) -> Result<Expr, DataFusionError> {
139
+ let Expr::Literal(literal, metadata) = expr else {
140
+ return Ok(expr);
141
+ };
142
+ let canonical = match &literal {
143
+ ScalarValue::Utf8(Some(value))
144
+ | ScalarValue::Utf8View(Some(value))
145
+ | ScalarValue::LargeUtf8(Some(value)) => Some(canonical_json_text(value)?),
146
+ _ => None,
147
+ };
148
+ Ok(match canonical {
149
+ Some(value) => Expr::Literal(ScalarValue::Utf8(Some(value)), Some(json_field_metadata())),
150
+ None => Expr::Literal(literal, metadata),
151
+ })
152
+ }
153
+
154
+ fn canonical_json_text(raw: &str) -> Result<String, DataFusionError> {
155
+ serde_json::from_str::<serde_json::Value>(raw)
156
+ .map(|value| value.to_string())
157
+ .map_err(|error| {
158
+ lix_error_to_datafusion_error(LixError::new(
159
+ LixError::CODE_TYPE_MISMATCH,
160
+ format!("JSON comparison value is not valid JSON: {error}"),
161
+ ))
162
+ })
163
+ }
164
+
165
+ fn json_field_metadata() -> FieldMetadata {
166
+ FieldMetadata::new(BTreeMap::from([(
167
+ LIX_VALUE_TYPE_METADATA_KEY.to_string(),
168
+ LIX_VALUE_TYPE_JSON.to_string(),
169
+ )]))
170
+ }
171
+
172
+ fn is_identity_json_expr_for_arrow_schema(schema: &Schema, expr: &Expr) -> bool {
173
+ is_identity_json_expr(expr)
174
+ && is_json_expr(expr, &|column| {
175
+ schema
176
+ .fields()
177
+ .iter()
178
+ .find(|field| field.name() == &column.name)
179
+ .map(|field| field.as_ref())
180
+ })
181
+ }
182
+
34
183
  fn validate_json_predicate_expr_with_arrow_schema(
35
184
  schema: &Schema,
36
185
  expr: &Expr,
@@ -101,6 +250,105 @@ fn validate_expr<'a>(
101
250
  }
102
251
  }
103
252
 
253
+ fn collect_json_predicate_placeholder_indexes<'a>(
254
+ expr: &'a Expr,
255
+ indexes: &mut BTreeSet<usize>,
256
+ lookup_field: &impl Fn(&datafusion::common::Column) -> Option<&'a Field>,
257
+ ) {
258
+ match expr {
259
+ Expr::BinaryExpr(binary) if is_comparison_operator(binary.op) => {
260
+ collect_json_predicate_placeholder_indexes(&binary.left, indexes, lookup_field);
261
+ collect_json_predicate_placeholder_indexes(&binary.right, indexes, lookup_field);
262
+ if is_json_expr(&binary.left, lookup_field) {
263
+ collect_placeholder_indexes(&binary.right, indexes);
264
+ }
265
+ if is_json_expr(&binary.right, lookup_field) {
266
+ collect_placeholder_indexes(&binary.left, indexes);
267
+ }
268
+ }
269
+ Expr::BinaryExpr(binary) => {
270
+ collect_json_predicate_placeholder_indexes(&binary.left, indexes, lookup_field);
271
+ collect_json_predicate_placeholder_indexes(&binary.right, indexes, lookup_field);
272
+ }
273
+ Expr::InList(in_list) => {
274
+ collect_json_predicate_placeholder_indexes(&in_list.expr, indexes, lookup_field);
275
+ for item in &in_list.list {
276
+ collect_json_predicate_placeholder_indexes(item, indexes, lookup_field);
277
+ }
278
+ if is_json_expr(&in_list.expr, lookup_field) {
279
+ for item in &in_list.list {
280
+ collect_placeholder_indexes(item, indexes);
281
+ }
282
+ }
283
+ for item in &in_list.list {
284
+ if is_json_expr(item, lookup_field) {
285
+ collect_placeholder_indexes(&in_list.expr, indexes);
286
+ }
287
+ }
288
+ }
289
+ Expr::Between(between) => {
290
+ collect_json_predicate_placeholder_indexes(&between.expr, indexes, lookup_field);
291
+ collect_json_predicate_placeholder_indexes(&between.low, indexes, lookup_field);
292
+ collect_json_predicate_placeholder_indexes(&between.high, indexes, lookup_field);
293
+ if is_json_expr(&between.expr, lookup_field) {
294
+ collect_placeholder_indexes(&between.low, indexes);
295
+ collect_placeholder_indexes(&between.high, indexes);
296
+ }
297
+ }
298
+ Expr::Alias(alias) => {
299
+ collect_json_predicate_placeholder_indexes(&alias.expr, indexes, lookup_field)
300
+ }
301
+ Expr::Not(inner)
302
+ | Expr::IsNotNull(inner)
303
+ | Expr::IsNull(inner)
304
+ | Expr::IsTrue(inner)
305
+ | Expr::IsFalse(inner)
306
+ | Expr::IsUnknown(inner)
307
+ | Expr::IsNotTrue(inner)
308
+ | Expr::IsNotFalse(inner)
309
+ | Expr::IsNotUnknown(inner)
310
+ | Expr::Negative(inner) => {
311
+ collect_json_predicate_placeholder_indexes(inner, indexes, lookup_field)
312
+ }
313
+ Expr::Cast(cast) => {
314
+ collect_json_predicate_placeholder_indexes(&cast.expr, indexes, lookup_field)
315
+ }
316
+ Expr::TryCast(cast) => {
317
+ collect_json_predicate_placeholder_indexes(&cast.expr, indexes, lookup_field)
318
+ }
319
+ Expr::ScalarFunction(function) => {
320
+ for arg in &function.args {
321
+ collect_json_predicate_placeholder_indexes(arg, indexes, lookup_field);
322
+ }
323
+ }
324
+ Expr::Case(case) => {
325
+ if let Some(expr) = &case.expr {
326
+ collect_json_predicate_placeholder_indexes(expr, indexes, lookup_field);
327
+ }
328
+ for (when, then) in &case.when_then_expr {
329
+ collect_json_predicate_placeholder_indexes(when, indexes, lookup_field);
330
+ collect_json_predicate_placeholder_indexes(then, indexes, lookup_field);
331
+ }
332
+ if let Some(expr) = &case.else_expr {
333
+ collect_json_predicate_placeholder_indexes(expr, indexes, lookup_field);
334
+ }
335
+ }
336
+ _ => {}
337
+ }
338
+ }
339
+
340
+ fn collect_placeholder_indexes(expr: &Expr, indexes: &mut BTreeSet<usize>) {
341
+ if let Expr::Placeholder(placeholder) = expr {
342
+ if let Some(index) = placeholder
343
+ .id
344
+ .strip_prefix('$')
345
+ .and_then(|value| value.parse::<usize>().ok())
346
+ {
347
+ indexes.insert(index);
348
+ }
349
+ }
350
+ }
351
+
104
352
  fn validate_binary_expr<'a>(
105
353
  binary: &'a BinaryExpr,
106
354
  lookup_field: &impl Fn(&datafusion::common::Column) -> Option<&'a Field>,
@@ -219,6 +467,16 @@ fn is_json_expr<'a>(
219
467
  }
220
468
  }
221
469
 
470
+ fn is_identity_json_expr(expr: &Expr) -> bool {
471
+ match expr {
472
+ Expr::Column(column) => matches!(column.name.as_str(), "entity_pk" | "lixcol_entity_pk"),
473
+ Expr::Alias(alias) => is_identity_json_expr(&alias.expr),
474
+ Expr::Cast(cast) => is_identity_json_expr(&cast.expr),
475
+ Expr::TryCast(cast) => is_identity_json_expr(&cast.expr),
476
+ _ => false,
477
+ }
478
+ }
479
+
222
480
  fn is_null_literal(expr: &Expr) -> bool {
223
481
  matches!(expr, Expr::Literal(value, _) if matches!(value, ScalarValue::Null))
224
482
  }