@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,657 @@
1
+ use std::collections::BTreeMap;
2
+ use std::sync::Arc;
3
+
4
+ use datafusion::common::ScalarValue;
5
+ use datafusion::logical_expr::expr::InList;
6
+ use datafusion::logical_expr::{Expr, Operator};
7
+ use tokio::sync::Mutex;
8
+
9
+ use crate::commit_graph::{CommitGraphChangeHistoryRequest, CommitGraphReader};
10
+ use crate::entity_identity::EntityIdentity;
11
+ use crate::LixError;
12
+
13
+ use super::SqlJsonReader;
14
+ use crate::commit_store::{materialize_change, MaterializedChange};
15
+
16
/// Routing constraints shared by every commit-shaped history SQL surface.
///
/// The individual history providers shape their rows differently, but they all
/// interpret pushed-down filters (`start_commit_id IN (...)`, entity filters,
/// depth ranges) through this one structure so that they cannot drift apart.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub(crate) struct HistoryRoute {
    /// Commits from which history traversal starts.
    pub(crate) start_commit_ids: Vec<String>,
    /// Allowed entity ids; an empty list leaves entities unconstrained.
    pub(crate) entity_ids: Vec<String>,
    /// Allowed schema keys; an empty list leaves schemas unconstrained.
    pub(crate) schema_keys: Vec<String>,
    /// Allowed file ids; an empty list leaves files unconstrained.
    pub(crate) file_ids: Vec<String>,
    /// Inclusive lower bound on traversal depth, if any.
    pub(crate) min_depth: Option<i64>,
    /// Inclusive upper bound on traversal depth, if any.
    pub(crate) max_depth: Option<i64>,
    /// Set once conjunctive value filters have intersected to nothing.
    pub(crate) contradictory: bool,
}
31
+
32
+ impl HistoryRoute {
33
+ pub(crate) fn from_filters(filters: &[Expr], column_style: HistoryColumnStyle) -> Self {
34
+ let mut route = Self::default();
35
+ for filter in filters {
36
+ apply_history_filter(filter, &mut route, column_style);
37
+ }
38
+ route
39
+ }
40
+
41
+ /// Returns the part of the route that is safe to apply before a shaped
42
+ /// history provider has built its output rows.
43
+ ///
44
+ /// Surface providers such as `lix_file_history` may be caused by different
45
+ /// canonical event schemas than the schema they expose. For those providers,
46
+ /// identity/schema filters must be evaluated against the shaped output row,
47
+ /// not against the canonical event row.
48
+ pub(crate) fn traversal_only(&self) -> Self {
49
+ Self {
50
+ start_commit_ids: self.start_commit_ids.clone(),
51
+ min_depth: self.min_depth,
52
+ max_depth: self.max_depth,
53
+ contradictory: self.contradictory,
54
+ ..Self::default()
55
+ }
56
+ }
57
+
58
+ /// Returns only the explicit history starts.
59
+ ///
60
+ /// Shaped history providers use this for context loading: path/data shaping
61
+ /// often needs ancestor descriptor rows even when the event route is
62
+ /// restricted to a specific depth.
63
+ pub(crate) fn starts_only(&self) -> Self {
64
+ Self {
65
+ start_commit_ids: self.start_commit_ids.clone(),
66
+ contradictory: self.contradictory,
67
+ ..Self::default()
68
+ }
69
+ }
70
+
71
+ pub(crate) fn is_contradictory(&self) -> bool {
72
+ self.contradictory
73
+ || self
74
+ .min_depth
75
+ .zip(self.max_depth)
76
+ .is_some_and(|(min, max)| min > max)
77
+ || self.min_depth.is_some_and(|depth| depth < 0)
78
+ || self.max_depth.is_some_and(|depth| depth < 0)
79
+ }
80
+
81
+ /// Checks filters that refer to the row exposed by a shaped history surface.
82
+ pub(crate) fn matches_surface_row(
83
+ &self,
84
+ schema_key: &str,
85
+ entity_id: &str,
86
+ file_id: Option<&str>,
87
+ depth: u32,
88
+ ) -> bool {
89
+ if self.is_contradictory() {
90
+ return false;
91
+ }
92
+ if !self.schema_keys.is_empty()
93
+ && !self
94
+ .schema_keys
95
+ .iter()
96
+ .any(|candidate| candidate == schema_key)
97
+ {
98
+ return false;
99
+ }
100
+ if !self.entity_ids.is_empty()
101
+ && !self
102
+ .entity_ids
103
+ .iter()
104
+ .any(|candidate| candidate == entity_id)
105
+ {
106
+ return false;
107
+ }
108
+ if !self.file_ids.is_empty() {
109
+ let Some(file_id) = file_id else {
110
+ return false;
111
+ };
112
+ if !self.file_ids.iter().any(|candidate| candidate == file_id) {
113
+ return false;
114
+ }
115
+ }
116
+ if self
117
+ .min_depth
118
+ .is_some_and(|min_depth| i64::from(depth) < min_depth)
119
+ {
120
+ return false;
121
+ }
122
+ if self
123
+ .max_depth
124
+ .is_some_and(|max_depth| i64::from(depth) > max_depth)
125
+ {
126
+ return false;
127
+ }
128
+ true
129
+ }
130
+ }
131
+
132
+ /// Commit-graph history entry enriched with commit metadata needed by SQL
133
+ /// history surfaces.
134
+ #[derive(Debug, Clone)]
135
+ pub(crate) struct HistoryEntry {
136
+ pub(crate) change: MaterializedChange,
137
+ pub(crate) observed_commit_id: String,
138
+ pub(crate) commit_created_at: String,
139
+ pub(crate) start_commit_id: String,
140
+ pub(crate) depth: u32,
141
+ }
142
+
143
// `lixcol_`-prefixed column names used by history surfaces.

// Identity columns.
/// Prefixed column holding the entity id.
pub(crate) const HISTORY_COL_ENTITY_ID: &str = "lixcol_entity_id";
/// Prefixed column holding the schema key.
pub(crate) const HISTORY_COL_SCHEMA_KEY: &str = "lixcol_schema_key";
/// Prefixed column holding the file id.
pub(crate) const HISTORY_COL_FILE_ID: &str = "lixcol_file_id";

// Payload columns.
/// Prefixed column holding the snapshot content.
pub(crate) const HISTORY_COL_SNAPSHOT_CONTENT: &str = "lixcol_snapshot_content";
/// Prefixed column holding the metadata.
pub(crate) const HISTORY_COL_METADATA: &str = "lixcol_metadata";

// Change and commit columns.
/// Prefixed column holding the change id.
pub(crate) const HISTORY_COL_CHANGE_ID: &str = "lixcol_change_id";
/// Prefixed column holding the observed commit id.
pub(crate) const HISTORY_COL_OBSERVED_COMMIT_ID: &str = "lixcol_observed_commit_id";
/// Prefixed column holding the commit creation timestamp.
pub(crate) const HISTORY_COL_COMMIT_CREATED_AT: &str = "lixcol_commit_created_at";

// Traversal columns.
/// Prefixed column holding the start commit id.
pub(crate) const HISTORY_COL_START_COMMIT_ID: &str = "lixcol_start_commit_id";
/// Prefixed column holding the traversal depth.
pub(crate) const HISTORY_COL_DEPTH: &str = "lixcol_depth";
153
+
154
/// Names used when reporting problems about a particular history view.
pub(crate) struct HistoryViewDescriptor<'a> {
    /// Name of the history view, as shown in error messages.
    pub(crate) view_name: &'a str,
    /// Name of the view's start-commit column, as shown in error messages and hints.
    pub(crate) start_commit_column: &'a str,
}
158
+
159
/// How a history surface spells its routing columns.
#[derive(Clone, Copy, Debug)]
pub(crate) enum HistoryColumnStyle {
    /// Columns are named without a prefix, e.g. `start_commit_id`.
    Bare,
    /// Columns carry the `lixcol_` prefix, e.g. `lixcol_start_commit_id`.
    Prefixed,
}
164
+
165
+ /// Shaped history views expose delete events as tombstone rows.
166
+ ///
167
+ /// If the current event is the descriptor tombstone itself, the provider must
168
+ /// use that tombstone row instead of looking through to an earlier live
169
+ /// descriptor. This keeps one contract across typed entity, file, directory,
170
+ /// and state history: `snapshot_content IS NULL` means projected user/domain
171
+ /// columns are NULL while metadata columns still identify the event.
172
+ pub(crate) fn history_descriptor_event_matches(
173
+ descriptor_entry: &HistoryEntry,
174
+ event_depth: u32,
175
+ event_change_id: &str,
176
+ ) -> bool {
177
+ descriptor_entry.depth == event_depth && descriptor_entry.change.id == event_change_id
178
+ }
179
+
180
+ pub(crate) fn parse_history_filter(expr: &Expr, column_style: HistoryColumnStyle) -> Option<()> {
181
+ parse_history_filter_terms(expr, column_style).map(|_| ())
182
+ }
183
+
184
+ pub(crate) fn commit_graph_history_request(
185
+ route: &HistoryRoute,
186
+ schema_keys: Vec<String>,
187
+ ) -> Option<CommitGraphChangeHistoryRequest> {
188
+ let schema_keys = effective_schema_keys(route, schema_keys)?;
189
+ Some(CommitGraphChangeHistoryRequest {
190
+ entity_ids: route
191
+ .entity_ids
192
+ .iter()
193
+ .filter_map(|entity_id| EntityIdentity::from_json_array_text(entity_id).ok())
194
+ .collect(),
195
+ schema_keys,
196
+ file_ids: route.file_ids.clone(),
197
+ min_depth: route.min_depth.and_then(nonnegative_u32),
198
+ max_depth: route.max_depth.and_then(nonnegative_u32),
199
+ include_tombstones: true,
200
+ })
201
+ }
202
+
203
+ /// Loads commit-graph history once for all SQL history providers.
204
+ ///
205
+ /// Providers pass the schema keys they know how to shape. An empty list means
206
+ /// "do not constrain by provider schema"; this is what `lix_state_history` uses.
207
+ pub(crate) async fn load_history_entries(
208
+ descriptor: HistoryViewDescriptor<'_>,
209
+ commit_graph: Arc<Mutex<Box<dyn CommitGraphReader>>>,
210
+ mut json_reader: SqlJsonReader,
211
+ route: &HistoryRoute,
212
+ schema_keys: Vec<String>,
213
+ ) -> Result<Vec<HistoryEntry>, LixError> {
214
+ if route.is_contradictory() {
215
+ return Ok(Vec::new());
216
+ }
217
+ if route.start_commit_ids.is_empty() {
218
+ return Err(LixError::new(
219
+ LixError::CODE_HISTORY_FILTER_REQUIRED,
220
+ format!(
221
+ "{} requires a {} filter",
222
+ descriptor.view_name, descriptor.start_commit_column
223
+ ),
224
+ )
225
+ .with_hint(format!(
226
+ "Use WHERE {} = lix_active_version_commit_id() to inspect {} from the active version head.",
227
+ descriptor.start_commit_column, descriptor.view_name
228
+ )));
229
+ }
230
+ let Some(request) = commit_graph_history_request(route, schema_keys) else {
231
+ return Ok(Vec::new());
232
+ };
233
+
234
+ let mut rows = Vec::new();
235
+ for start_commit_id in &route.start_commit_ids {
236
+ let (entries, reachable_commits) = {
237
+ let mut guard = commit_graph.lock().await;
238
+ let entries = guard
239
+ .change_history_from_commit(start_commit_id, &request)
240
+ .await?;
241
+ let reachable_commits = guard.reachable_commits(start_commit_id).await?;
242
+ (entries, reachable_commits)
243
+ };
244
+ let commit_created_at_by_id = reachable_commits
245
+ .into_iter()
246
+ .map(|reachable| {
247
+ (
248
+ reachable.commit.commit_id.clone(),
249
+ reachable.commit.change.created_at.clone(),
250
+ )
251
+ })
252
+ .collect::<BTreeMap<_, _>>();
253
+
254
+ for entry in entries {
255
+ let change = materialize_change(&mut json_reader, entry.located_change).await?;
256
+ rows.push(HistoryEntry {
257
+ commit_created_at: commit_created_at_by_id
258
+ .get(&entry.observed_commit_id)
259
+ .cloned()
260
+ .unwrap_or_else(|| change.created_at.clone()),
261
+ change,
262
+ observed_commit_id: entry.observed_commit_id,
263
+ start_commit_id: entry.start_commit_id,
264
+ depth: entry.depth,
265
+ });
266
+ }
267
+ }
268
+
269
+ Ok(rows)
270
+ }
271
+
272
+ fn effective_schema_keys(
273
+ route: &HistoryRoute,
274
+ surface_schema_keys: Vec<String>,
275
+ ) -> Option<Vec<String>> {
276
+ if surface_schema_keys.is_empty() {
277
+ return Some(route.schema_keys.clone());
278
+ }
279
+ if route.schema_keys.is_empty() {
280
+ return Some(surface_schema_keys);
281
+ }
282
+
283
+ let mut effective = Vec::new();
284
+ for schema_key in surface_schema_keys {
285
+ if route.schema_keys.contains(&schema_key) && !effective.contains(&schema_key) {
286
+ effective.push(schema_key);
287
+ }
288
+ }
289
+ if effective.is_empty() {
290
+ None
291
+ } else {
292
+ Some(effective)
293
+ }
294
+ }
295
+
296
+ fn parse_history_filter_terms(
297
+ expr: &Expr,
298
+ column_style: HistoryColumnStyle,
299
+ ) -> Option<Vec<HistoryFilterTerm>> {
300
+ match expr {
301
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
302
+ let mut terms = parse_history_filter_terms(&binary_expr.left, column_style)?;
303
+ terms.extend(parse_history_filter_terms(
304
+ &binary_expr.right,
305
+ column_style,
306
+ )?);
307
+ Some(terms)
308
+ }
309
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
310
+ parse_history_disjunction(binary_expr, column_style)
311
+ }
312
+ Expr::BinaryExpr(binary_expr) => {
313
+ parse_history_binary_filter(binary_expr, column_style).map(|term| vec![term])
314
+ }
315
+ Expr::InList(in_list) => {
316
+ parse_history_in_list_filter(in_list, column_style).map(|term| vec![term])
317
+ }
318
+ _ => None,
319
+ }
320
+ }
321
+
322
+ fn collect_history_route_terms(
323
+ expr: &Expr,
324
+ column_style: HistoryColumnStyle,
325
+ ) -> Vec<HistoryFilterTerm> {
326
+ match expr {
327
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
328
+ let mut terms = collect_history_route_terms(&binary_expr.left, column_style);
329
+ terms.extend(collect_history_route_terms(
330
+ &binary_expr.right,
331
+ column_style,
332
+ ));
333
+ terms
334
+ }
335
+ // OR filters are only safe to route when the entire disjunction is a
336
+ // supported history predicate. Partially routing one side would change
337
+ // SQL semantics before DataFusion can apply the residual filter.
338
+ Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
339
+ parse_history_disjunction(binary_expr, column_style).unwrap_or_default()
340
+ }
341
+ Expr::BinaryExpr(binary_expr) => parse_history_binary_filter(binary_expr, column_style)
342
+ .map(|term| vec![term])
343
+ .unwrap_or_default(),
344
+ Expr::InList(in_list) => parse_history_in_list_filter(in_list, column_style)
345
+ .map(|term| vec![term])
346
+ .unwrap_or_default(),
347
+ _ => Vec::new(),
348
+ }
349
+ }
350
+
351
+ fn parse_history_disjunction(
352
+ binary_expr: &datafusion::logical_expr::BinaryExpr,
353
+ column_style: HistoryColumnStyle,
354
+ ) -> Option<Vec<HistoryFilterTerm>> {
355
+ let left = parse_history_filter_terms(&binary_expr.left, column_style)?;
356
+ let right = parse_history_filter_terms(&binary_expr.right, column_style)?;
357
+ let [left] = left.as_slice() else {
358
+ return None;
359
+ };
360
+ let [right] = right.as_slice() else {
361
+ return None;
362
+ };
363
+ merge_history_disjunction_terms(left.clone(), right.clone()).map(|term| vec![term])
364
+ }
365
+
366
/// A single constraint extracted from a history filter expression.
#[derive(Clone, Debug, Eq, PartialEq)]
enum HistoryFilterTerm {
    /// Allowed start commit ids.
    StartCommitIds(Vec<String>),
    /// Allowed entity ids.
    EntityIds(Vec<String>),
    /// Allowed schema keys.
    SchemaKeys(Vec<String>),
    /// Allowed file ids.
    FileIds(Vec<String>),
    /// Inclusive lower bound on depth.
    MinDepth(i64),
    /// Inclusive upper bound on depth.
    MaxDepth(i64),
    /// Depth must equal this value.
    ExactDepth(i64),
}
376
+
377
+ fn merge_history_disjunction_terms(
378
+ left: HistoryFilterTerm,
379
+ right: HistoryFilterTerm,
380
+ ) -> Option<HistoryFilterTerm> {
381
+ match (left, right) {
382
+ (HistoryFilterTerm::StartCommitIds(mut left), HistoryFilterTerm::StartCommitIds(right)) => {
383
+ extend_unique(&mut left, right);
384
+ Some(HistoryFilterTerm::StartCommitIds(left))
385
+ }
386
+ (HistoryFilterTerm::EntityIds(mut left), HistoryFilterTerm::EntityIds(right)) => {
387
+ extend_unique(&mut left, right);
388
+ Some(HistoryFilterTerm::EntityIds(left))
389
+ }
390
+ (HistoryFilterTerm::FileIds(mut left), HistoryFilterTerm::FileIds(right)) => {
391
+ extend_unique(&mut left, right);
392
+ Some(HistoryFilterTerm::FileIds(left))
393
+ }
394
+ (HistoryFilterTerm::SchemaKeys(mut left), HistoryFilterTerm::SchemaKeys(right)) => {
395
+ extend_unique(&mut left, right);
396
+ Some(HistoryFilterTerm::SchemaKeys(left))
397
+ }
398
+ _ => None,
399
+ }
400
+ }
401
+
402
+ fn parse_history_binary_filter(
403
+ binary_expr: &datafusion::logical_expr::BinaryExpr,
404
+ column_style: HistoryColumnStyle,
405
+ ) -> Option<HistoryFilterTerm> {
406
+ let Expr::Column(column) = &*binary_expr.left else {
407
+ return None;
408
+ };
409
+ let column_name = canonical_history_column_name(column.name.as_str(), column_style)?;
410
+ let right = &*binary_expr.right;
411
+ match (column_name, &binary_expr.op, right) {
412
+ ("start_commit_id", Operator::Eq, Expr::Literal(ScalarValue::Utf8(Some(value)), _))
413
+ | ("schema_key", Operator::Eq, Expr::Literal(ScalarValue::Utf8(Some(value)), _))
414
+ | ("file_id", Operator::Eq, Expr::Literal(ScalarValue::Utf8(Some(value)), _)) => {
415
+ Some(match column_name {
416
+ "start_commit_id" => HistoryFilterTerm::StartCommitIds(vec![value.clone()]),
417
+ "schema_key" => HistoryFilterTerm::SchemaKeys(vec![value.clone()]),
418
+ "file_id" => HistoryFilterTerm::FileIds(vec![value.clone()]),
419
+ _ => unreachable!(),
420
+ })
421
+ }
422
+ ("entity_id", Operator::Eq, Expr::Literal(ScalarValue::Utf8(Some(value)), _)) => {
423
+ canonical_entity_id_value(value).map(|value| HistoryFilterTerm::EntityIds(vec![value]))
424
+ }
425
+ ("depth", Operator::Eq, depth_expr) => {
426
+ scalar_i64_literal(depth_expr).map(HistoryFilterTerm::ExactDepth)
427
+ }
428
+ ("depth", Operator::Gt, depth_expr) => {
429
+ scalar_i64_literal(depth_expr).map(|value| HistoryFilterTerm::MinDepth(value + 1))
430
+ }
431
+ ("depth", Operator::GtEq, depth_expr) => {
432
+ scalar_i64_literal(depth_expr).map(HistoryFilterTerm::MinDepth)
433
+ }
434
+ ("depth", Operator::Lt, depth_expr) => {
435
+ scalar_i64_literal(depth_expr).map(|value| HistoryFilterTerm::MaxDepth(value - 1))
436
+ }
437
+ ("depth", Operator::LtEq, depth_expr) => {
438
+ scalar_i64_literal(depth_expr).map(HistoryFilterTerm::MaxDepth)
439
+ }
440
+ _ => None,
441
+ }
442
+ }
443
+
444
+ fn parse_history_in_list_filter(
445
+ in_list: &InList,
446
+ column_style: HistoryColumnStyle,
447
+ ) -> Option<HistoryFilterTerm> {
448
+ if in_list.negated {
449
+ return None;
450
+ }
451
+
452
+ let Expr::Column(column) = in_list.expr.as_ref() else {
453
+ return None;
454
+ };
455
+ let column_name = canonical_history_column_name(column.name.as_str(), column_style)?;
456
+ let values = in_list
457
+ .list
458
+ .iter()
459
+ .map(string_literal)
460
+ .collect::<Option<Vec<_>>>()?;
461
+ if values.is_empty() {
462
+ return None;
463
+ }
464
+
465
+ match column_name {
466
+ "start_commit_id" => Some(HistoryFilterTerm::StartCommitIds(values)),
467
+ "entity_id" => canonical_entity_id_values(values).map(HistoryFilterTerm::EntityIds),
468
+ "schema_key" => Some(HistoryFilterTerm::SchemaKeys(values)),
469
+ "file_id" => Some(HistoryFilterTerm::FileIds(values)),
470
+ _ => None,
471
+ }
472
+ }
473
+
474
+ fn apply_history_filter(expr: &Expr, route: &mut HistoryRoute, column_style: HistoryColumnStyle) {
475
+ for term in collect_history_route_terms(expr, column_style) {
476
+ match term {
477
+ HistoryFilterTerm::StartCommitIds(values) => {
478
+ route.contradictory |=
479
+ apply_conjunctive_values_filter(&mut route.start_commit_ids, values)
480
+ }
481
+ HistoryFilterTerm::EntityIds(values) => {
482
+ route.contradictory |=
483
+ apply_conjunctive_values_filter(&mut route.entity_ids, values)
484
+ }
485
+ HistoryFilterTerm::SchemaKeys(values) => {
486
+ route.contradictory |=
487
+ apply_conjunctive_values_filter(&mut route.schema_keys, values)
488
+ }
489
+ HistoryFilterTerm::FileIds(values) => {
490
+ route.contradictory |= apply_conjunctive_values_filter(&mut route.file_ids, values)
491
+ }
492
+ HistoryFilterTerm::ExactDepth(value) => {
493
+ route.min_depth = Some(value);
494
+ route.max_depth = Some(value);
495
+ }
496
+ HistoryFilterTerm::MinDepth(value) => {
497
+ route.min_depth = Some(route.min_depth.map_or(value, |current| current.max(value)));
498
+ }
499
+ HistoryFilterTerm::MaxDepth(value) => {
500
+ route.max_depth = Some(route.max_depth.map_or(value, |current| current.min(value)));
501
+ }
502
+ }
503
+ }
504
+ }
505
+
506
+ fn apply_conjunctive_values_filter(bucket: &mut Vec<String>, incoming_values: Vec<String>) -> bool {
507
+ let mut values = Vec::new();
508
+ extend_unique(&mut values, incoming_values);
509
+ if values.is_empty() {
510
+ return true;
511
+ }
512
+ if bucket.is_empty() {
513
+ extend_unique(bucket, values);
514
+ return false;
515
+ }
516
+
517
+ bucket.retain(|existing| values.contains(existing));
518
+ bucket.is_empty()
519
+ }
520
+
521
+ fn canonical_entity_id_values(values: Vec<String>) -> Option<Vec<String>> {
522
+ values
523
+ .into_iter()
524
+ .map(|value| canonical_entity_id_value(&value))
525
+ .collect()
526
+ }
527
+
528
+ fn canonical_entity_id_value(value: &str) -> Option<String> {
529
+ EntityIdentity::from_json_array_text(value)
530
+ .ok()?
531
+ .as_json_array_text()
532
+ .ok()
533
+ }
534
+
535
+ fn canonical_history_column_name(name: &str, column_style: HistoryColumnStyle) -> Option<&str> {
536
+ match (column_style, name) {
537
+ (HistoryColumnStyle::Bare, "start_commit_id")
538
+ | (HistoryColumnStyle::Prefixed, "lixcol_start_commit_id") => Some("start_commit_id"),
539
+ (HistoryColumnStyle::Bare, "entity_id")
540
+ | (HistoryColumnStyle::Prefixed, "lixcol_entity_id") => Some("entity_id"),
541
+ (HistoryColumnStyle::Bare, "schema_key")
542
+ | (HistoryColumnStyle::Prefixed, "lixcol_schema_key") => Some("schema_key"),
543
+ (HistoryColumnStyle::Bare, "file_id")
544
+ | (HistoryColumnStyle::Prefixed, "lixcol_file_id") => Some("file_id"),
545
+ (HistoryColumnStyle::Bare, "depth") | (HistoryColumnStyle::Prefixed, "lixcol_depth") => {
546
+ Some("depth")
547
+ }
548
+ _ => None,
549
+ }
550
+ }
551
+
552
/// Narrows `value` to `u32`.
///
/// Returns `None` when `value` is negative or larger than `u32::MAX`.
fn nonnegative_u32(value: i64) -> Option<u32> {
    match u32::try_from(value) {
        Ok(narrowed) => Some(narrowed),
        Err(_) => None,
    }
}
555
+
556
/// Appends each entry of `values` to `bucket` unless an equal string is
/// already present, preserving first-seen order.
fn extend_unique(bucket: &mut Vec<String>, values: Vec<String>) {
    values.into_iter().for_each(|candidate| {
        let already_present = bucket.iter().any(|existing| *existing == candidate);
        if !already_present {
            bucket.push(candidate);
        }
    });
}
563
+
564
+ fn string_literal(expr: &Expr) -> Option<String> {
565
+ match expr {
566
+ Expr::Literal(ScalarValue::Utf8(Some(value)), _) => Some(value.clone()),
567
+ _ => None,
568
+ }
569
+ }
570
+
571
+ fn scalar_i64_literal(expr: &Expr) -> Option<i64> {
572
+ match expr {
573
+ Expr::Literal(ScalarValue::Int8(Some(value)), _) => Some(i64::from(*value)),
574
+ Expr::Literal(ScalarValue::Int16(Some(value)), _) => Some(i64::from(*value)),
575
+ Expr::Literal(ScalarValue::Int32(Some(value)), _) => Some(i64::from(*value)),
576
+ Expr::Literal(ScalarValue::Int64(Some(value)), _) => Some(*value),
577
+ Expr::Literal(ScalarValue::UInt8(Some(value)), _) => Some(i64::from(*value)),
578
+ Expr::Literal(ScalarValue::UInt16(Some(value)), _) => Some(i64::from(*value)),
579
+ Expr::Literal(ScalarValue::UInt32(Some(value)), _) => Some(i64::from(*value)),
580
+ Expr::Literal(ScalarValue::UInt64(Some(value)), _) => i64::try_from(*value).ok(),
581
+ _ => None,
582
+ }
583
+ }
584
+
585
#[cfg(test)]
mod tests {
    use datafusion::common::{Column, ScalarValue};
    use datafusion::logical_expr::{BinaryExpr, Expr, Like, Operator};

    use super::{parse_history_filter, HistoryColumnStyle, HistoryRoute};

    /// An AND of a routable term and a non-routable one is not parsed as a
    /// whole, but the routable side still reaches the route.
    #[test]
    fn route_extraction_keeps_supported_terms_from_mixed_and_filter() {
        let predicate = and(
            eq(col("start_commit_id"), str_lit("commit-1")),
            docs_path_like(),
        );

        assert!(
            parse_history_filter(&predicate, HistoryColumnStyle::Bare).is_none(),
            "mixed filters must not be advertised as exact pushdown"
        );

        let routed = HistoryRoute::from_filters(&[predicate], HistoryColumnStyle::Bare);
        assert_eq!(routed.start_commit_ids, vec!["commit-1".to_string()]);
    }

    /// An OR of a routable term and a non-routable one contributes nothing to
    /// the route.
    #[test]
    fn route_extraction_does_not_partially_route_mixed_or_filter() {
        let predicate = or(
            eq(col("start_commit_id"), str_lit("commit-1")),
            docs_path_like(),
        );

        let routed = HistoryRoute::from_filters(&[predicate], HistoryColumnStyle::Bare);
        assert!(
            routed.start_commit_ids.is_empty(),
            "partial OR pushdown would change SQL semantics"
        );
    }

    /// `path LIKE '/docs/%'`: a predicate on a column the tests above use as
    /// the non-routable half of a mixed filter.
    fn docs_path_like() -> Expr {
        let column = Box::new(col("path"));
        let pattern = Box::new(str_lit("/docs/%"));
        Expr::Like(Like::new(false, column, pattern, None, false))
    }

    /// `left AND right`.
    fn and(left: Expr, right: Expr) -> Expr {
        binary(left, Operator::And, right)
    }

    /// `left OR right`.
    fn or(left: Expr, right: Expr) -> Expr {
        binary(left, Operator::Or, right)
    }

    /// `left = right`.
    fn eq(left: Expr, right: Expr) -> Expr {
        binary(left, Operator::Eq, right)
    }

    /// Wraps two operands and an operator into a binary expression.
    fn binary(left: Expr, op: Operator, right: Expr) -> Expr {
        let (lhs, rhs) = (Box::new(left), Box::new(right));
        Expr::BinaryExpr(BinaryExpr::new(lhs, op, rhs))
    }

    /// Unqualified column reference.
    fn col(name: &str) -> Expr {
        Expr::Column(Column::from_name(name))
    }

    /// Non-null UTF-8 string literal without metadata.
    fn str_lit(value: &str) -> Expr {
        let scalar = ScalarValue::Utf8(Some(value.to_string()));
        Expr::Literal(scalar, None)
    }
}