@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,3187 @@
1
+ use std::{
2
+ collections::{BTreeMap, VecDeque},
3
+ future::Future,
4
+ ops::Range,
5
+ pin::Pin,
6
+ };
7
+
8
+ use crate::storage::{StorageReader, StorageWriteSet};
9
+ use crate::tracked_state::codec::{
10
+ boundary_trigger, child_summary_from_node, decode_key, decode_key_with_trusted_prefix,
11
+ decode_node, decode_node_ref, decode_value, decode_visible_value, encode_internal_node,
12
+ encode_internal_node_refs, encode_key, encode_leaf_node, encode_leaf_node_refs,
13
+ encode_schema_file_prefix, encode_schema_key_prefix, ChildSummary, ChildSummaryRef,
14
+ DecodedLeafNodeRef, DecodedNode, DecodedNodeRef, EncodedLeafEntry, EncodedLeafEntryRef,
15
+ PendingChunkWrite,
16
+ };
17
+ use crate::tracked_state::storage;
18
+ use crate::tracked_state::types::{
19
+ TrackedStateApplyResult, TrackedStateIndexValue, TrackedStateKey, TrackedStateMutation,
20
+ TrackedStateRootId, TrackedStateTreeDiffEntry, TrackedStateTreeScanRequest,
21
+ TRACKED_STATE_HASH_BYTES,
22
+ };
23
+ use crate::{LixError, NullableKeyFilter};
24
+
/// Chunk-size settings carried by a [`TrackedStateTree`].
///
/// All three values are byte counts. `Default` yields a 4 KiB target, a
/// 512-byte minimum and a 16 KiB maximum.
// NOTE(review): the fields are presumably read by the node-chunking code
// elsewhere in this module; confirm the exact semantics there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct TrackedStateTreeOptions {
    /// Target chunk size in bytes.
    pub(crate) target_chunk_bytes: usize,
    /// Minimum chunk size in bytes.
    pub(crate) min_chunk_bytes: usize,
    /// Maximum chunk size in bytes.
    pub(crate) max_chunk_bytes: usize,
}
31
+
32
+ enum MutationApply<T> {
33
+ Applied(TrackedStateApplyResult),
34
+ Fallback(T),
35
+ }
36
+
37
+ impl Default for TrackedStateTreeOptions {
38
+ fn default() -> Self {
39
+ Self {
40
+ target_chunk_bytes: 4 * 1024,
41
+ min_chunk_bytes: 512,
42
+ max_chunk_bytes: 16 * 1024,
43
+ }
44
+ }
45
+ }
46
+
47
+ /// Content-addressed tracked-state tree operations.
48
+ ///
49
+ /// This type owns tracked-state tree mechanics only. Version refs, untracked overlay,
50
+ /// and SQL visibility remain outside the tree.
51
+ #[derive(Debug, Clone)]
52
+ pub(crate) struct TrackedStateTree {
53
+ options: TrackedStateTreeOptions,
54
+ }
55
+
56
+ impl TrackedStateTree {
57
+ pub(crate) fn new() -> Self {
58
+ Self {
59
+ options: TrackedStateTreeOptions::default(),
60
+ }
61
+ }
62
+
63
+ #[allow(dead_code)]
64
+ pub(crate) fn with_options(options: TrackedStateTreeOptions) -> Self {
65
+ Self { options }
66
+ }
67
+
68
+ pub(crate) async fn load_root(
69
+ &self,
70
+ store: &mut (impl StorageReader + ?Sized),
71
+ commit_id: &str,
72
+ ) -> Result<Option<TrackedStateRootId>, LixError> {
73
+ storage::load_root(store, commit_id).await
74
+ }
75
+
/// Point lookup of a single key (test builds only).
///
/// Walks from the root down to a leaf: at each internal node it follows the
/// first child whose `last_key` is greater than or equal to the encoded key,
/// or the last child when no such child exists. At the leaf the entry is
/// located by binary search.
///
/// # Errors
///
/// Propagates node-loading and value-decoding failures, and returns
/// `LIX_ERROR_UNKNOWN` when an internal node has no children.
#[cfg(test)]
pub(crate) async fn get(
    &self,
    store: &mut impl StorageReader,
    root_id: &TrackedStateRootId,
    key: &TrackedStateKey,
) -> Result<Option<TrackedStateIndexValue>, LixError> {
    let target = encode_key(key);
    let mut node_hash = *root_id.as_bytes();
    loop {
        let internal = match self.load_node(store, &node_hash).await? {
            DecodedNode::Leaf(leaf) => {
                let entries = leaf.entries();
                let found = entries.binary_search_by(|entry| entry.key.as_slice().cmp(&target));
                return match found {
                    Ok(index) => decode_value(&entries[index].value).map(Some),
                    Err(_) => Ok(None),
                };
            }
            DecodedNode::Internal(internal) => internal,
        };

        let children = internal.children();
        let covering = children
            .iter()
            .find(|child| child.last_key.as_slice() >= target.as_slice());
        // Keys beyond every boundary still descend into the right-most child;
        // the leaf search then reports the miss.
        match covering.or_else(|| children.last()) {
            Some(child) => node_hash = child.child_hash,
            None => {
                return Err(LixError::new(
                    "LIX_ERROR_UNKNOWN",
                    "tracked-state tree internal node has no children",
                ));
            }
        }
    }
}
112
+
113
+ pub(crate) async fn get_many(
114
+ &self,
115
+ store: &mut impl StorageReader,
116
+ root_id: &TrackedStateRootId,
117
+ keys: &[TrackedStateKey],
118
+ ) -> Result<Vec<Option<TrackedStateIndexValue>>, LixError> {
119
+ if keys.is_empty() {
120
+ return Ok(Vec::new());
121
+ }
122
+
123
+ let mut encoded_keys = keys
124
+ .iter()
125
+ .enumerate()
126
+ .map(|(index, key)| (index, encode_key(key)))
127
+ .collect::<Vec<_>>();
128
+ encoded_keys.sort_by(|left, right| left.1.cmp(&right.1));
129
+
130
+ let mut values = vec![None; keys.len()];
131
+ self.get_many_node(store, *root_id.as_bytes(), &encoded_keys, &mut values)
132
+ .await?;
133
+ Ok(values)
134
+ }
135
+
136
+ pub(crate) async fn row_count(
137
+ &self,
138
+ store: &mut impl StorageReader,
139
+ root_id: &TrackedStateRootId,
140
+ ) -> Result<usize, LixError> {
141
+ match self.load_node(store, root_id.as_bytes()).await? {
142
+ DecodedNode::Leaf(leaf) => Ok(leaf.entries().len()),
143
+ DecodedNode::Internal(internal) => Ok(internal
144
+ .children()
145
+ .iter()
146
+ .map(|child| child.subtree_count as usize)
147
+ .sum()),
148
+ }
149
+ }
150
+
151
+ pub(crate) async fn scan(
152
+ &self,
153
+ store: &mut impl StorageReader,
154
+ root_id: &TrackedStateRootId,
155
+ request: &TrackedStateTreeScanRequest,
156
+ ) -> Result<Vec<(TrackedStateKey, TrackedStateIndexValue)>, LixError> {
157
+ if request.limit == Some(0) {
158
+ return Ok(Vec::new());
159
+ }
160
+
161
+ let ranges = scan_ranges(request);
162
+ let key_decode_hint = scan_key_decode_hint(request, &ranges);
163
+ let mut rows = Vec::new();
164
+ self.scan_node(
165
+ store,
166
+ *root_id.as_bytes(),
167
+ request,
168
+ &ranges,
169
+ key_decode_hint,
170
+ &mut rows,
171
+ )
172
+ .await?;
173
+ Ok(rows)
174
+ }
175
+
176
+ pub(crate) async fn count_matching_keys(
177
+ &self,
178
+ store: &mut impl StorageReader,
179
+ root_id: &TrackedStateRootId,
180
+ request: &TrackedStateTreeScanRequest,
181
+ ) -> Result<usize, LixError> {
182
+ if request.limit == Some(0) {
183
+ return Ok(0);
184
+ }
185
+
186
+ let ranges = scan_ranges(request);
187
+ self.count_matching_keys_node(store, *root_id.as_bytes(), request, &ranges)
188
+ .await
189
+ }
190
+
191
+ pub(crate) async fn diff(
192
+ &self,
193
+ store: &mut impl StorageReader,
194
+ left_root: Option<&TrackedStateRootId>,
195
+ right_root: Option<&TrackedStateRootId>,
196
+ request: &TrackedStateTreeScanRequest,
197
+ ) -> Result<Vec<TrackedStateTreeDiffEntry>, LixError> {
198
+ match (left_root, right_root) {
199
+ (None, None) => Ok(Vec::new()),
200
+ (Some(left), Some(right)) if left == right => Ok(Vec::new()),
201
+ (Some(left), Some(right)) => {
202
+ let mut out = Vec::new();
203
+ self.diff_nodes(
204
+ store,
205
+ *left.as_bytes(),
206
+ *right.as_bytes(),
207
+ request,
208
+ &mut out,
209
+ )
210
+ .await?;
211
+ Ok(out)
212
+ }
213
+ (Some(left), None) => Ok(self
214
+ .collect_filtered_entries(store, left, request)
215
+ .await?
216
+ .into_iter()
217
+ .map(|(key, value)| TrackedStateTreeDiffEntry {
218
+ before: Some((key, value)),
219
+ after: None,
220
+ })
221
+ .collect()),
222
+ (None, Some(right)) => Ok(self
223
+ .collect_filtered_entries(store, right, request)
224
+ .await?
225
+ .into_iter()
226
+ .map(|(key, value)| TrackedStateTreeDiffEntry {
227
+ before: None,
228
+ after: Some((key, value)),
229
+ })
230
+ .collect()),
231
+ }
232
+ }
233
+
234
+ pub(crate) async fn apply_mutations(
235
+ &self,
236
+ store: &mut (impl StorageReader + ?Sized),
237
+ writes: &mut StorageWriteSet,
238
+ base_root: Option<&TrackedStateRootId>,
239
+ mut mutations: Vec<TrackedStateMutation>,
240
+ commit_id: Option<&str>,
241
+ ) -> Result<TrackedStateApplyResult, LixError> {
242
+ let mut overlay = storage::TrackedStateChunkOverlay::new();
243
+ if let Some(root_id) = base_root {
244
+ if mutations.len() == 1 {
245
+ let mutation = mutations.pop().expect("single mutation should exist");
246
+ match self
247
+ .apply_single_mutation(
248
+ store,
249
+ writes,
250
+ &mut overlay,
251
+ root_id,
252
+ mutation,
253
+ commit_id,
254
+ )
255
+ .await?
256
+ {
257
+ MutationApply::Applied(result) => return Ok(result),
258
+ MutationApply::Fallback(mutation) => mutations = vec![mutation],
259
+ }
260
+ } else if mutations.len() > 1 {
261
+ match self
262
+ .apply_sorted_mutations_chunker(
263
+ store,
264
+ writes,
265
+ &mut overlay,
266
+ root_id,
267
+ mutations,
268
+ commit_id,
269
+ )
270
+ .await?
271
+ {
272
+ MutationApply::Applied(result) => return Ok(result),
273
+ MutationApply::Fallback(fallback_mutations) => mutations = fallback_mutations,
274
+ }
275
+ }
276
+ }
277
+
278
+ let mut entries = match base_root {
279
+ Some(root_id) => self
280
+ .collect_leaf_entries(store, root_id)
281
+ .await?
282
+ .into_iter()
283
+ .map(|entry| (entry.key, entry.value))
284
+ .collect::<BTreeMap<_, _>>(),
285
+ None => BTreeMap::new(),
286
+ };
287
+
288
+ // Apply in caller order so repeated writes to the same key behave like
289
+ // normal transaction staging: the latest mutation wins.
290
+ for mutation in mutations {
291
+ entries.insert(mutation.encoded_key, mutation.encoded_value);
292
+ }
293
+
294
+ let built = self.build_tree_from_entries(
295
+ entries
296
+ .into_iter()
297
+ .map(|(key, value)| EncodedLeafEntry { key, value })
298
+ .collect(),
299
+ )?;
300
+ overlay.stage_chunks(writes, &built.chunks);
301
+ let persisted_root = if let Some(commit_id) = commit_id {
302
+ storage::stage_root(writes, commit_id, &built.root_id);
303
+ true
304
+ } else {
305
+ false
306
+ };
307
+
308
+ Ok(TrackedStateApplyResult {
309
+ root_id: built.root_id,
310
+ row_count: built.row_count,
311
+ tree_height: built.tree_height,
312
+ chunk_count: built.chunks.len(),
313
+ chunk_bytes: built.chunk_bytes,
314
+ persisted_root,
315
+ })
316
+ }
317
+
318
+ async fn apply_single_mutation(
319
+ &self,
320
+ store: &mut (impl StorageReader + ?Sized),
321
+ writes: &mut StorageWriteSet,
322
+ overlay: &mut storage::TrackedStateChunkOverlay,
323
+ root_id: &TrackedStateRootId,
324
+ mutation: TrackedStateMutation,
325
+ commit_id: Option<&str>,
326
+ ) -> Result<MutationApply<TrackedStateMutation>, LixError> {
327
+ let mutation = match self
328
+ .apply_single_mutation_from_seek_path(
329
+ store, writes, overlay, root_id, mutation, commit_id,
330
+ )
331
+ .await?
332
+ {
333
+ MutationApply::Applied(result) => return Ok(MutationApply::Applied(result)),
334
+ MutationApply::Fallback(mutation) => mutation,
335
+ };
336
+
337
+ let TrackedStateMutation {
338
+ encoded_key,
339
+ encoded_value,
340
+ } = mutation;
341
+
342
+ let levels = self
343
+ .collect_summary_levels_with_overlay(store, overlay, root_id)
344
+ .await?;
345
+ let Some(leaves) = levels.first() else {
346
+ return Ok(MutationApply::Fallback(TrackedStateMutation {
347
+ encoded_key,
348
+ encoded_value,
349
+ }));
350
+ };
351
+ let target_leaf_index = leaves
352
+ .iter()
353
+ .position(|leaf| leaf.last_key.as_slice() >= encoded_key.as_slice())
354
+ .unwrap_or_else(|| leaves.len().saturating_sub(1));
355
+ let Some(target_leaf) = leaves.get(target_leaf_index).cloned() else {
356
+ return Ok(MutationApply::Fallback(TrackedStateMutation {
357
+ encoded_key,
358
+ encoded_value,
359
+ }));
360
+ };
361
+
362
+ let mut entries = self
363
+ .load_leaf_entries_with_overlay(store, overlay, &target_leaf.child_hash)
364
+ .await?;
365
+ let mutation_entry_index = match entries
366
+ .binary_search_by(|entry| entry.key.as_slice().cmp(encoded_key.as_slice()))
367
+ {
368
+ Ok(index) => {
369
+ if entries[index].value.as_slice() == encoded_value.as_slice() {
370
+ return Ok(MutationApply::Fallback(TrackedStateMutation {
371
+ encoded_key,
372
+ encoded_value,
373
+ }));
374
+ }
375
+ entries[index].value = encoded_value;
376
+ index
377
+ }
378
+ Err(index) => {
379
+ entries.insert(
380
+ index,
381
+ EncodedLeafEntry {
382
+ key: encoded_key,
383
+ value: encoded_value,
384
+ },
385
+ );
386
+ index
387
+ }
388
+ };
389
+
390
+ let mut chunks = BTreeMap::new();
391
+ let mut suffix_entries = entries;
392
+ let mut next_leaf_index = target_leaf_index + 1;
393
+ let mut replacement_leaves;
394
+ let old_leaf_count;
395
+
396
+ // Rechunk from the edited leaf until a generated leaf matches an
397
+ // existing post-mutation leaf, then reuse the rest of the old suffix.
398
+ loop {
399
+ let mut candidate_chunks = BTreeMap::new();
400
+ let candidate_summaries = self.build_leaf_level_from_refs(
401
+ suffix_entries.iter().map(EncodedLeafEntry::as_ref),
402
+ &mut candidate_chunks,
403
+ );
404
+
405
+ if let Some((generated_resync_index, existing_resync_index)) = first_resync_index(
406
+ &candidate_summaries,
407
+ &leaves[target_leaf_index..],
408
+ suffix_entries[mutation_entry_index].key.as_slice(),
409
+ ) {
410
+ for summary in &candidate_summaries[..generated_resync_index] {
411
+ if let Some(chunk) = candidate_chunks.remove(&summary.child_hash) {
412
+ chunks.entry(chunk.hash).or_insert(chunk);
413
+ }
414
+ }
415
+ replacement_leaves = candidate_summaries
416
+ .into_iter()
417
+ .take(generated_resync_index)
418
+ .collect();
419
+ old_leaf_count = existing_resync_index;
420
+ break;
421
+ }
422
+
423
+ if next_leaf_index >= leaves.len() {
424
+ chunks.extend(candidate_chunks);
425
+ replacement_leaves = candidate_summaries;
426
+ old_leaf_count = leaves.len() - target_leaf_index;
427
+ break;
428
+ }
429
+
430
+ suffix_entries.extend(
431
+ self.load_leaf_entries_with_overlay(
432
+ store,
433
+ overlay,
434
+ &leaves[next_leaf_index].child_hash,
435
+ )
436
+ .await?,
437
+ );
438
+ next_leaf_index += 1;
439
+ }
440
+
441
+ let built = self.build_tree_from_leaf_patch(
442
+ &levels,
443
+ target_leaf_index,
444
+ old_leaf_count,
445
+ std::mem::take(&mut replacement_leaves),
446
+ chunks,
447
+ suffix_entries[mutation_entry_index].key.as_slice(),
448
+ )?;
449
+ overlay.stage_chunks(writes, &built.chunks);
450
+ let persisted_root = if let Some(commit_id) = commit_id {
451
+ storage::stage_root(writes, commit_id, &built.root_id);
452
+ true
453
+ } else {
454
+ false
455
+ };
456
+
457
+ Ok(MutationApply::Applied(TrackedStateApplyResult {
458
+ root_id: built.root_id,
459
+ row_count: built.row_count,
460
+ tree_height: built.tree_height,
461
+ chunk_count: built.chunks.len(),
462
+ chunk_bytes: built.chunk_bytes,
463
+ persisted_root,
464
+ }))
465
+ }
466
+
467
+ fn diff_nodes<'a, S>(
468
+ &'a self,
469
+ store: &'a mut S,
470
+ left_hash: [u8; TRACKED_STATE_HASH_BYTES],
471
+ right_hash: [u8; TRACKED_STATE_HASH_BYTES],
472
+ request: &'a TrackedStateTreeScanRequest,
473
+ out: &'a mut Vec<TrackedStateTreeDiffEntry>,
474
+ ) -> Pin<Box<dyn Future<Output = Result<(), LixError>> + 'a>>
475
+ where
476
+ S: StorageReader + 'a,
477
+ {
478
+ Box::pin(async move {
479
+ if left_hash == right_hash {
480
+ return Ok(());
481
+ }
482
+
483
+ let left = self.load_node(store, &left_hash).await?;
484
+ let right = self.load_node(store, &right_hash).await?;
485
+ match (left, right) {
486
+ (DecodedNode::Leaf(left), DecodedNode::Leaf(right)) => {
487
+ self.diff_leaf_entries(left.entries(), right.entries(), request, out)?;
488
+ }
489
+ (DecodedNode::Internal(left), DecodedNode::Internal(right))
490
+ if internal_boundaries_match(left.children(), right.children()) =>
491
+ {
492
+ for (left_child, right_child) in left.children().iter().zip(right.children()) {
493
+ if left_child == right_child {
494
+ continue;
495
+ }
496
+ self.diff_nodes(
497
+ store,
498
+ left_child.child_hash,
499
+ right_child.child_hash,
500
+ request,
501
+ out,
502
+ )
503
+ .await?;
504
+ }
505
+ }
506
+ _ => {
507
+ self.diff_leaf_summary_cursors(store, left_hash, right_hash, request, out)
508
+ .await?;
509
+ }
510
+ }
511
+ Ok(())
512
+ })
513
+ }
514
+
515
+ async fn diff_leaf_summary_cursors(
516
+ &self,
517
+ store: &mut impl StorageReader,
518
+ left_hash: [u8; TRACKED_STATE_HASH_BYTES],
519
+ right_hash: [u8; TRACKED_STATE_HASH_BYTES],
520
+ request: &TrackedStateTreeScanRequest,
521
+ out: &mut Vec<TrackedStateTreeDiffEntry>,
522
+ ) -> Result<(), LixError> {
523
+ let mut left = LeafSummaryCursor::new(self, store, left_hash).await?;
524
+ let mut right = LeafSummaryCursor::new(self, store, right_hash).await?;
525
+ let mut left_window = Vec::new();
526
+ let mut right_window = Vec::new();
527
+
528
+ loop {
529
+ match (left.current(), right.current()) {
530
+ (Some(left_leaf), Some(right_leaf)) if left_leaf == right_leaf => {
531
+ self.diff_leaf_summary_window(store, &left_window, &right_window, request, out)
532
+ .await?;
533
+ left_window.clear();
534
+ right_window.clear();
535
+ left.advance(self, store).await?;
536
+ right.advance(self, store).await?;
537
+ }
538
+ (Some(left_leaf), Some(right_leaf)) => {
539
+ match left_leaf.last_key.cmp(&right_leaf.last_key) {
540
+ std::cmp::Ordering::Less => {
541
+ left_window.push(left_leaf.clone());
542
+ left.advance(self, store).await?;
543
+ }
544
+ std::cmp::Ordering::Greater => {
545
+ right_window.push(right_leaf.clone());
546
+ right.advance(self, store).await?;
547
+ }
548
+ std::cmp::Ordering::Equal => {
549
+ left_window.push(left_leaf.clone());
550
+ right_window.push(right_leaf.clone());
551
+ left.advance(self, store).await?;
552
+ right.advance(self, store).await?;
553
+ }
554
+ }
555
+ }
556
+ (Some(left_leaf), None) => {
557
+ left_window.push(left_leaf.clone());
558
+ left.advance(self, store).await?;
559
+ }
560
+ (None, Some(right_leaf)) => {
561
+ right_window.push(right_leaf.clone());
562
+ right.advance(self, store).await?;
563
+ }
564
+ (None, None) => {
565
+ self.diff_leaf_summary_window(store, &left_window, &right_window, request, out)
566
+ .await?;
567
+ return Ok(());
568
+ }
569
+ }
570
+ }
571
+ }
572
+
573
+ async fn diff_leaf_summary_window(
574
+ &self,
575
+ store: &mut impl StorageReader,
576
+ left_leaves: &[ChildSummary],
577
+ right_leaves: &[ChildSummary],
578
+ request: &TrackedStateTreeScanRequest,
579
+ out: &mut Vec<TrackedStateTreeDiffEntry>,
580
+ ) -> Result<(), LixError> {
581
+ if left_leaves.is_empty() && right_leaves.is_empty() {
582
+ return Ok(());
583
+ }
584
+ let left_entries = self
585
+ .collect_entries_from_leaf_summaries(store, left_leaves)
586
+ .await?;
587
+ let right_entries = self
588
+ .collect_entries_from_leaf_summaries(store, right_leaves)
589
+ .await?;
590
+ self.diff_leaf_entries(&left_entries, &right_entries, request, out)
591
+ }
592
+
593
+ fn diff_leaf_entries(
594
+ &self,
595
+ left: &[EncodedLeafEntry],
596
+ right: &[EncodedLeafEntry],
597
+ request: &TrackedStateTreeScanRequest,
598
+ out: &mut Vec<TrackedStateTreeDiffEntry>,
599
+ ) -> Result<(), LixError> {
600
+ let mut left_index = 0usize;
601
+ let mut right_index = 0usize;
602
+ while left_index < left.len() && right_index < right.len() {
603
+ match left[left_index].key.cmp(&right[right_index].key) {
604
+ std::cmp::Ordering::Less => {
605
+ self.push_removed_diff(&left[left_index], request, out)?;
606
+ left_index += 1;
607
+ }
608
+ std::cmp::Ordering::Greater => {
609
+ self.push_added_diff(&right[right_index], request, out)?;
610
+ right_index += 1;
611
+ }
612
+ std::cmp::Ordering::Equal => {
613
+ if left[left_index].value != right[right_index].value {
614
+ self.push_modified_diff(
615
+ &left[left_index],
616
+ &right[right_index],
617
+ request,
618
+ out,
619
+ )?;
620
+ }
621
+ left_index += 1;
622
+ right_index += 1;
623
+ }
624
+ }
625
+ }
626
+ for entry in &left[left_index..] {
627
+ self.push_removed_diff(entry, request, out)?;
628
+ }
629
+ for entry in &right[right_index..] {
630
+ self.push_added_diff(entry, request, out)?;
631
+ }
632
+ Ok(())
633
+ }
634
+
635
+ fn push_removed_diff(
636
+ &self,
637
+ entry: &EncodedLeafEntry,
638
+ request: &TrackedStateTreeScanRequest,
639
+ out: &mut Vec<TrackedStateTreeDiffEntry>,
640
+ ) -> Result<(), LixError> {
641
+ let (key, value) = decode_entry(entry)?;
642
+ if request.matches(&key, &value) {
643
+ out.push(TrackedStateTreeDiffEntry {
644
+ before: Some((key, value)),
645
+ after: None,
646
+ });
647
+ }
648
+ Ok(())
649
+ }
650
+
651
+ fn push_added_diff(
652
+ &self,
653
+ entry: &EncodedLeafEntry,
654
+ request: &TrackedStateTreeScanRequest,
655
+ out: &mut Vec<TrackedStateTreeDiffEntry>,
656
+ ) -> Result<(), LixError> {
657
+ let (key, value) = decode_entry(entry)?;
658
+ if request.matches(&key, &value) {
659
+ out.push(TrackedStateTreeDiffEntry {
660
+ before: None,
661
+ after: Some((key, value)),
662
+ });
663
+ }
664
+ Ok(())
665
+ }
666
+
667
+ fn push_modified_diff(
668
+ &self,
669
+ left: &EncodedLeafEntry,
670
+ right: &EncodedLeafEntry,
671
+ request: &TrackedStateTreeScanRequest,
672
+ out: &mut Vec<TrackedStateTreeDiffEntry>,
673
+ ) -> Result<(), LixError> {
674
+ let (left_key, left_value) = decode_entry(left)?;
675
+ let (right_key, right_value) = decode_entry(right)?;
676
+ if request.matches(&left_key, &left_value) || request.matches(&right_key, &right_value) {
677
+ out.push(TrackedStateTreeDiffEntry {
678
+ before: Some((left_key, left_value)),
679
+ after: Some((right_key, right_value)),
680
+ });
681
+ }
682
+ Ok(())
683
+ }
684
+
685
// Applies a batch of mutations by re-chunking only the leaf windows they touch.
//
// The mutations are first collapsed into a `BTreeMap` keyed by `encoded_key`, so
// they are processed in key order and a later mutation for the same key replaces
// an earlier one. Existing leaves that receive no mutation are carried over
// unchanged; leaves that do are merged with their mutations and rebuilt.
//
// Returns `MutationApply::Fallback` carrying the collapsed, key-ordered mutations
// when the batch is empty, when no leaf level could be collected, or when the
// batch is not append-only and holds more than half as many keys as the tree has
// rows. Otherwise returns `MutationApply::Applied` with the result of
// `persist_built_tree`.
+ async fn apply_sorted_mutations_chunker(
686
+ &self,
687
+ store: &mut (impl StorageReader + ?Sized),
688
+ writes: &mut StorageWriteSet,
689
+ overlay: &mut storage::TrackedStateChunkOverlay,
690
+ root_id: &TrackedStateRootId,
691
+ mutations: Vec<TrackedStateMutation>,
692
+ commit_id: Option<&str>,
693
+ ) -> Result<MutationApply<Vec<TrackedStateMutation>>, LixError> {
694
+ let mut mutation_map = BTreeMap::new();
695
// Inserting into the map orders mutations by key; a repeated key keeps its last value.
+ for mutation in mutations {
696
+ mutation_map.insert(mutation.encoded_key, mutation.encoded_value);
697
+ }
698
+ if mutation_map.is_empty() {
699
+ return Ok(MutationApply::Fallback(Vec::new()));
700
+ }
701
+
702
+ let levels = self
703
+ .collect_summary_levels_with_overlay(store, overlay, root_id)
704
+ .await?;
705
// `levels[0]` holds the leaf summaries; without it there is nothing to patch.
+ let Some(leaves) = levels.first() else {
706
+ return Ok(MutationApply::Fallback(
707
+ mutation_map
708
+ .into_iter()
709
+ .map(|(encoded_key, encoded_value)| TrackedStateMutation {
710
+ encoded_key,
711
+ encoded_value,
712
+ })
713
+ .collect(),
714
+ ));
715
+ };
716
+
717
+ let base_row_count = leaves
718
+ .iter()
719
+ .map(|leaf| leaf.subtree_count as usize)
720
+ .sum::<usize>();
721
+ let first_mutation_key = mutation_map
722
+ .keys()
723
+ .next()
724
+ .expect("non-empty mutation map should have first key");
725
// Append-only: the smallest mutation key sorts after the last key of the last leaf.
+ let append_only = leaves
726
+ .last()
727
+ .is_some_and(|leaf| first_mutation_key.as_slice() > leaf.last_key.as_slice());
728
// A non-append batch with more than half as many keys as existing rows is handed back.
+ if !append_only && mutation_map.len() * 2 > base_row_count {
729
+ return Ok(MutationApply::Fallback(
730
+ mutation_map
731
+ .into_iter()
732
+ .map(|(encoded_key, encoded_value)| TrackedStateMutation {
733
+ encoded_key,
734
+ encoded_value,
735
+ })
736
+ .collect(),
737
+ ));
738
+ }
739
+
740
+ let mut mutations = mutation_map.into_iter().collect::<VecDeque<_>>();
741
+ let mut output_leaves = Vec::new();
742
+ let mut chunks = BTreeMap::new();
743
+ let mut leaf_index = 0usize;
744
+
745
// Walk the existing leaves in order; a rebuild window opens whenever the next
// pending mutation sorts at or before the current leaf's last key.
+ while leaf_index < leaves.len() {
746
+ let current_leaf_has_mutation = mutations
747
+ .front()
748
+ .is_some_and(|(key, _)| key.as_slice() <= leaves[leaf_index].last_key.as_slice());
749
+ if !current_leaf_has_mutation {
750
// No pending mutation lands in this leaf: reuse its summary as-is.
+ output_leaves.push(leaves[leaf_index].clone());
751
+ leaf_index += 1;
752
+ continue;
753
+ }
754
+
755
+ let window_start = leaf_index;
756
+ let mut window_entries = BTreeMap::new();
757
// Tracks the key of the most recently absorbed mutation in this window.
+ let mut window_mutation_ceiling = mutations
758
+ .front()
759
+ .map(|(key, _)| key.clone())
760
+ .expect("window with mutation should have front mutation");
761
+
762
// Grow the window one existing leaf at a time, re-chunking after each step, until
// `first_resync_index` reports a match or the existing leaves run out.
// NOTE(review): `first_resync_index` is defined elsewhere; presumably it finds where
// generated and existing leaf boundaries coincide past the ceiling key — confirm.
+ loop {
763
+ if leaf_index < leaves.len() {
764
+ let leaf = &leaves[leaf_index];
765
+ for entry in self
766
+ .load_leaf_entries_with_overlay(store, overlay, &leaf.child_hash)
767
+ .await?
768
+ {
769
+ window_entries.insert(entry.key, entry.value);
770
+ }
771
+
772
// Mutations inside this leaf's key span overwrite or extend the loaded entries.
+ while mutations
773
+ .front()
774
+ .is_some_and(|(key, _)| key.as_slice() <= leaf.last_key.as_slice())
775
+ {
776
+ let (key, value) = mutations
777
+ .pop_front()
778
+ .expect("front mutation should be present");
779
+ window_mutation_ceiling = key.clone();
780
+ window_entries.insert(key, value);
781
+ }
782
+ leaf_index += 1;
783
+ }
784
+
785
// Absorb mutations that sort before the next unread leaf's first key, or every
// remaining mutation once all leaves have been read.
+ while let Some((key, _)) = mutations.front() {
786
+ if leaf_index < leaves.len()
787
+ && key.as_slice() >= leaves[leaf_index].first_key.as_slice()
788
+ {
789
+ break;
790
+ }
791
+ let (key, value) = mutations
792
+ .pop_front()
793
+ .expect("front mutation should be present");
794
+ window_mutation_ceiling = key.clone();
795
+ window_entries.insert(key, value);
796
+ }
797
+
798
// The next leaf also receives a mutation: pull it into the window before re-chunking.
+ if leaf_index < leaves.len()
799
+ && mutations.front().is_some_and(|(key, _)| {
800
+ key.as_slice() <= leaves[leaf_index].last_key.as_slice()
801
+ })
802
+ {
803
+ continue;
804
+ }
805
+
806
+ let mut candidate_chunks = BTreeMap::new();
807
+ let candidate_leaves = self.build_leaf_level_from_refs(
808
+ window_entries
809
+ .iter()
810
+ .map(|(key, value)| EncodedLeafEntryRef { key, value }),
811
+ &mut candidate_chunks,
812
+ );
813
+
814
+ if let Some((generated_resync_index, existing_resync_index)) = first_resync_index(
815
+ &candidate_leaves,
816
+ &leaves[window_start..],
817
+ &window_mutation_ceiling,
818
+ ) {
819
// Keep only the chunks of the generated leaves that are actually emitted.
+ for summary in &candidate_leaves[..generated_resync_index] {
820
+ if let Some(chunk) = candidate_chunks.remove(&summary.child_hash) {
821
+ chunks.entry(chunk.hash).or_insert(chunk);
822
+ }
823
+ }
824
+ output_leaves.extend(candidate_leaves.into_iter().take(generated_resync_index));
825
// Resume the outer walk at the existing leaf identified by the resync result.
+ leaf_index = window_start + existing_resync_index;
826
+ break;
827
+ }
828
+
829
// No resync and no leaves left: the whole re-chunked window becomes the tail.
+ if leaf_index >= leaves.len() {
830
+ chunks.extend(candidate_chunks);
831
+ output_leaves.extend(candidate_leaves);
832
+ break;
833
+ }
834
+ }
835
+ }
836
+
837
// Mutations still pending after the leaf walk are chunked into new trailing leaves.
+ if !mutations.is_empty() {
838
+ let entries = mutations
839
+ .into_iter()
840
+ .map(|(key, value)| EncodedLeafEntry { key, value })
841
+ .collect();
842
+ output_leaves.extend(self.build_leaf_level(entries, &mut chunks));
843
+ }
844
+
845
+ let built = self.build_tree_from_leaf_summaries(output_leaves, chunks)?;
846
+ Ok(MutationApply::Applied(
847
+ self.persist_built_tree(writes, overlay, built, commit_id)
848
+ .await?,
849
+ ))
850
+ }
851
+
852
+ async fn apply_single_mutation_from_seek_path(
853
+ &self,
854
+ store: &mut (impl StorageReader + ?Sized),
855
+ writes: &mut StorageWriteSet,
856
+ overlay: &mut storage::TrackedStateChunkOverlay,
857
+ root_id: &TrackedStateRootId,
858
+ mutation: TrackedStateMutation,
859
+ commit_id: Option<&str>,
860
+ ) -> Result<MutationApply<TrackedStateMutation>, LixError> {
861
+ let TrackedStateMutation {
862
+ encoded_key,
863
+ encoded_value,
864
+ } = mutation;
865
+ let mut current = *root_id.as_bytes();
866
+ let mut path = Vec::new();
867
+ let mut entries = loop {
868
+ match self
869
+ .load_node_with_overlay(store, overlay, &current)
870
+ .await?
871
+ {
872
+ DecodedNode::Leaf(leaf) => break leaf.entries().to_vec(),
873
+ DecodedNode::Internal(internal) => {
874
+ let children = internal.children().to_vec();
875
+ let child_index = children
876
+ .iter()
877
+ .position(|child| child.last_key.as_slice() >= encoded_key.as_slice())
878
+ .or_else(|| (!children.is_empty()).then_some(children.len() - 1))
879
+ .ok_or_else(|| {
880
+ LixError::new(
881
+ "LIX_ERROR_UNKNOWN",
882
+ "tracked-state tree internal node has no children",
883
+ )
884
+ })?;
885
+ current = children[child_index].child_hash;
886
+ path.push(SeekPathFrame {
887
+ children,
888
+ child_index,
889
+ });
890
+ }
891
+ }
892
+ };
893
+
894
+ let mutation_entry_index = match entries
895
+ .binary_search_by(|entry| entry.key.as_slice().cmp(encoded_key.as_slice()))
896
+ {
897
+ Ok(index) => {
898
+ if entries[index].value.as_slice() == encoded_value.as_slice() {
899
+ return Ok(MutationApply::Fallback(TrackedStateMutation {
900
+ encoded_key,
901
+ encoded_value,
902
+ }));
903
+ }
904
+ entries[index].value = encoded_value;
905
+ index
906
+ }
907
+ Err(index) => {
908
+ entries.insert(
909
+ index,
910
+ EncodedLeafEntry {
911
+ key: encoded_key,
912
+ value: encoded_value,
913
+ },
914
+ );
915
+ index
916
+ }
917
+ };
918
+
919
+ let mut chunks = BTreeMap::new();
920
+ let mut replacement_children;
921
+ let mut old_child_count;
922
+
923
+ let Some(leaf_parent) = path.pop() else {
924
+ let built = self.build_tree_from_entries(entries)?;
925
+ return Ok(MutationApply::Applied(
926
+ self.persist_built_tree(writes, overlay, built, commit_id)
927
+ .await?,
928
+ ));
929
+ };
930
+ let mutation_is_right_edge = leaf_parent.child_index + 1 == leaf_parent.children.len()
931
+ && path
932
+ .iter()
933
+ .all(|frame| frame.child_index + 1 == frame.children.len());
934
+
935
+ let mut leaf_entries = entries;
936
+ let mut next_leaf_index = leaf_parent.child_index + 1;
937
+ loop {
938
+ let mut candidate_chunks = BTreeMap::new();
939
+ let candidate_leaves = self.build_leaf_level_from_refs(
940
+ leaf_entries.iter().map(EncodedLeafEntry::as_ref),
941
+ &mut candidate_chunks,
942
+ );
943
+ if let Some((generated_resync_index, existing_resync_index)) = first_resync_index(
944
+ &candidate_leaves,
945
+ &leaf_parent.children[leaf_parent.child_index..],
946
+ leaf_entries[mutation_entry_index].key.as_slice(),
947
+ ) {
948
+ for summary in &candidate_leaves[..generated_resync_index] {
949
+ if let Some(chunk) = candidate_chunks.remove(&summary.child_hash) {
950
+ chunks.entry(chunk.hash).or_insert(chunk);
951
+ }
952
+ }
953
+ replacement_children = candidate_leaves
954
+ .into_iter()
955
+ .take(generated_resync_index)
956
+ .collect();
957
+ old_child_count = existing_resync_index;
958
+ break;
959
+ }
960
+
961
+ if next_leaf_index >= leaf_parent.children.len() {
962
+ if !mutation_is_right_edge {
963
+ let entry = leaf_entries.remove(mutation_entry_index);
964
+ return Ok(MutationApply::Fallback(TrackedStateMutation {
965
+ encoded_key: entry.key,
966
+ encoded_value: entry.value,
967
+ }));
968
+ }
969
+ chunks.extend(candidate_chunks);
970
+ replacement_children = candidate_leaves;
971
+ old_child_count = leaf_parent.children.len() - leaf_parent.child_index;
972
+ break;
973
+ }
974
+
975
+ leaf_entries.extend(
976
+ self.load_leaf_entries_with_overlay(
977
+ store,
978
+ overlay,
979
+ &leaf_parent.children[next_leaf_index].child_hash,
980
+ )
981
+ .await?,
982
+ );
983
+ next_leaf_index += 1;
984
+ }
985
+
986
+ let mut child_index = leaf_parent.child_index;
987
+ let mut children = leaf_parent.children;
988
+ let mut parent_level = 1usize;
989
+ loop {
990
+ children.splice(
991
+ child_index..child_index + old_child_count,
992
+ replacement_children,
993
+ );
994
+ replacement_children = self.build_internal_level(children, parent_level, &mut chunks);
995
+ old_child_count = 1;
996
+
997
+ let Some(frame) = path.pop() else {
998
+ let mut summaries = replacement_children;
999
+ let mut tree_height = parent_level + 1;
1000
+ while summaries.len() > 1 {
1001
+ summaries = self.build_internal_level(summaries, tree_height, &mut chunks);
1002
+ tree_height += 1;
1003
+ }
1004
+ let root = summaries.pop().ok_or_else(|| {
1005
+ LixError::new(
1006
+ "LIX_ERROR_UNKNOWN",
1007
+ "tracked-state seek-path mutation produced no root",
1008
+ )
1009
+ })?;
1010
+ let chunks = chunks.into_values().collect::<Vec<_>>();
1011
+ let chunk_bytes = chunks.iter().map(|chunk| chunk.data.len()).sum();
1012
+ let built = BuiltTree {
1013
+ root_id: TrackedStateRootId::new(root.child_hash),
1014
+ chunks,
1015
+ row_count: root.subtree_count as usize,
1016
+ tree_height,
1017
+ chunk_bytes,
1018
+ };
1019
+ return Ok(MutationApply::Applied(
1020
+ self.persist_built_tree(writes, overlay, built, commit_id)
1021
+ .await?,
1022
+ ));
1023
+ };
1024
+
1025
+ child_index = frame.child_index;
1026
+ children = frame.children;
1027
+ parent_level += 1;
1028
+ }
1029
+ }
1030
+
1031
+ async fn persist_built_tree(
1032
+ &self,
1033
+ writes: &mut StorageWriteSet,
1034
+ overlay: &mut storage::TrackedStateChunkOverlay,
1035
+ built: BuiltTree,
1036
+ commit_id: Option<&str>,
1037
+ ) -> Result<TrackedStateApplyResult, LixError> {
1038
+ overlay.stage_chunks(writes, &built.chunks);
1039
+ let persisted_root = if let Some(commit_id) = commit_id {
1040
+ storage::stage_root(writes, commit_id, &built.root_id);
1041
+ true
1042
+ } else {
1043
+ false
1044
+ };
1045
+ Ok(TrackedStateApplyResult {
1046
+ root_id: built.root_id,
1047
+ row_count: built.row_count,
1048
+ tree_height: built.tree_height,
1049
+ chunk_count: built.chunks.len(),
1050
+ chunk_bytes: built.chunk_bytes,
1051
+ persisted_root,
1052
+ })
1053
+ }
1054
+
1055
+ fn build_tree_from_entries(
1056
+ &self,
1057
+ entries: Vec<EncodedLeafEntry>,
1058
+ ) -> Result<BuiltTree, LixError> {
1059
+ let row_count = entries.len();
1060
+ let mut chunks = BTreeMap::<[u8; TRACKED_STATE_HASH_BYTES], PendingChunkWrite>::new();
1061
+ let mut summaries = self.build_leaf_level(entries, &mut chunks);
1062
+ let mut tree_height = 1usize;
1063
+ while summaries.len() > 1 {
1064
+ summaries = self.build_internal_level(summaries, tree_height, &mut chunks);
1065
+ tree_height += 1;
1066
+ }
1067
+ let root = summaries.pop().ok_or_else(|| {
1068
+ LixError::new(
1069
+ "LIX_ERROR_UNKNOWN",
1070
+ "tracked-state tree tree build produced no root",
1071
+ )
1072
+ })?;
1073
+ let chunks = chunks.into_values().collect::<Vec<_>>();
1074
+ let chunk_bytes = chunks.iter().map(|chunk| chunk.data.len()).sum();
1075
+ Ok(BuiltTree {
1076
+ root_id: TrackedStateRootId::new(root.child_hash),
1077
+ chunks,
1078
+ row_count,
1079
+ tree_height,
1080
+ chunk_bytes,
1081
+ })
1082
+ }
1083
+
1084
+ fn build_tree_from_leaf_summaries(
1085
+ &self,
1086
+ leaf_summaries: Vec<ChildSummary>,
1087
+ mut chunks: BTreeMap<[u8; TRACKED_STATE_HASH_BYTES], PendingChunkWrite>,
1088
+ ) -> Result<BuiltTree, LixError> {
1089
+ let row_count = leaf_summaries
1090
+ .iter()
1091
+ .map(|summary| summary.subtree_count as usize)
1092
+ .sum();
1093
+ let mut summaries = leaf_summaries;
1094
+ let mut tree_height = 1usize;
1095
+ while summaries.len() > 1 {
1096
+ summaries = self.build_internal_level(summaries, tree_height, &mut chunks);
1097
+ tree_height += 1;
1098
+ }
1099
+ let root = summaries.pop().ok_or_else(|| {
1100
+ LixError::new(
1101
+ "LIX_ERROR_UNKNOWN",
1102
+ "tracked-state tree build from leaves produced no root",
1103
+ )
1104
+ })?;
1105
+ let chunks = chunks.into_values().collect::<Vec<_>>();
1106
+ let chunk_bytes = chunks.iter().map(|chunk| chunk.data.len()).sum();
1107
+ Ok(BuiltTree {
1108
+ root_id: TrackedStateRootId::new(root.child_hash),
1109
+ chunks,
1110
+ row_count,
1111
+ tree_height,
1112
+ chunk_bytes,
1113
+ })
1114
+ }
1115
+
1116
+ fn build_tree_from_leaf_patch(
1117
+ &self,
1118
+ levels: &[Vec<ChildSummary>],
1119
+ leaf_start: usize,
1120
+ old_leaf_count: usize,
1121
+ replacement_leaves: Vec<ChildSummary>,
1122
+ mut chunks: BTreeMap<[u8; TRACKED_STATE_HASH_BYTES], PendingChunkWrite>,
1123
+ mutation_key: &[u8],
1124
+ ) -> Result<BuiltTree, LixError> {
1125
+ if levels.len() <= 1 {
1126
+ let mut leaves = levels.first().cloned().unwrap_or_default();
1127
+ leaves.splice(leaf_start..leaf_start + old_leaf_count, replacement_leaves);
1128
+ return self.build_tree_from_leaf_summaries(leaves, chunks);
1129
+ }
1130
+
1131
+ let mut child_start = leaf_start;
1132
+ let mut old_child_count = old_leaf_count;
1133
+ let mut replacement_children = replacement_leaves;
1134
+
1135
+ for level in 0..levels.len() - 1 {
1136
+ let patch = self.patch_parent_level(
1137
+ &levels[level],
1138
+ &levels[level + 1],
1139
+ child_start,
1140
+ old_child_count,
1141
+ replacement_children,
1142
+ level + 1,
1143
+ &mut chunks,
1144
+ mutation_key,
1145
+ )?;
1146
+ child_start = patch.parent_start;
1147
+ old_child_count = patch.old_parent_count;
1148
+ replacement_children = patch.replacement_parents;
1149
+ }
1150
+
1151
+ let mut summaries = replacement_children;
1152
+ let mut tree_height = levels.len();
1153
+ while summaries.len() > 1 {
1154
+ summaries = self.build_internal_level(summaries, tree_height, &mut chunks);
1155
+ tree_height += 1;
1156
+ }
1157
+ let root = summaries.pop().ok_or_else(|| {
1158
+ LixError::new(
1159
+ "LIX_ERROR_UNKNOWN",
1160
+ "tracked-state patched tree produced no root",
1161
+ )
1162
+ })?;
1163
+ let chunks = chunks.into_values().collect::<Vec<_>>();
1164
+ let chunk_bytes = chunks.iter().map(|chunk| chunk.data.len()).sum();
1165
+ Ok(BuiltTree {
1166
+ root_id: TrackedStateRootId::new(root.child_hash),
1167
+ chunks,
1168
+ row_count: root.subtree_count as usize,
1169
+ tree_height,
1170
+ chunk_bytes,
1171
+ })
1172
+ }
1173
+
1174
+ fn patch_parent_level(
1175
+ &self,
1176
+ old_children: &[ChildSummary],
1177
+ old_parents: &[ChildSummary],
1178
+ child_start: usize,
1179
+ old_child_count: usize,
1180
+ replacement_children: Vec<ChildSummary>,
1181
+ parent_level: usize,
1182
+ chunks: &mut BTreeMap<[u8; TRACKED_STATE_HASH_BYTES], PendingChunkWrite>,
1183
+ mutation_key: &[u8],
1184
+ ) -> Result<ParentLevelPatch, LixError> {
1185
+ if old_parents.is_empty() {
1186
+ return Ok(ParentLevelPatch {
1187
+ parent_start: 0,
1188
+ old_parent_count: 0,
1189
+ replacement_parents: self.build_internal_level(
1190
+ replacement_children,
1191
+ parent_level,
1192
+ chunks,
1193
+ ),
1194
+ });
1195
+ }
1196
+
1197
+ let parent_start = parent_index_for_child_index(old_children, old_parents, child_start);
1198
+ let parent_child_range = child_range_for_parent(old_children, &old_parents[parent_start])?;
1199
+ let old_child_end = child_start + old_child_count;
1200
+ let parent_end = if old_child_count == 0 {
1201
+ parent_start
1202
+ } else {
1203
+ parent_index_for_child_index(old_children, old_parents, old_child_end - 1)
1204
+ };
1205
+ let parent_end_child_range =
1206
+ child_range_for_parent(old_children, &old_parents[parent_end])?;
1207
+ let mut window_children = Vec::new();
1208
+ window_children.extend(
1209
+ old_children[parent_child_range.start..child_start]
1210
+ .iter()
1211
+ .map(ChildSummary::as_ref),
1212
+ );
1213
+ window_children.extend(replacement_children.iter().map(ChildSummary::as_ref));
1214
+ window_children.extend(
1215
+ old_children[old_child_end..parent_end_child_range.end]
1216
+ .iter()
1217
+ .map(ChildSummary::as_ref),
1218
+ );
1219
+ let mut next_parent_index = parent_end + 1;
1220
+
1221
+ loop {
1222
+ let mut candidate_chunks = BTreeMap::new();
1223
+ let candidate_parents = self.build_internal_level_from_refs(
1224
+ window_children.iter().copied(),
1225
+ parent_level,
1226
+ &mut candidate_chunks,
1227
+ );
1228
+
1229
+ if let Some((generated_resync_index, existing_resync_index)) = first_resync_index(
1230
+ &candidate_parents,
1231
+ &old_parents[parent_start..],
1232
+ mutation_key,
1233
+ ) {
1234
+ for summary in &candidate_parents[..generated_resync_index] {
1235
+ if let Some(chunk) = candidate_chunks.remove(&summary.child_hash) {
1236
+ chunks.entry(chunk.hash).or_insert(chunk);
1237
+ }
1238
+ }
1239
+ return Ok(ParentLevelPatch {
1240
+ parent_start,
1241
+ old_parent_count: existing_resync_index,
1242
+ replacement_parents: candidate_parents
1243
+ .into_iter()
1244
+ .take(generated_resync_index)
1245
+ .collect(),
1246
+ });
1247
+ }
1248
+
1249
+ if next_parent_index >= old_parents.len() {
1250
+ chunks.extend(candidate_chunks);
1251
+ return Ok(ParentLevelPatch {
1252
+ parent_start,
1253
+ old_parent_count: old_parents.len() - parent_start,
1254
+ replacement_parents: candidate_parents,
1255
+ });
1256
+ }
1257
+
1258
+ let next_range = child_range_for_parent(old_children, &old_parents[next_parent_index])?;
1259
+ window_children.extend(old_children[next_range].iter().map(ChildSummary::as_ref));
1260
+ next_parent_index += 1;
1261
+ }
1262
+ }
1263
+
1264
+ fn build_leaf_level(
1265
+ &self,
1266
+ entries: Vec<EncodedLeafEntry>,
1267
+ chunks: &mut BTreeMap<[u8; TRACKED_STATE_HASH_BYTES], PendingChunkWrite>,
1268
+ ) -> Vec<ChildSummary> {
1269
+ let groups = chunk_leaf_entries(entries, &self.options);
1270
+ groups
1271
+ .into_iter()
1272
+ .map(|group| {
1273
+ let subtree_count = group.entries.len() as u64;
1274
+ let first_key = group
1275
+ .entries
1276
+ .first()
1277
+ .map(|entry| entry.key.clone())
1278
+ .unwrap_or_default();
1279
+ let last_key = group
1280
+ .entries
1281
+ .last()
1282
+ .map(|entry| entry.key.clone())
1283
+ .unwrap_or_default();
1284
+ let node = encode_leaf_node(&group.entries);
1285
+ let (chunk, summary) =
1286
+ child_summary_from_node(node, first_key, last_key, subtree_count);
1287
+ chunks.entry(chunk.hash).or_insert(chunk);
1288
+ summary
1289
+ })
1290
+ .collect()
1291
+ }
1292
+
1293
+ fn build_leaf_level_from_refs<'a>(
1294
+ &self,
1295
+ entries: impl IntoIterator<Item = EncodedLeafEntryRef<'a>>,
1296
+ chunks: &mut BTreeMap<[u8; TRACKED_STATE_HASH_BYTES], PendingChunkWrite>,
1297
+ ) -> Vec<ChildSummary> {
1298
+ let groups = chunk_leaf_entry_refs(entries, &self.options);
1299
+ groups
1300
+ .into_iter()
1301
+ .map(|group| {
1302
+ let subtree_count = group.entries.len() as u64;
1303
+ let first_key = group
1304
+ .entries
1305
+ .first()
1306
+ .map(|entry| entry.key.to_vec())
1307
+ .unwrap_or_default();
1308
+ let last_key = group
1309
+ .entries
1310
+ .last()
1311
+ .map(|entry| entry.key.to_vec())
1312
+ .unwrap_or_default();
1313
+ let node = encode_leaf_node_refs(&group.entries);
1314
+ let (chunk, summary) =
1315
+ child_summary_from_node(node, first_key, last_key, subtree_count);
1316
+ chunks.entry(chunk.hash).or_insert(chunk);
1317
+ summary
1318
+ })
1319
+ .collect()
1320
+ }
1321
+
1322
+ fn build_internal_level(
1323
+ &self,
1324
+ children: Vec<ChildSummary>,
1325
+ level: usize,
1326
+ chunks: &mut BTreeMap<[u8; TRACKED_STATE_HASH_BYTES], PendingChunkWrite>,
1327
+ ) -> Vec<ChildSummary> {
1328
+ let groups = chunk_internal_entries(children, &self.options, level);
1329
+ groups
1330
+ .into_iter()
1331
+ .map(|group| {
1332
+ let subtree_count = group.children.iter().map(|child| child.subtree_count).sum();
1333
+ let first_key = group
1334
+ .children
1335
+ .first()
1336
+ .map(|child| child.first_key.clone())
1337
+ .unwrap_or_default();
1338
+ let last_key = group
1339
+ .children
1340
+ .last()
1341
+ .map(|child| child.last_key.clone())
1342
+ .unwrap_or_default();
1343
+ let node = encode_internal_node(&group.children);
1344
+ let (chunk, summary) =
1345
+ child_summary_from_node(node, first_key, last_key, subtree_count);
1346
+ chunks.entry(chunk.hash).or_insert(chunk);
1347
+ summary
1348
+ })
1349
+ .collect()
1350
+ }
1351
+
1352
+ fn build_internal_level_from_refs<'a>(
1353
+ &self,
1354
+ children: impl IntoIterator<Item = ChildSummaryRef<'a>>,
1355
+ level: usize,
1356
+ chunks: &mut BTreeMap<[u8; TRACKED_STATE_HASH_BYTES], PendingChunkWrite>,
1357
+ ) -> Vec<ChildSummary> {
1358
+ let groups = chunk_internal_entry_refs(children, &self.options, level);
1359
+ groups
1360
+ .into_iter()
1361
+ .map(|group| {
1362
+ let subtree_count = group.children.iter().map(|child| child.subtree_count).sum();
1363
+ let first_key = group
1364
+ .children
1365
+ .first()
1366
+ .map(|child| child.first_key.to_vec())
1367
+ .unwrap_or_default();
1368
+ let last_key = group
1369
+ .children
1370
+ .last()
1371
+ .map(|child| child.last_key.to_vec())
1372
+ .unwrap_or_default();
1373
+ let node = encode_internal_node_refs(&group.children);
1374
+ let (chunk, summary) =
1375
+ child_summary_from_node(node, first_key, last_key, subtree_count);
1376
+ chunks.entry(chunk.hash).or_insert(chunk);
1377
+ summary
1378
+ })
1379
+ .collect()
1380
+ }
1381
+
1382
+ async fn collect_leaf_entries(
1383
+ &self,
1384
+ store: &mut (impl StorageReader + ?Sized),
1385
+ root_id: &TrackedStateRootId,
1386
+ ) -> Result<Vec<EncodedLeafEntry>, LixError> {
1387
+ let mut out = Vec::new();
1388
+ let mut current = vec![*root_id.as_bytes()];
1389
+ while !current.is_empty() {
1390
+ let mut next = Vec::new();
1391
+ for hash in current {
1392
+ match self.load_node(store, &hash).await? {
1393
+ DecodedNode::Leaf(leaf) => out.extend(leaf.entries().iter().cloned()),
1394
+ DecodedNode::Internal(internal) => {
1395
+ next.extend(internal.children().iter().map(|child| child.child_hash));
1396
+ }
1397
+ }
1398
+ }
1399
+ current = next;
1400
+ }
1401
+ Ok(out)
1402
+ }
1403
+
1404
+ async fn collect_filtered_entries(
1405
+ &self,
1406
+ store: &mut impl StorageReader,
1407
+ root_id: &TrackedStateRootId,
1408
+ request: &TrackedStateTreeScanRequest,
1409
+ ) -> Result<Vec<(TrackedStateKey, TrackedStateIndexValue)>, LixError> {
1410
+ self.scan(store, root_id, request).await
1411
+ }
1412
+
1413
+ fn scan_node<'a, S>(
1414
+ &'a self,
1415
+ store: &'a mut S,
1416
+ hash: [u8; TRACKED_STATE_HASH_BYTES],
1417
+ request: &'a TrackedStateTreeScanRequest,
1418
+ ranges: &'a [EncodedScanRange],
1419
+ key_decode_hint: Option<ScanKeyDecodeHint<'a>>,
1420
+ rows: &'a mut Vec<(TrackedStateKey, TrackedStateIndexValue)>,
1421
+ ) -> Pin<Box<dyn Future<Output = Result<(), LixError>> + Send + 'a>>
1422
+ where
1423
+ S: StorageReader + Send + 'a,
1424
+ {
1425
+ Box::pin(async move {
1426
+ let bytes = self.load_node_bytes(store, &hash).await?;
1427
+ match decode_node_ref(&bytes)? {
1428
+ DecodedNodeRef::Leaf(leaf) => {
1429
+ for index in 0..leaf.len() {
1430
+ if scan_limit_reached(request, rows.len()) {
1431
+ break;
1432
+ }
1433
+ let entry = leaf.entry(index)?.ok_or_else(|| {
1434
+ LixError::new(
1435
+ "LIX_ERROR_UNKNOWN",
1436
+ "tracked-state leaf entry disappeared during scan",
1437
+ )
1438
+ })?;
1439
+ if !encoded_key_in_scan_ranges(entry.key, ranges) {
1440
+ continue;
1441
+ }
1442
+ let key = match key_decode_hint {
1443
+ Some(hint) => decode_key_with_trusted_prefix(
1444
+ entry.key,
1445
+ hint.schema_key,
1446
+ hint.file_id,
1447
+ hint.prefix_len,
1448
+ )?,
1449
+ None => decode_key(entry.key)?,
1450
+ };
1451
+ if key_decode_hint.is_none() && !key_matches_scan_filters(request, &key) {
1452
+ continue;
1453
+ }
1454
+ let Some(value) =
1455
+ decode_visible_value(entry.value, request.include_tombstones)?
1456
+ else {
1457
+ continue;
1458
+ };
1459
+ if key_decode_hint.is_some() || request.matches(&key, &value) {
1460
+ rows.push((key, value));
1461
+ }
1462
+ }
1463
+ }
1464
+ DecodedNodeRef::Internal(internal) => {
1465
+ for child in internal.children() {
1466
+ if scan_limit_reached(request, rows.len()) {
1467
+ break;
1468
+ }
1469
+ if child_summary_overlaps_scan_ranges(child, ranges) {
1470
+ self.scan_node(
1471
+ store,
1472
+ child.child_hash,
1473
+ request,
1474
+ ranges,
1475
+ key_decode_hint,
1476
+ rows,
1477
+ )
1478
+ .await?;
1479
+ }
1480
+ }
1481
+ }
1482
+ }
1483
+ Ok(())
1484
+ })
1485
+ }
1486
+
1487
+ fn get_many_node<'a, S>(
1488
+ &'a self,
1489
+ store: &'a mut S,
1490
+ hash: [u8; TRACKED_STATE_HASH_BYTES],
1491
+ encoded_keys: &'a [(usize, Vec<u8>)],
1492
+ values: &'a mut [Option<TrackedStateIndexValue>],
1493
+ ) -> Pin<Box<dyn Future<Output = Result<(), LixError>> + Send + 'a>>
1494
+ where
1495
+ S: StorageReader + Send + 'a,
1496
+ {
1497
+ Box::pin(async move {
1498
+ if encoded_keys.is_empty() {
1499
+ return Ok(());
1500
+ }
1501
+
1502
+ let bytes = self.load_node_bytes(store, &hash).await?;
1503
+ match decode_node_ref(&bytes)? {
1504
+ DecodedNodeRef::Leaf(leaf) => {
1505
+ for (original_index, encoded_key) in encoded_keys {
1506
+ if let Some(entry_index) = binary_search_leaf_key(&leaf, encoded_key)? {
1507
+ let entry = leaf.entry(entry_index)?.ok_or_else(|| {
1508
+ LixError::new(
1509
+ "LIX_ERROR_UNKNOWN",
1510
+ "tracked-state leaf entry disappeared during get_many",
1511
+ )
1512
+ })?;
1513
+ values[*original_index] = Some(decode_value(entry.value)?);
1514
+ }
1515
+ }
1516
+ }
1517
+ DecodedNodeRef::Internal(internal) => {
1518
+ let mut start = 0usize;
1519
+ let children = internal.children();
1520
+ for (child_index, child) in children.iter().enumerate() {
1521
+ if start >= encoded_keys.len() {
1522
+ break;
1523
+ }
1524
+
1525
+ let mut end = start;
1526
+ if child_index + 1 == children.len() {
1527
+ end = encoded_keys.len();
1528
+ } else {
1529
+ while end < encoded_keys.len()
1530
+ && encoded_keys[end].1.as_slice() <= child.last_key.as_slice()
1531
+ {
1532
+ end += 1;
1533
+ }
1534
+ }
1535
+
1536
+ if start < end {
1537
+ self.get_many_node(
1538
+ store,
1539
+ child.child_hash,
1540
+ &encoded_keys[start..end],
1541
+ values,
1542
+ )
1543
+ .await?;
1544
+ }
1545
+ start = end;
1546
+ }
1547
+ }
1548
+ }
1549
+ Ok(())
1550
+ })
1551
+ }
1552
+
1553
+ fn count_matching_keys_node<'a, S>(
1554
+ &'a self,
1555
+ store: &'a mut S,
1556
+ hash: [u8; TRACKED_STATE_HASH_BYTES],
1557
+ request: &'a TrackedStateTreeScanRequest,
1558
+ ranges: &'a [EncodedScanRange],
1559
+ ) -> Pin<Box<dyn Future<Output = Result<usize, LixError>> + Send + 'a>>
1560
+ where
1561
+ S: StorageReader + Send + 'a,
1562
+ {
1563
+ Box::pin(async move {
1564
+ let mut count = 0usize;
1565
+ match self.load_node(store, &hash).await? {
1566
+ DecodedNode::Leaf(leaf) => {
1567
+ for entry in leaf.entries() {
1568
+ if !encoded_key_in_scan_ranges(&entry.key, ranges) {
1569
+ continue;
1570
+ }
1571
+ let key = decode_key(&entry.key)?;
1572
+ if key_matches_scan_filters(request, &key) {
1573
+ count += 1;
1574
+ }
1575
+ }
1576
+ }
1577
+ DecodedNode::Internal(internal) => {
1578
+ for child in internal.children() {
1579
+ if child_summary_contained_by_scan_ranges(child, ranges)
1580
+ && request.entity_ids.is_empty()
1581
+ {
1582
+ count += child.subtree_count as usize;
1583
+ } else if child_summary_overlaps_scan_ranges(child, ranges) {
1584
+ count += self
1585
+ .count_matching_keys_node(store, child.child_hash, request, ranges)
1586
+ .await?;
1587
+ }
1588
+ }
1589
+ }
1590
+ }
1591
+ Ok(count)
1592
+ })
1593
+ }
1594
+
1595
+ async fn collect_entries_from_leaf_summaries(
1596
+ &self,
1597
+ store: &mut impl StorageReader,
1598
+ leaves: &[ChildSummary],
1599
+ ) -> Result<Vec<EncodedLeafEntry>, LixError> {
1600
+ let mut entries = Vec::new();
1601
+ for leaf in leaves {
1602
+ entries.extend(self.load_leaf_entries(store, &leaf.child_hash).await?);
1603
+ }
1604
+ Ok(entries)
1605
+ }
1606
+
1607
+ async fn collect_summary_levels_with_overlay(
1608
+ &self,
1609
+ store: &mut (impl StorageReader + ?Sized),
1610
+ overlay: &storage::TrackedStateChunkOverlay,
1611
+ root_id: &TrackedStateRootId,
1612
+ ) -> Result<Vec<Vec<ChildSummary>>, LixError> {
1613
+ let mut levels = Vec::new();
1614
+ self.collect_summary_levels_for_node_with_overlay(
1615
+ store,
1616
+ overlay,
1617
+ *root_id.as_bytes(),
1618
+ &mut levels,
1619
+ )
1620
+ .await?;
1621
+ Ok(levels)
1622
+ }
1623
+
1624
+ fn collect_summary_levels_for_node_with_overlay<'a, S>(
1625
+ &'a self,
1626
+ store: &'a mut S,
1627
+ overlay: &'a storage::TrackedStateChunkOverlay,
1628
+ hash: [u8; TRACKED_STATE_HASH_BYTES],
1629
+ levels: &'a mut Vec<Vec<ChildSummary>>,
1630
+ ) -> Pin<Box<dyn Future<Output = Result<(ChildSummary, usize), LixError>> + 'a>>
1631
+ where
1632
+ S: StorageReader + ?Sized + 'a,
1633
+ {
1634
+ Box::pin(async move {
1635
+ match self.load_node_with_overlay(store, overlay, &hash).await? {
1636
+ DecodedNode::Leaf(leaf) => {
1637
+ let summary = leaf_summary(hash, leaf.entries());
1638
+ push_level_summary(levels, 0, summary.clone());
1639
+ Ok((summary, 0))
1640
+ }
1641
+ DecodedNode::Internal(internal) => {
1642
+ let children = internal.children().to_vec();
1643
+ let child_height = match children.first() {
1644
+ Some(child) => match self
1645
+ .load_node_with_overlay(store, overlay, &child.child_hash)
1646
+ .await?
1647
+ {
1648
+ DecodedNode::Leaf(_) => {
1649
+ if levels.is_empty() {
1650
+ levels.push(Vec::new());
1651
+ }
1652
+ levels[0].extend(children.iter().cloned());
1653
+ 0
1654
+ }
1655
+ DecodedNode::Internal(_) => {
1656
+ let mut child_height = None;
1657
+ for child in &children {
1658
+ let (_, height) = self
1659
+ .collect_summary_levels_for_node_with_overlay(
1660
+ store,
1661
+ overlay,
1662
+ child.child_hash,
1663
+ levels,
1664
+ )
1665
+ .await?;
1666
+ child_height = Some(height);
1667
+ }
1668
+ child_height.unwrap_or(0)
1669
+ }
1670
+ },
1671
+ None => 0,
1672
+ };
1673
+ let height = child_height + 1;
1674
+ let summary = internal_summary(hash, &children)?;
1675
+ push_level_summary(levels, height, summary.clone());
1676
+ Ok((summary, height))
1677
+ }
1678
+ }
1679
+ })
1680
+ }
1681
+
1682
+ async fn load_leaf_entries(
1683
+ &self,
1684
+ store: &mut (impl StorageReader + ?Sized),
1685
+ hash: &[u8; TRACKED_STATE_HASH_BYTES],
1686
+ ) -> Result<Vec<EncodedLeafEntry>, LixError> {
1687
+ match self.load_node(store, hash).await? {
1688
+ DecodedNode::Leaf(leaf) => Ok(leaf.entries().to_vec()),
1689
+ DecodedNode::Internal(_) => Err(LixError::new(
1690
+ "LIX_ERROR_UNKNOWN",
1691
+ "tracked-state expected leaf chunk but found internal node",
1692
+ )),
1693
+ }
1694
+ }
1695
+
1696
+ async fn load_leaf_entries_with_overlay(
1697
+ &self,
1698
+ store: &mut (impl StorageReader + ?Sized),
1699
+ overlay: &storage::TrackedStateChunkOverlay,
1700
+ hash: &[u8; TRACKED_STATE_HASH_BYTES],
1701
+ ) -> Result<Vec<EncodedLeafEntry>, LixError> {
1702
+ match self.load_node_with_overlay(store, overlay, hash).await? {
1703
+ DecodedNode::Leaf(leaf) => Ok(leaf.entries().to_vec()),
1704
+ DecodedNode::Internal(_) => Err(LixError::new(
1705
+ "LIX_ERROR_UNKNOWN",
1706
+ "tracked-state expected leaf chunk but found internal node",
1707
+ )),
1708
+ }
1709
+ }
1710
+
1711
+ async fn load_node(
1712
+ &self,
1713
+ store: &mut (impl StorageReader + ?Sized),
1714
+ hash: &[u8; TRACKED_STATE_HASH_BYTES],
1715
+ ) -> Result<DecodedNode, LixError> {
1716
+ let bytes = self.load_node_bytes(store, hash).await?;
1717
+ decode_node(&bytes)
1718
+ }
1719
+
1720
+ async fn load_node_bytes(
1721
+ &self,
1722
+ store: &mut (impl StorageReader + ?Sized),
1723
+ hash: &[u8; TRACKED_STATE_HASH_BYTES],
1724
+ ) -> Result<Vec<u8>, LixError> {
1725
+ let bytes = storage::read_chunk(store, hash).await?.ok_or_else(|| {
1726
+ LixError::new("LIX_ERROR_UNKNOWN", "tracked-state tree chunk is missing")
1727
+ })?;
1728
+ storage::verify_chunk_hash(hash, &bytes)?;
1729
+ Ok(bytes)
1730
+ }
1731
+
1732
+ async fn load_node_with_overlay(
1733
+ &self,
1734
+ store: &mut (impl StorageReader + ?Sized),
1735
+ overlay: &storage::TrackedStateChunkOverlay,
1736
+ hash: &[u8; TRACKED_STATE_HASH_BYTES],
1737
+ ) -> Result<DecodedNode, LixError> {
1738
+ let bytes = overlay.read_chunk(store, hash).await?.ok_or_else(|| {
1739
+ LixError::new("LIX_ERROR_UNKNOWN", "tracked-state tree chunk is missing")
1740
+ })?;
1741
+ storage::verify_chunk_hash(hash, &bytes)?;
1742
+ decode_node(&bytes)
1743
+ }
1744
+ }
1745
+
1746
/// Result bundle describing a freshly built tree.
///
/// NOTE(review): this struct is not constructed in the visible part of the
/// file; the field notes below are inferred from names and types only and
/// should be confirmed against the builder code.
#[derive(Debug)]
struct BuiltTree {
    /// Identifier of the tree root.
    root_id: TrackedStateRootId,
    /// Chunk writes that are still pending (presumably not yet persisted).
    chunks: Vec<PendingChunkWrite>,
    /// Presumably the number of rows (leaf entries) in the tree.
    row_count: usize,
    /// Presumably the number of levels in the tree.
    tree_height: usize,
    /// Presumably the total encoded size of `chunks`, in bytes.
    chunk_bytes: usize,
}
1754
+
1755
/// Describes a replacement of a run of parent summaries on one tree level.
///
/// NOTE(review): not used in the visible part of the file; presumably
/// `old_parent_count` parents starting at `parent_start` are replaced by
/// `replacement_parents` — confirm against the code that applies the patch.
struct ParentLevelPatch {
    parent_start: usize,
    old_parent_count: usize,
    replacement_parents: Vec<ChildSummary>,
}
1760
+
1761
/// One step of a root-to-leaf path.
///
/// NOTE(review): not used in the visible part of the file; presumably
/// `children` are the child summaries of an internal node and `child_index`
/// is the child that was followed — confirm against the seek code.
struct SeekPathFrame {
    children: Vec<ChildSummary>,
    child_index: usize,
}
1765
+
1766
/// A half-open byte range over encoded tracked-state keys.
#[derive(Debug, Clone)]
struct EncodedScanRange {
    /// Inclusive lower bound: a key is in range only if `key >= start`.
    start: Vec<u8>,
    /// Exclusive upper bound: a key is in range only if `key < end`.
    /// `None` leaves the range unbounded above.
    end: Option<Vec<u8>>,
}
1771
+
1772
/// Borrowed hint describing the single schema/file prefix a scan is bound to.
///
/// Built by `scan_key_decode_hint` only when the scan has exactly one range,
/// one schema key, one non-`Any` file filter, and no entity filter.
#[derive(Debug, Clone, Copy)]
struct ScanKeyDecodeHint<'a> {
    /// The only schema key of the scan request.
    schema_key: &'a str,
    /// The file id the scan is bound to; `None` for the `Null` file filter.
    file_id: Option<&'a str>,
    /// Byte length of the single scan range's `start` bound.
    prefix_len: usize,
}
1778
+
1779
+ fn binary_search_leaf_key(
1780
+ leaf: &DecodedLeafNodeRef<'_>,
1781
+ encoded_key: &[u8],
1782
+ ) -> Result<Option<usize>, LixError> {
1783
+ let mut low = 0usize;
1784
+ let mut high = leaf.len();
1785
+ while low < high {
1786
+ let mid = low + (high - low) / 2;
1787
+ let key = leaf.key(mid)?.ok_or_else(|| {
1788
+ LixError::new(
1789
+ "LIX_ERROR_UNKNOWN",
1790
+ "tracked-state leaf key disappeared during binary search",
1791
+ )
1792
+ })?;
1793
+ match key.cmp(encoded_key) {
1794
+ std::cmp::Ordering::Less => low = mid + 1,
1795
+ std::cmp::Ordering::Equal => return Ok(Some(mid)),
1796
+ std::cmp::Ordering::Greater => high = mid,
1797
+ }
1798
+ }
1799
+ Ok(None)
1800
+ }
1801
+
1802
/// Forward-only cursor that yields the summaries of a tree's leaves one at a
/// time, left to right.
struct LeafSummaryCursor {
    /// Internal nodes on the path to the current leaf, outermost first; each
    /// frame remembers which of its children to visit next.
    stack: Vec<LeafSummaryCursorFrame>,
    /// Summary of the leaf the cursor currently points at; `None` once the
    /// cursor is exhausted.
    current: Option<ChildSummary>,
}
1806
+
1807
/// Traversal state for one internal node visited by `LeafSummaryCursor`.
struct LeafSummaryCursorFrame {
    /// Child summaries of the internal node.
    children: Vec<ChildSummary>,
    /// Index into `children` of the next child to visit.
    next_index: usize,
    /// Whether `children` refer to leaves, as decided by
    /// `child_summaries_are_leaves` (which inspects only the first child).
    children_are_leaves: bool,
}
1812
+
1813
+ impl LeafSummaryCursor {
1814
+ async fn new(
1815
+ tree: &TrackedStateTree,
1816
+ store: &mut impl StorageReader,
1817
+ root_hash: [u8; TRACKED_STATE_HASH_BYTES],
1818
+ ) -> Result<Self, LixError> {
1819
+ let mut cursor = Self {
1820
+ stack: Vec::new(),
1821
+ current: None,
1822
+ };
1823
+ match tree.load_node(store, &root_hash).await? {
1824
+ DecodedNode::Leaf(leaf) => {
1825
+ cursor.current = Some(leaf_summary(root_hash, leaf.entries()));
1826
+ }
1827
+ DecodedNode::Internal(internal) => {
1828
+ let children = internal.children().to_vec();
1829
+ let children_are_leaves =
1830
+ child_summaries_are_leaves(tree, store, &children).await?;
1831
+ cursor.stack.push(LeafSummaryCursorFrame {
1832
+ children,
1833
+ next_index: 0,
1834
+ children_are_leaves,
1835
+ });
1836
+ cursor.advance(tree, store).await?;
1837
+ }
1838
+ }
1839
+ Ok(cursor)
1840
+ }
1841
+
1842
+ fn current(&self) -> Option<&ChildSummary> {
1843
+ self.current.as_ref()
1844
+ }
1845
+
1846
+ async fn advance(
1847
+ &mut self,
1848
+ tree: &TrackedStateTree,
1849
+ store: &mut impl StorageReader,
1850
+ ) -> Result<(), LixError> {
1851
+ self.current = None;
1852
+ while let Some(frame) = self.stack.last_mut() {
1853
+ if frame.next_index >= frame.children.len() {
1854
+ self.stack.pop();
1855
+ continue;
1856
+ }
1857
+
1858
+ let next = frame.children[frame.next_index].clone();
1859
+ let next_is_leaf = frame.children_are_leaves;
1860
+ frame.next_index += 1;
1861
+ if next_is_leaf {
1862
+ self.current = Some(next);
1863
+ return Ok(());
1864
+ }
1865
+ self.descend_to_leaf(tree, store, next).await?;
1866
+ return Ok(());
1867
+ }
1868
+ Ok(())
1869
+ }
1870
+
1871
+ async fn descend_to_leaf(
1872
+ &mut self,
1873
+ tree: &TrackedStateTree,
1874
+ store: &mut impl StorageReader,
1875
+ mut summary: ChildSummary,
1876
+ ) -> Result<(), LixError> {
1877
+ loop {
1878
+ match tree.load_node(store, &summary.child_hash).await? {
1879
+ DecodedNode::Leaf(_) => {
1880
+ self.current = Some(summary);
1881
+ return Ok(());
1882
+ }
1883
+ DecodedNode::Internal(internal) => {
1884
+ let children = internal.children().to_vec();
1885
+ let children_are_leaves =
1886
+ child_summaries_are_leaves(tree, store, &children).await?;
1887
+ let Some(first_child) = children.first().cloned() else {
1888
+ return Err(LixError::new(
1889
+ "LIX_ERROR_UNKNOWN",
1890
+ "tracked-state internal node has no children",
1891
+ ));
1892
+ };
1893
+ self.stack.push(LeafSummaryCursorFrame {
1894
+ children,
1895
+ next_index: 1,
1896
+ children_are_leaves,
1897
+ });
1898
+ if children_are_leaves {
1899
+ self.current = Some(first_child);
1900
+ return Ok(());
1901
+ } else {
1902
+ summary = first_child;
1903
+ }
1904
+ }
1905
+ }
1906
+ }
1907
+ }
1908
+ }
1909
+
1910
/// One group of owned leaf entries produced by `chunk_leaf_entries`, together
/// with running byte totals used for size estimation.
#[derive(Debug, Default)]
struct LeafChunkAccumulator {
    /// Entries assigned to this chunk, in input order.
    entries: Vec<EncodedLeafEntry>,
    /// Sum of the key lengths of `entries`, in bytes.
    key_bytes: usize,
    /// Sum of the value lengths of `entries`, in bytes.
    value_bytes: usize,
}
1916
+
1917
/// Borrowed counterpart of `LeafChunkAccumulator`, produced by
/// `chunk_leaf_entry_refs`.
#[derive(Debug, Default)]
struct LeafChunkRefAccumulator<'a> {
    /// Entry references assigned to this chunk, in input order.
    entries: Vec<EncodedLeafEntryRef<'a>>,
    /// Sum of the key lengths of `entries`, in bytes.
    key_bytes: usize,
    /// Sum of the value lengths of `entries`, in bytes.
    value_bytes: usize,
}
1923
+
1924
/// One group of owned child summaries produced by `chunk_internal_entries`,
/// together with running byte totals used for size estimation.
#[derive(Debug, Default)]
struct InternalChunkAccumulator {
    /// Child summaries assigned to this chunk, in input order.
    children: Vec<ChildSummary>,
    /// Sum of the `first_key` lengths of `children`, in bytes.
    first_key_bytes: usize,
    /// Sum of the `last_key` lengths of `children`, in bytes.
    last_key_bytes: usize,
}
1930
+
1931
/// Borrowed counterpart of `InternalChunkAccumulator`, produced by
/// `chunk_internal_entry_refs`.
#[derive(Debug, Default)]
struct InternalChunkRefAccumulator<'a> {
    /// Child summary references assigned to this chunk, in input order.
    children: Vec<ChildSummaryRef<'a>>,
    /// Sum of the `first_key` lengths of `children`, in bytes.
    first_key_bytes: usize,
    /// Sum of the `last_key` lengths of `children`, in bytes.
    last_key_bytes: usize,
}
1937
+
1938
+ fn chunk_leaf_entries(
1939
+ entries: Vec<EncodedLeafEntry>,
1940
+ options: &TrackedStateTreeOptions,
1941
+ ) -> Vec<LeafChunkAccumulator> {
1942
+ if entries.is_empty() {
1943
+ return vec![LeafChunkAccumulator::default()];
1944
+ }
1945
+ let mut groups = Vec::new();
1946
+ let mut current = LeafChunkAccumulator::default();
1947
+ for entry in entries {
1948
+ let item_size = estimate_leaf_entry_size(entry.key.len(), entry.value.len());
1949
+ let projected_size = estimate_leaf_chunk_size(
1950
+ current.entries.len() + 1,
1951
+ current.key_bytes + entry.key.len(),
1952
+ current.value_bytes + entry.value.len(),
1953
+ );
1954
+ if !current.entries.is_empty() && projected_size > options.max_chunk_bytes {
1955
+ groups.push(std::mem::take(&mut current));
1956
+ }
1957
+
1958
+ current.key_bytes += entry.key.len();
1959
+ current.value_bytes += entry.value.len();
1960
+ current.entries.push(entry);
1961
+ let current_size = estimate_leaf_chunk_size(
1962
+ current.entries.len(),
1963
+ current.key_bytes,
1964
+ current.value_bytes,
1965
+ );
1966
+ if current_size >= options.min_chunk_bytes
1967
+ && (current_size >= options.max_chunk_bytes
1968
+ || current.entries.last().is_some_and(|entry| {
1969
+ boundary_trigger(
1970
+ &entry.key,
1971
+ 0,
1972
+ current_size,
1973
+ item_size,
1974
+ options.target_chunk_bytes,
1975
+ )
1976
+ }))
1977
+ {
1978
+ groups.push(std::mem::take(&mut current));
1979
+ }
1980
+ }
1981
+ if !current.entries.is_empty() {
1982
+ groups.push(current);
1983
+ }
1984
+ groups
1985
+ }
1986
+
1987
+ fn chunk_leaf_entry_refs<'a>(
1988
+ entries: impl IntoIterator<Item = EncodedLeafEntryRef<'a>>,
1989
+ options: &TrackedStateTreeOptions,
1990
+ ) -> Vec<LeafChunkRefAccumulator<'a>> {
1991
+ let mut iter = entries.into_iter().peekable();
1992
+ if iter.peek().is_none() {
1993
+ return vec![LeafChunkRefAccumulator::default()];
1994
+ }
1995
+ let mut groups = Vec::new();
1996
+ let mut current = LeafChunkRefAccumulator::default();
1997
+ for entry in iter {
1998
+ let item_size = estimate_leaf_entry_size(entry.key.len(), entry.value.len());
1999
+ let projected_size = estimate_leaf_chunk_size(
2000
+ current.entries.len() + 1,
2001
+ current.key_bytes + entry.key.len(),
2002
+ current.value_bytes + entry.value.len(),
2003
+ );
2004
+ if !current.entries.is_empty() && projected_size > options.max_chunk_bytes {
2005
+ groups.push(std::mem::take(&mut current));
2006
+ }
2007
+
2008
+ current.key_bytes += entry.key.len();
2009
+ current.value_bytes += entry.value.len();
2010
+ current.entries.push(entry);
2011
+ let current_size = estimate_leaf_chunk_size(
2012
+ current.entries.len(),
2013
+ current.key_bytes,
2014
+ current.value_bytes,
2015
+ );
2016
+ if current_size >= options.min_chunk_bytes
2017
+ && (current_size >= options.max_chunk_bytes
2018
+ || current.entries.last().is_some_and(|entry| {
2019
+ boundary_trigger(
2020
+ entry.key,
2021
+ 0,
2022
+ current_size,
2023
+ item_size,
2024
+ options.target_chunk_bytes,
2025
+ )
2026
+ }))
2027
+ {
2028
+ groups.push(std::mem::take(&mut current));
2029
+ }
2030
+ }
2031
+ if !current.entries.is_empty() {
2032
+ groups.push(current);
2033
+ }
2034
+ groups
2035
+ }
2036
+
2037
+ fn chunk_internal_entries(
2038
+ children: Vec<ChildSummary>,
2039
+ options: &TrackedStateTreeOptions,
2040
+ level: usize,
2041
+ ) -> Vec<InternalChunkAccumulator> {
2042
+ let mut groups = Vec::new();
2043
+ let mut current = InternalChunkAccumulator::default();
2044
+ for child in children {
2045
+ let item_size = child.first_key.len()
2046
+ + child.last_key.len()
2047
+ + TRACKED_STATE_HASH_BYTES
2048
+ + std::mem::size_of::<u64>();
2049
+ let projected_size = estimate_internal_chunk_size(
2050
+ current.children.len() + 1,
2051
+ current.first_key_bytes + child.first_key.len(),
2052
+ current.last_key_bytes + child.last_key.len(),
2053
+ );
2054
+ if !current.children.is_empty() && projected_size > options.max_chunk_bytes {
2055
+ groups.push(std::mem::take(&mut current));
2056
+ }
2057
+
2058
+ current.first_key_bytes += child.first_key.len();
2059
+ current.last_key_bytes += child.last_key.len();
2060
+ current.children.push(child);
2061
+ let current_size = estimate_internal_chunk_size(
2062
+ current.children.len(),
2063
+ current.first_key_bytes,
2064
+ current.last_key_bytes,
2065
+ );
2066
+ if current_size >= options.min_chunk_bytes
2067
+ && (current_size >= options.max_chunk_bytes
2068
+ || current.children.last().is_some_and(|child| {
2069
+ boundary_trigger(
2070
+ &child.first_key,
2071
+ level,
2072
+ current_size,
2073
+ item_size,
2074
+ options.target_chunk_bytes,
2075
+ )
2076
+ }))
2077
+ {
2078
+ groups.push(std::mem::take(&mut current));
2079
+ }
2080
+ }
2081
+ if !current.children.is_empty() {
2082
+ groups.push(current);
2083
+ }
2084
+ groups
2085
+ }
2086
+
2087
+ fn chunk_internal_entry_refs<'a>(
2088
+ children: impl IntoIterator<Item = ChildSummaryRef<'a>>,
2089
+ options: &TrackedStateTreeOptions,
2090
+ level: usize,
2091
+ ) -> Vec<InternalChunkRefAccumulator<'a>> {
2092
+ let mut groups = Vec::new();
2093
+ let mut current = InternalChunkRefAccumulator::default();
2094
+ for child in children {
2095
+ let item_size = child.first_key.len()
2096
+ + child.last_key.len()
2097
+ + TRACKED_STATE_HASH_BYTES
2098
+ + std::mem::size_of::<u64>();
2099
+ let projected_size = estimate_internal_chunk_size(
2100
+ current.children.len() + 1,
2101
+ current.first_key_bytes + child.first_key.len(),
2102
+ current.last_key_bytes + child.last_key.len(),
2103
+ );
2104
+ if !current.children.is_empty() && projected_size > options.max_chunk_bytes {
2105
+ groups.push(std::mem::take(&mut current));
2106
+ }
2107
+
2108
+ current.first_key_bytes += child.first_key.len();
2109
+ current.last_key_bytes += child.last_key.len();
2110
+ current.children.push(child);
2111
+ let current_size = estimate_internal_chunk_size(
2112
+ current.children.len(),
2113
+ current.first_key_bytes,
2114
+ current.last_key_bytes,
2115
+ );
2116
+ if current_size >= options.min_chunk_bytes
2117
+ && (current_size >= options.max_chunk_bytes
2118
+ || current.children.last().is_some_and(|child| {
2119
+ boundary_trigger(
2120
+ child.first_key,
2121
+ level,
2122
+ current_size,
2123
+ item_size,
2124
+ options.target_chunk_bytes,
2125
+ )
2126
+ }))
2127
+ {
2128
+ groups.push(std::mem::take(&mut current));
2129
+ }
2130
+ }
2131
+ if !current.children.is_empty() {
2132
+ groups.push(current);
2133
+ }
2134
+ groups
2135
+ }
2136
+
2137
/// Estimates the encoded size, in bytes, of a leaf chunk holding
/// `entry_count` entries whose keys and values total `key_bytes` and
/// `value_bytes`: a fixed 10 bytes, plus 12 bytes per entry, plus the payload.
fn estimate_leaf_chunk_size(entry_count: usize, key_bytes: usize, value_bytes: usize) -> usize {
    const FIXED_OVERHEAD: usize = 10;
    const PER_ENTRY_OVERHEAD: usize = 12;

    let overhead = FIXED_OVERHEAD + entry_count * PER_ENTRY_OVERHEAD;
    overhead + key_bytes + value_bytes
}
2140
+
2141
/// Estimates the encoded size, in bytes, of a single leaf entry: 12 bytes of
/// per-entry overhead plus the key and value payloads.
fn estimate_leaf_entry_size(key_bytes: usize, value_bytes: usize) -> usize {
    const PER_ENTRY_OVERHEAD: usize = 12;

    let with_key = PER_ENTRY_OVERHEAD + key_bytes;
    with_key + value_bytes
}
2144
+
2145
+ fn estimate_internal_chunk_size(
2146
+ child_count: usize,
2147
+ first_key_bytes: usize,
2148
+ last_key_bytes: usize,
2149
+ ) -> usize {
2150
+ 16 + child_count * (8 + TRACKED_STATE_HASH_BYTES + std::mem::size_of::<u64>())
2151
+ + first_key_bytes
2152
+ + last_key_bytes
2153
+ }
2154
+
2155
+ fn first_resync_index(
2156
+ generated: &[ChildSummary],
2157
+ existing: &[ChildSummary],
2158
+ mutation_key: &[u8],
2159
+ ) -> Option<(usize, usize)> {
2160
+ for (generated_index, generated) in generated.iter().enumerate() {
2161
+ // A matching old chunk before the mutation key is only unchanged
2162
+ // prefix; resync is only valid after the mutation has been emitted.
2163
+ if generated.first_key.as_slice() <= mutation_key {
2164
+ continue;
2165
+ }
2166
+ if let Some(existing_index) = existing.iter().position(|existing| generated == existing) {
2167
+ return Some((generated_index, existing_index));
2168
+ }
2169
+ }
2170
+ None
2171
+ }
2172
+
2173
+ fn internal_boundaries_match(left: &[ChildSummary], right: &[ChildSummary]) -> bool {
2174
+ left.len() == right.len()
2175
+ && left.iter().zip(right).all(|(left, right)| {
2176
+ left.first_key == right.first_key && left.last_key == right.last_key
2177
+ })
2178
+ }
2179
+
2180
+ async fn child_summaries_are_leaves(
2181
+ tree: &TrackedStateTree,
2182
+ store: &mut impl StorageReader,
2183
+ children: &[ChildSummary],
2184
+ ) -> Result<bool, LixError> {
2185
+ let Some(first_child) = children.first() else {
2186
+ return Ok(false);
2187
+ };
2188
+ Ok(matches!(
2189
+ tree.load_node(store, &first_child.child_hash).await?,
2190
+ DecodedNode::Leaf(_)
2191
+ ))
2192
+ }
2193
+
2194
+ fn decode_entry(
2195
+ entry: &EncodedLeafEntry,
2196
+ ) -> Result<(TrackedStateKey, TrackedStateIndexValue), LixError> {
2197
+ Ok((decode_key(&entry.key)?, decode_value(&entry.value)?))
2198
+ }
2199
+
2200
+ fn parent_index_for_child_index(
2201
+ old_children: &[ChildSummary],
2202
+ old_parents: &[ChildSummary],
2203
+ child_index: usize,
2204
+ ) -> usize {
2205
+ let key = if child_index < old_children.len() {
2206
+ old_children[child_index].first_key.as_slice()
2207
+ } else {
2208
+ old_children
2209
+ .last()
2210
+ .map(|child| child.last_key.as_slice())
2211
+ .unwrap_or_default()
2212
+ };
2213
+ old_parents
2214
+ .iter()
2215
+ .position(|parent| parent.last_key.as_slice() >= key)
2216
+ .unwrap_or_else(|| old_parents.len().saturating_sub(1))
2217
+ }
2218
+
2219
+ fn child_range_for_parent(
2220
+ old_children: &[ChildSummary],
2221
+ parent: &ChildSummary,
2222
+ ) -> Result<Range<usize>, LixError> {
2223
+ let start = old_children
2224
+ .iter()
2225
+ .position(|child| child.last_key.as_slice() >= parent.first_key.as_slice())
2226
+ .ok_or_else(|| {
2227
+ LixError::new(
2228
+ "LIX_ERROR_UNKNOWN",
2229
+ "tracked-state parent summary does not overlap child summaries",
2230
+ )
2231
+ })?;
2232
+ let end = old_children[start..]
2233
+ .iter()
2234
+ .position(|child| child.last_key == parent.last_key)
2235
+ .map(|offset| start + offset + 1)
2236
+ .ok_or_else(|| {
2237
+ LixError::new(
2238
+ "LIX_ERROR_UNKNOWN",
2239
+ "tracked-state parent summary end does not match child summaries",
2240
+ )
2241
+ })?;
2242
+ Ok(start..end)
2243
+ }
2244
+
2245
+ fn leaf_summary(
2246
+ hash: [u8; TRACKED_STATE_HASH_BYTES],
2247
+ entries: &[EncodedLeafEntry],
2248
+ ) -> ChildSummary {
2249
+ ChildSummary {
2250
+ first_key: entries
2251
+ .first()
2252
+ .map(|entry| entry.key.clone())
2253
+ .unwrap_or_default(),
2254
+ last_key: entries
2255
+ .last()
2256
+ .map(|entry| entry.key.clone())
2257
+ .unwrap_or_default(),
2258
+ child_hash: hash,
2259
+ subtree_count: entries.len() as u64,
2260
+ }
2261
+ }
2262
+
2263
+ fn internal_summary(
2264
+ hash: [u8; TRACKED_STATE_HASH_BYTES],
2265
+ children: &[ChildSummary],
2266
+ ) -> Result<ChildSummary, LixError> {
2267
+ let first_key = children
2268
+ .first()
2269
+ .map(|child| child.first_key.clone())
2270
+ .ok_or_else(|| {
2271
+ LixError::new(
2272
+ "LIX_ERROR_UNKNOWN",
2273
+ "tracked-state internal node has no children",
2274
+ )
2275
+ })?;
2276
+ let last_key = children
2277
+ .last()
2278
+ .map(|child| child.last_key.clone())
2279
+ .ok_or_else(|| {
2280
+ LixError::new(
2281
+ "LIX_ERROR_UNKNOWN",
2282
+ "tracked-state internal node has no children",
2283
+ )
2284
+ })?;
2285
+ Ok(ChildSummary {
2286
+ first_key,
2287
+ last_key,
2288
+ child_hash: hash,
2289
+ subtree_count: children.iter().map(|child| child.subtree_count).sum(),
2290
+ })
2291
+ }
2292
+
2293
+ fn push_level_summary(levels: &mut Vec<Vec<ChildSummary>>, level: usize, summary: ChildSummary) {
2294
+ while levels.len() <= level {
2295
+ levels.push(Vec::new());
2296
+ }
2297
+ levels[level].push(summary);
2298
+ }
2299
+
2300
+ fn scan_ranges(request: &TrackedStateTreeScanRequest) -> Vec<EncodedScanRange> {
2301
+ if request.schema_keys.is_empty() {
2302
+ return Vec::new();
2303
+ }
2304
+
2305
+ let can_bind_entity = !request.entity_ids.is_empty()
2306
+ && !request.file_ids.is_empty()
2307
+ && request
2308
+ .file_ids
2309
+ .iter()
2310
+ .all(|filter| !matches!(filter, NullableKeyFilter::Any));
2311
+
2312
+ let mut ranges = Vec::new();
2313
+ for schema_key in &request.schema_keys {
2314
+ if can_bind_entity {
2315
+ for file_filter in &request.file_ids {
2316
+ let file_id = match file_filter {
2317
+ NullableKeyFilter::Null => None,
2318
+ NullableKeyFilter::Value(file_id) => Some(file_id.clone()),
2319
+ NullableKeyFilter::Any => unreachable!("filtered above"),
2320
+ };
2321
+ for entity_id in &request.entity_ids {
2322
+ let key = TrackedStateKey {
2323
+ schema_key: schema_key.clone(),
2324
+ file_id: file_id.clone(),
2325
+ entity_id: entity_id.clone(),
2326
+ };
2327
+ ranges.push(exact_scan_range(encode_key(&key)));
2328
+ }
2329
+ }
2330
+ continue;
2331
+ }
2332
+
2333
+ if request.file_ids.is_empty()
2334
+ || request
2335
+ .file_ids
2336
+ .iter()
2337
+ .any(|filter| matches!(filter, NullableKeyFilter::Any))
2338
+ {
2339
+ ranges.push(prefix_scan_range(encode_schema_key_prefix(schema_key)));
2340
+ continue;
2341
+ }
2342
+
2343
+ for file_filter in &request.file_ids {
2344
+ let prefix = match file_filter {
2345
+ NullableKeyFilter::Null => encode_schema_file_prefix(schema_key, None),
2346
+ NullableKeyFilter::Value(file_id) => {
2347
+ encode_schema_file_prefix(schema_key, Some(file_id))
2348
+ }
2349
+ NullableKeyFilter::Any => unreachable!("handled above"),
2350
+ };
2351
+ ranges.push(prefix_scan_range(prefix));
2352
+ }
2353
+ }
2354
+ ranges
2355
+ }
2356
+
2357
+ fn scan_key_decode_hint<'a>(
2358
+ request: &'a TrackedStateTreeScanRequest,
2359
+ ranges: &[EncodedScanRange],
2360
+ ) -> Option<ScanKeyDecodeHint<'a>> {
2361
+ if ranges.len() != 1 || request.schema_keys.len() != 1 || request.file_ids.len() != 1 {
2362
+ return None;
2363
+ }
2364
+ if !request.entity_ids.is_empty() {
2365
+ return None;
2366
+ }
2367
+ let file_id = match request.file_ids.first()? {
2368
+ NullableKeyFilter::Null => None,
2369
+ NullableKeyFilter::Value(file_id) => Some(file_id.as_str()),
2370
+ NullableKeyFilter::Any => return None,
2371
+ };
2372
+ Some(ScanKeyDecodeHint {
2373
+ schema_key: request.schema_keys.first()?.as_str(),
2374
+ file_id,
2375
+ prefix_len: ranges.first()?.start.len(),
2376
+ })
2377
+ }
2378
+
2379
+ fn prefix_scan_range(prefix: Vec<u8>) -> EncodedScanRange {
2380
+ EncodedScanRange {
2381
+ end: lexicographic_successor(&prefix),
2382
+ start: prefix,
2383
+ }
2384
+ }
2385
+
2386
+ fn exact_scan_range(key: Vec<u8>) -> EncodedScanRange {
2387
+ EncodedScanRange {
2388
+ end: lexicographic_successor(&key),
2389
+ start: key,
2390
+ }
2391
+ }
2392
+
2393
/// Returns the smallest byte string that is greater than every string
/// starting with `bytes`.
///
/// The rightmost byte that is not `0xFF` is incremented and everything after
/// it is dropped. Returns `None` when `bytes` is empty or consists solely of
/// `0xFF` bytes, in which case no such bound exists.
fn lexicographic_successor(bytes: &[u8]) -> Option<Vec<u8>> {
    let bump_at = bytes.iter().rposition(|&byte| byte != u8::MAX)?;
    let mut successor = bytes[..=bump_at].to_vec();
    successor[bump_at] += 1;
    Some(successor)
}
2404
+
2405
+ fn child_summary_overlaps_scan_ranges(child: &ChildSummary, ranges: &[EncodedScanRange]) -> bool {
2406
+ ranges.is_empty()
2407
+ || ranges.iter().any(|range| {
2408
+ child.last_key.as_slice() >= range.start.as_slice()
2409
+ && range
2410
+ .end
2411
+ .as_ref()
2412
+ .is_none_or(|end| child.first_key.as_slice() < end.as_slice())
2413
+ })
2414
+ }
2415
+
2416
+ fn child_summary_contained_by_scan_ranges(
2417
+ child: &ChildSummary,
2418
+ ranges: &[EncodedScanRange],
2419
+ ) -> bool {
2420
+ ranges.is_empty()
2421
+ || ranges.iter().any(|range| {
2422
+ child.first_key.as_slice() >= range.start.as_slice()
2423
+ && range
2424
+ .end
2425
+ .as_ref()
2426
+ .is_none_or(|end| child.last_key.as_slice() < end.as_slice())
2427
+ })
2428
+ }
2429
+
2430
+ fn encoded_key_in_scan_ranges(key: &[u8], ranges: &[EncodedScanRange]) -> bool {
2431
+ ranges.is_empty()
2432
+ || ranges.iter().any(|range| {
2433
+ key >= range.start.as_slice()
2434
+ && range.end.as_ref().is_none_or(|end| key < end.as_slice())
2435
+ })
2436
+ }
2437
+
2438
+ fn key_matches_scan_filters(request: &TrackedStateTreeScanRequest, key: &TrackedStateKey) -> bool {
2439
+ if !request.schema_keys.is_empty() && !request.schema_keys.contains(&key.schema_key) {
2440
+ return false;
2441
+ }
2442
+ if !request.entity_ids.is_empty() && !request.entity_ids.contains(&key.entity_id) {
2443
+ return false;
2444
+ }
2445
+ if !request.file_ids.is_empty()
2446
+ && !request
2447
+ .file_ids
2448
+ .iter()
2449
+ .any(|filter| filter.matches(key.file_id.as_ref()))
2450
+ {
2451
+ return false;
2452
+ }
2453
+ true
2454
+ }
2455
+
2456
+ fn scan_limit_reached(request: &TrackedStateTreeScanRequest, row_count: usize) -> bool {
2457
+ request.limit.is_some_and(|limit| row_count >= limit)
2458
+ }
2459
+
2460
+ #[cfg(test)]
2461
+ mod tests {
2462
+ use std::sync::Arc;
2463
+
2464
+ use super::*;
2465
+ use crate::backend::testing::UnitTestBackend;
2466
+ use crate::entity_identity::EntityIdentity;
2467
+ use crate::storage::{StorageContext, StorageWriteTransaction};
2468
+ use crate::tracked_state::codec::encode_value;
2469
+
2470
+ #[tokio::test]
2471
+ async fn exact_read_roundtrips_from_stored_root() {
2472
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2473
+ let tree = TrackedStateTree::new();
2474
+ let key = key("schema", None, "entity");
2475
+ let value = value("change-1", Some("{}"));
2476
+
2477
+ let mut transaction = storage
2478
+ .begin_write_transaction()
2479
+ .await
2480
+ .expect("transaction should open");
2481
+ let result = apply_mutations_for_test(
2482
+ &tree,
2483
+ transaction.as_mut(),
2484
+ None,
2485
+ vec![mutation(&key, &value)],
2486
+ Some("commit-1"),
2487
+ )
2488
+ .await
2489
+ .expect("mutations should apply");
2490
+ transaction
2491
+ .commit()
2492
+ .await
2493
+ .expect("transaction should commit");
2494
+
2495
+ let mut store = storage.clone();
2496
+ assert_eq!(
2497
+ tree.load_root(&mut store, "commit-1")
2498
+ .await
2499
+ .expect("root should load"),
2500
+ Some(result.root_id.clone())
2501
+ );
2502
+ assert_eq!(
2503
+ tree.get(&mut store, &result.root_id, &key)
2504
+ .await
2505
+ .expect("row should load"),
2506
+ Some(value)
2507
+ );
2508
+ }
2509
+
2510
+ #[tokio::test]
2511
+ async fn latest_mutation_for_key_wins() {
2512
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2513
+ let tree = TrackedStateTree::new();
2514
+ let key = key("schema", None, "entity");
2515
+ let old_value = value("change-old", Some("{\"v\":1}"));
2516
+ let new_value = value("change-new", Some("{\"v\":2}"));
2517
+
2518
+ let mut transaction = storage
2519
+ .begin_write_transaction()
2520
+ .await
2521
+ .expect("transaction should open");
2522
+ let result = apply_mutations_for_test(
2523
+ &tree,
2524
+ transaction.as_mut(),
2525
+ None,
2526
+ vec![mutation(&key, &old_value), mutation(&key, &new_value)],
2527
+ None,
2528
+ )
2529
+ .await
2530
+ .expect("mutations should apply");
2531
+ transaction
2532
+ .commit()
2533
+ .await
2534
+ .expect("transaction should commit");
2535
+
2536
+ let mut store = storage.clone();
2537
+ let loaded = tree
2538
+ .get(&mut store, &result.root_id, &key)
2539
+ .await
2540
+ .expect("row should load")
2541
+ .expect("row should exist");
2542
+ assert_eq!(loaded.change_locator.change_id, "change-new");
2543
+ assert_eq!(loaded.change_locator.source_commit_id, "commit");
2544
+ }
2545
+
2546
+ #[tokio::test]
2547
+ async fn scan_filters_by_index_key_without_materializing_tombstones() {
2548
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2549
+ let tree = TrackedStateTree::new();
2550
+
2551
+ let mut transaction = storage
2552
+ .begin_write_transaction()
2553
+ .await
2554
+ .expect("transaction should open");
2555
+ let result = apply_mutations_for_test(
2556
+ &tree,
2557
+ transaction.as_mut(),
2558
+ None,
2559
+ vec![
2560
+ mutation_owned(key("schema-a", None, "visible"), value("c1", Some("{}"))),
2561
+ mutation_owned(key("schema-a", None, "deleted"), value("c2", None)),
2562
+ mutation_owned(key("schema-b", None, "other"), value("c3", Some("{}"))),
2563
+ ],
2564
+ None,
2565
+ )
2566
+ .await
2567
+ .expect("mutations should apply");
2568
+ transaction
2569
+ .commit()
2570
+ .await
2571
+ .expect("transaction should commit");
2572
+
2573
+ let mut store = storage.clone();
2574
+ let rows = tree
2575
+ .scan(
2576
+ &mut store,
2577
+ &result.root_id,
2578
+ &TrackedStateTreeScanRequest {
2579
+ schema_keys: vec!["schema-a".to_string()],
2580
+ ..Default::default()
2581
+ },
2582
+ )
2583
+ .await
2584
+ .expect("scan should succeed");
2585
+ assert_eq!(rows.len(), 2);
2586
+ let identities = rows
2587
+ .iter()
2588
+ .map(|(key, _)| key.entity_id.as_single_string_owned().expect("identity"))
2589
+ .collect::<Vec<_>>();
2590
+ assert_eq!(identities, vec!["deleted", "visible"]);
2591
+
2592
+ let live_rows = tree
2593
+ .scan(
2594
+ &mut store,
2595
+ &result.root_id,
2596
+ &TrackedStateTreeScanRequest {
2597
+ schema_keys: vec!["schema-a".to_string()],
2598
+ include_tombstones: false,
2599
+ ..Default::default()
2600
+ },
2601
+ )
2602
+ .await
2603
+ .expect("live scan should succeed");
2604
+ let live_identities = live_rows
2605
+ .iter()
2606
+ .map(|(key, _)| key.entity_id.as_single_string_owned().expect("identity"))
2607
+ .collect::<Vec<_>>();
2608
+ assert_eq!(live_identities, vec!["visible"]);
2609
+ }
2610
+
2611
+ #[tokio::test]
2612
+ async fn scan_filters_by_schema_entity_and_file() {
2613
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2614
+ let tree = TrackedStateTree::new();
2615
+
2616
+ let mut transaction = storage
2617
+ .begin_write_transaction()
2618
+ .await
2619
+ .expect("transaction should open");
2620
+ let result = apply_mutations_for_test(
2621
+ &tree,
2622
+ transaction.as_mut(),
2623
+ None,
2624
+ vec![
2625
+ mutation_owned(
2626
+ key("schema-a", Some("file-a"), "entity-a"),
2627
+ value("c1", Some("{}")),
2628
+ ),
2629
+ mutation_owned(
2630
+ key("schema-a", Some("file-b"), "entity-a"),
2631
+ value("c2", Some("{}")),
2632
+ ),
2633
+ mutation_owned(
2634
+ key("schema-a", Some("file-a"), "entity-b"),
2635
+ value("c3", Some("{}")),
2636
+ ),
2637
+ mutation_owned(
2638
+ key("schema-b", Some("file-a"), "entity-a"),
2639
+ value("c4", Some("{}")),
2640
+ ),
2641
+ ],
2642
+ None,
2643
+ )
2644
+ .await
2645
+ .expect("mutations should apply");
2646
+ transaction
2647
+ .commit()
2648
+ .await
2649
+ .expect("transaction should commit");
2650
+
2651
+ let mut store = storage.clone();
2652
+ let rows = tree
2653
+ .scan(
2654
+ &mut store,
2655
+ &result.root_id,
2656
+ &TrackedStateTreeScanRequest {
2657
+ schema_keys: vec!["schema-a".to_string()],
2658
+ entity_ids: vec![crate::entity_identity::EntityIdentity::single("entity-a")],
2659
+ file_ids: vec![crate::NullableKeyFilter::Value("file-a".to_string())],
2660
+ ..Default::default()
2661
+ },
2662
+ )
2663
+ .await
2664
+ .expect("scan should succeed");
2665
+
2666
+ assert_eq!(rows.len(), 1);
2667
+ assert_eq!(rows[0].0.schema_key, "schema-a");
2668
+ assert_eq!(
2669
+ rows[0]
2670
+ .0
2671
+ .entity_id
2672
+ .as_single_string_owned()
2673
+ .expect("identity"),
2674
+ "entity-a"
2675
+ );
2676
+ assert_eq!(rows[0].0.file_id.as_deref(), Some("file-a"));
2677
+ }
2678
+
2679
+ #[tokio::test]
2680
+ async fn scan_schema_file_prefix_honors_tombstones_and_limit() {
2681
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2682
+ let tree = TrackedStateTree::new();
2683
+
2684
+ let mut transaction = storage
2685
+ .begin_write_transaction()
2686
+ .await
2687
+ .expect("transaction should open");
2688
+ let result = apply_mutations_for_test(
2689
+ &tree,
2690
+ transaction.as_mut(),
2691
+ None,
2692
+ vec![
2693
+ mutation_owned(
2694
+ key("schema-a", Some("file-a"), "entity-a"),
2695
+ value("c1", Some("{}")),
2696
+ ),
2697
+ mutation_owned(
2698
+ key("schema-a", Some("file-a"), "entity-b"),
2699
+ value("c2", None),
2700
+ ),
2701
+ mutation_owned(
2702
+ key("schema-a", Some("file-a"), "entity-c"),
2703
+ value("c3", Some("{}")),
2704
+ ),
2705
+ mutation_owned(
2706
+ key("schema-a", Some("file-b"), "entity-d"),
2707
+ value("c4", Some("{}")),
2708
+ ),
2709
+ ],
2710
+ None,
2711
+ )
2712
+ .await
2713
+ .expect("mutations should apply");
2714
+ transaction
2715
+ .commit()
2716
+ .await
2717
+ .expect("transaction should commit");
2718
+
2719
+ let mut store = storage.clone();
2720
+ let rows = tree
2721
+ .scan(
2722
+ &mut store,
2723
+ &result.root_id,
2724
+ &TrackedStateTreeScanRequest {
2725
+ schema_keys: vec!["schema-a".to_string()],
2726
+ file_ids: vec![crate::NullableKeyFilter::Value("file-a".to_string())],
2727
+ include_tombstones: false,
2728
+ limit: Some(2),
2729
+ ..Default::default()
2730
+ },
2731
+ )
2732
+ .await
2733
+ .expect("scan should succeed");
2734
+
2735
+ assert_eq!(rows.len(), 2);
2736
+ assert!(rows.iter().all(
2737
+ |(key, _)| key.schema_key == "schema-a" && key.file_id.as_deref() == Some("file-a")
2738
+ ));
2739
+ assert_eq!(
2740
+ rows.iter()
2741
+ .map(|(key, _)| key.entity_id.as_single_string_owned().expect("identity"))
2742
+ .collect::<Vec<_>>(),
2743
+ vec!["entity-a", "entity-c"]
2744
+ );
2745
+ }
2746
+
2747
+ #[tokio::test]
2748
+ async fn applying_to_base_root_reuses_existing_rows_and_overwrites_changed_rows() {
2749
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2750
+ let tree = TrackedStateTree::new();
2751
+ let unchanged_key = key("schema", None, "unchanged");
2752
+ let changed_key = key("schema", None, "changed");
2753
+ let unchanged_value = value("c1", Some("{}"));
2754
+ let old_changed_value = value("c2", Some("{\"old\":true}"));
2755
+ let new_changed_value = value("c3", Some("{\"new\":true}"));
2756
+
2757
+ let mut transaction = storage
2758
+ .begin_write_transaction()
2759
+ .await
2760
+ .expect("transaction should open");
2761
+ let base = apply_mutations_for_test(
2762
+ &tree,
2763
+ transaction.as_mut(),
2764
+ None,
2765
+ vec![
2766
+ mutation(&unchanged_key, &unchanged_value),
2767
+ mutation(&changed_key, &old_changed_value),
2768
+ ],
2769
+ None,
2770
+ )
2771
+ .await
2772
+ .expect("base should build");
2773
+ let next = apply_mutations_for_test(
2774
+ &tree,
2775
+ transaction.as_mut(),
2776
+ Some(&base.root_id),
2777
+ vec![mutation(&changed_key, &new_changed_value)],
2778
+ None,
2779
+ )
2780
+ .await
2781
+ .expect("next should build");
2782
+ transaction
2783
+ .commit()
2784
+ .await
2785
+ .expect("transaction should commit");
2786
+
2787
+ let mut store = storage.clone();
2788
+ assert_eq!(
2789
+ tree.get(&mut store, &next.root_id, &unchanged_key)
2790
+ .await
2791
+ .expect("unchanged read")
2792
+ .expect("unchanged exists")
2793
+ .change_locator
2794
+ .change_id,
2795
+ "c1"
2796
+ );
2797
+ assert_eq!(
2798
+ tree.get(&mut store, &next.root_id, &changed_key)
2799
+ .await
2800
+ .expect("changed read")
2801
+ .expect("changed exists")
2802
+ .change_locator
2803
+ .change_id,
2804
+ "c3"
2805
+ );
2806
+ }
2807
+
2808
+ #[tokio::test]
2809
+ async fn two_commit_roots_can_share_unchanged_rows() {
2810
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2811
+ let tree = TrackedStateTree::new();
2812
+ let shared_key = key("schema", None, "shared");
2813
+ let branch_a_key = key("schema", None, "branch-a");
2814
+ let branch_b_key = key("schema", None, "branch-b");
2815
+ let shared_value = value("shared-change", Some("{\"shared\":true}"));
2816
+ let branch_a_value = value("branch-a-change", Some("{\"branch\":\"a\"}"));
2817
+ let branch_b_value = value("branch-b-change", Some("{\"branch\":\"b\"}"));
2818
+
2819
+ let mut transaction = storage
2820
+ .begin_write_transaction()
2821
+ .await
2822
+ .expect("transaction should open");
2823
+ let base = apply_mutations_for_test(
2824
+ &tree,
2825
+ transaction.as_mut(),
2826
+ None,
2827
+ vec![mutation(&shared_key, &shared_value)],
2828
+ Some("commit-base"),
2829
+ )
2830
+ .await
2831
+ .expect("base root should build");
2832
+ let branch_a = apply_mutations_for_test(
2833
+ &tree,
2834
+ transaction.as_mut(),
2835
+ Some(&base.root_id),
2836
+ vec![mutation(&branch_a_key, &branch_a_value)],
2837
+ Some("commit-a"),
2838
+ )
2839
+ .await
2840
+ .expect("branch a root should build");
2841
+ let branch_b = apply_mutations_for_test(
2842
+ &tree,
2843
+ transaction.as_mut(),
2844
+ Some(&base.root_id),
2845
+ vec![mutation(&branch_b_key, &branch_b_value)],
2846
+ Some("commit-b"),
2847
+ )
2848
+ .await
2849
+ .expect("branch b root should build");
2850
+ transaction
2851
+ .commit()
2852
+ .await
2853
+ .expect("transaction should commit");
2854
+
2855
+ assert_ne!(branch_a.root_id, branch_b.root_id);
2856
+ let mut store = storage.clone();
2857
+ assert_eq!(
2858
+ tree.get(&mut store, &branch_a.root_id, &shared_key)
2859
+ .await
2860
+ .expect("branch a shared row should load"),
2861
+ Some(value("shared-change", Some("{\"shared\":true}")))
2862
+ );
2863
+ assert_eq!(
2864
+ tree.get(&mut store, &branch_b.root_id, &shared_key)
2865
+ .await
2866
+ .expect("branch b shared row should load"),
2867
+ Some(value("shared-change", Some("{\"shared\":true}")))
2868
+ );
2869
+ assert!(tree
2870
+ .get(&mut store, &branch_a.root_id, &branch_b_key)
2871
+ .await
2872
+ .expect("branch a should read")
2873
+ .is_none());
2874
+ assert!(tree
2875
+ .get(&mut store, &branch_b.root_id, &branch_a_key)
2876
+ .await
2877
+ .expect("branch b should read")
2878
+ .is_none());
2879
+ }
2880
+
2881
+ #[tokio::test]
2882
+ async fn single_update_matches_full_canonical_rebuild() {
2883
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2884
+ let tree = TrackedStateTree::with_options(TrackedStateTreeOptions {
2885
+ target_chunk_bytes: 128,
2886
+ min_chunk_bytes: 64,
2887
+ max_chunk_bytes: 256,
2888
+ });
2889
+ let rows = (0..100)
2890
+ .map(|index| {
2891
+ mutation_owned(
2892
+ key("schema", None, &format!("entity-{index:03}")),
2893
+ value(&format!("c-{index}"), Some(&format!("{{\"v\":{index}}}"))),
2894
+ )
2895
+ })
2896
+ .collect::<Vec<_>>();
2897
+ let changed_key = key("schema", None, "entity-000");
2898
+ let changed_value = value("changed", Some("{\"v\":\"changed\"}"));
2899
+
2900
+ let mut transaction = storage
2901
+ .begin_write_transaction()
2902
+ .await
2903
+ .expect("transaction should open");
2904
+ let base = apply_mutations_for_test(&tree, transaction.as_mut(), None, rows, None)
2905
+ .await
2906
+ .expect("base should build");
2907
+ let fast = apply_mutations_for_test(
2908
+ &tree,
2909
+ transaction.as_mut(),
2910
+ Some(&base.root_id),
2911
+ vec![mutation(&changed_key, &changed_value)],
2912
+ None,
2913
+ )
2914
+ .await
2915
+ .expect("fast path should apply");
2916
+ let mut canonical_entries = tree
2917
+ .collect_leaf_entries(&mut transaction.as_mut(), &base.root_id)
2918
+ .await
2919
+ .expect("base entries should collect");
2920
+ assert!(canonical_entries
2921
+ .windows(2)
2922
+ .all(|window| window[0].key < window[1].key));
2923
+ let encoded_changed_key = encode_key(&changed_key);
2924
+ let encoded_changed_value = encode_value(&changed_value);
2925
+ let index = canonical_entries
2926
+ .binary_search_by(|entry| entry.key.as_slice().cmp(&encoded_changed_key))
2927
+ .expect("changed key should exist");
2928
+ canonical_entries[index].value = encoded_changed_value;
2929
+ let canonical = tree
2930
+ .build_tree_from_entries(canonical_entries)
2931
+ .expect("canonical root should build");
2932
+
2933
+ assert_eq!(fast.root_id, canonical.root_id);
2934
+ }
2935
+
2936
+ #[tokio::test]
2937
+ async fn single_insert_matches_full_canonical_rebuild() {
2938
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2939
+ let tree = TrackedStateTree::with_options(TrackedStateTreeOptions {
2940
+ target_chunk_bytes: 128,
2941
+ min_chunk_bytes: 64,
2942
+ max_chunk_bytes: 256,
2943
+ });
2944
+ let rows = (0..100)
2945
+ .map(|index| {
2946
+ mutation_owned(
2947
+ key("schema", None, &format!("entity-{index:03}")),
2948
+ value(&format!("c-{index}"), Some(&format!("{{\"v\":{index}}}"))),
2949
+ )
2950
+ })
2951
+ .collect::<Vec<_>>();
2952
+ let inserted_key = key("schema", None, "entity-050a");
2953
+ let inserted_value = value("inserted", Some("{\"v\":\"inserted\"}"));
2954
+
2955
+ let mut transaction = storage
2956
+ .begin_write_transaction()
2957
+ .await
2958
+ .expect("transaction should open");
2959
+ let base = apply_mutations_for_test(&tree, transaction.as_mut(), None, rows, None)
2960
+ .await
2961
+ .expect("base should build");
2962
+ let fast = apply_mutations_for_test(
2963
+ &tree,
2964
+ transaction.as_mut(),
2965
+ Some(&base.root_id),
2966
+ vec![mutation(&inserted_key, &inserted_value)],
2967
+ None,
2968
+ )
2969
+ .await
2970
+ .expect("fast path should apply");
2971
+ let mut canonical_entries = tree
2972
+ .collect_leaf_entries(&mut transaction.as_mut(), &base.root_id)
2973
+ .await
2974
+ .expect("base entries should collect");
2975
+ let encoded_inserted_key = encode_key(&inserted_key);
2976
+ let encoded_inserted_value = encode_value(&inserted_value);
2977
+ let index = canonical_entries
2978
+ .binary_search_by(|entry| entry.key.as_slice().cmp(&encoded_inserted_key))
2979
+ .expect_err("inserted key should not exist");
2980
+ canonical_entries.insert(
2981
+ index,
2982
+ EncodedLeafEntry {
2983
+ key: encoded_inserted_key,
2984
+ value: encoded_inserted_value,
2985
+ },
2986
+ );
2987
+ let canonical = tree
2988
+ .build_tree_from_entries(canonical_entries)
2989
+ .expect("canonical root should build");
2990
+
2991
+ assert_eq!(fast.root_id, canonical.root_id);
2992
+ }
2993
+
2994
+ #[tokio::test]
2995
+ async fn batch_update_matches_full_canonical_rebuild() {
2996
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
2997
+ let tree = TrackedStateTree::with_options(TrackedStateTreeOptions {
2998
+ target_chunk_bytes: 128,
2999
+ min_chunk_bytes: 64,
3000
+ max_chunk_bytes: 256,
3001
+ });
3002
+ let rows = (0..100)
3003
+ .map(|index| {
3004
+ mutation_owned(
3005
+ key("schema", None, &format!("entity-{index:03}")),
3006
+ value(&format!("c-{index}"), Some(&format!("{{\"v\":{index}}}"))),
3007
+ )
3008
+ })
3009
+ .collect::<Vec<_>>();
3010
+ let updates = (10..25)
3011
+ .map(|index| {
3012
+ (
3013
+ key("schema", None, &format!("entity-{index:03}")),
3014
+ value(
3015
+ &format!("changed-{index}"),
3016
+ Some(&format!("{{\"changed\":{index}}}")),
3017
+ ),
3018
+ )
3019
+ })
3020
+ .collect::<Vec<_>>();
3021
+
3022
+ let mut transaction = storage
3023
+ .begin_write_transaction()
3024
+ .await
3025
+ .expect("transaction should open");
3026
+ let base = apply_mutations_for_test(&tree, transaction.as_mut(), None, rows, None)
3027
+ .await
3028
+ .expect("base should build");
3029
+ let fast = apply_mutations_for_test(
3030
+ &tree,
3031
+ transaction.as_mut(),
3032
+ Some(&base.root_id),
3033
+ updates
3034
+ .iter()
3035
+ .map(|(key, value)| mutation(key, value))
3036
+ .collect(),
3037
+ None,
3038
+ )
3039
+ .await
3040
+ .expect("batch path should apply");
3041
+ let mut canonical_entries = tree
3042
+ .collect_leaf_entries(&mut transaction.as_mut(), &base.root_id)
3043
+ .await
3044
+ .expect("base entries should collect");
3045
+ for (key, value) in updates {
3046
+ let encoded_key = encode_key(&key);
3047
+ let encoded_value = encode_value(&value);
3048
+ let index = canonical_entries
3049
+ .binary_search_by(|entry| entry.key.as_slice().cmp(&encoded_key))
3050
+ .expect("updated key should exist");
3051
+ canonical_entries[index].value = encoded_value;
3052
+ }
3053
+ let canonical = tree
3054
+ .build_tree_from_entries(canonical_entries)
3055
+ .expect("canonical root should build");
3056
+
3057
+ assert_eq!(fast.root_id, canonical.root_id);
3058
+ }
3059
+
3060
+ #[tokio::test]
3061
+ async fn batch_insert_matches_full_canonical_rebuild() {
3062
+ let storage = StorageContext::new(Arc::new(UnitTestBackend::new()));
3063
+ let tree = TrackedStateTree::with_options(TrackedStateTreeOptions {
3064
+ target_chunk_bytes: 128,
3065
+ min_chunk_bytes: 64,
3066
+ max_chunk_bytes: 256,
3067
+ });
3068
+ let rows = (0..100)
3069
+ .map(|index| {
3070
+ mutation_owned(
3071
+ key("schema", None, &format!("entity-{index:03}")),
3072
+ value(&format!("c-{index}"), Some(&format!("{{\"v\":{index}}}"))),
3073
+ )
3074
+ })
3075
+ .collect::<Vec<_>>();
3076
+ let inserts = ["entity-050a", "entity-050b", "entity-050c"]
3077
+ .into_iter()
3078
+ .enumerate()
3079
+ .map(|(index, entity_id)| {
3080
+ (
3081
+ key("schema", None, entity_id),
3082
+ value(
3083
+ &format!("inserted-{index}"),
3084
+ Some(&format!("{{\"inserted\":{index}}}")),
3085
+ ),
3086
+ )
3087
+ })
3088
+ .collect::<Vec<_>>();
3089
+
3090
+ let mut transaction = storage
3091
+ .begin_write_transaction()
3092
+ .await
3093
+ .expect("transaction should open");
3094
+ let base = apply_mutations_for_test(&tree, transaction.as_mut(), None, rows, None)
3095
+ .await
3096
+ .expect("base should build");
3097
+ let fast = apply_mutations_for_test(
3098
+ &tree,
3099
+ transaction.as_mut(),
3100
+ Some(&base.root_id),
3101
+ inserts
3102
+ .iter()
3103
+ .map(|(key, value)| mutation(key, value))
3104
+ .collect(),
3105
+ None,
3106
+ )
3107
+ .await
3108
+ .expect("batch path should apply");
3109
+ let mut canonical_entries = tree
3110
+ .collect_leaf_entries(&mut transaction.as_mut(), &base.root_id)
3111
+ .await
3112
+ .expect("base entries should collect");
3113
+ for (key, value) in inserts {
3114
+ let encoded_key = encode_key(&key);
3115
+ let encoded_value = encode_value(&value);
3116
+ let index = canonical_entries
3117
+ .binary_search_by(|entry| entry.key.as_slice().cmp(&encoded_key))
3118
+ .expect_err("inserted key should not exist");
3119
+ canonical_entries.insert(
3120
+ index,
3121
+ EncodedLeafEntry {
3122
+ key: encoded_key,
3123
+ value: encoded_value,
3124
+ },
3125
+ );
3126
+ }
3127
+ let canonical = tree
3128
+ .build_tree_from_entries(canonical_entries)
3129
+ .expect("canonical root should build");
3130
+
3131
+ assert_eq!(fast.root_id, canonical.root_id);
3132
+ }
3133
+
3134
+ async fn apply_mutations_for_test(
3135
+ tree: &TrackedStateTree,
3136
+ transaction: &mut dyn StorageWriteTransaction,
3137
+ base_root: Option<&TrackedStateRootId>,
3138
+ mutations: Vec<TrackedStateMutation>,
3139
+ commit_id: Option<&str>,
3140
+ ) -> Result<TrackedStateApplyResult, LixError> {
3141
+ let mut writes = StorageWriteSet::new();
3142
+ let result = tree
3143
+ .apply_mutations(transaction, &mut writes, base_root, mutations, commit_id)
3144
+ .await?;
3145
+ writes.apply(transaction).await?;
3146
+ Ok(result)
3147
+ }
3148
+
3149
+ fn mutation(key: &TrackedStateKey, value: &TrackedStateIndexValue) -> TrackedStateMutation {
3150
+ TrackedStateMutation::put_encoded(encode_key(key), encode_value(value))
3151
+ }
3152
+
3153
+ fn mutation_owned(key: TrackedStateKey, value: TrackedStateIndexValue) -> TrackedStateMutation {
3154
+ mutation(&key, &value)
3155
+ }
3156
+
3157
+ fn key(schema_key: &str, file_id: Option<&str>, entity_id: &str) -> TrackedStateKey {
3158
+ TrackedStateKey {
3159
+ schema_key: schema_key.to_string(),
3160
+ file_id: file_id.map(str::to_string),
3161
+ entity_id: EntityIdentity::single(entity_id),
3162
+ }
3163
+ }
3164
+
3165
+ fn value(change_id: &str, snapshot_content: Option<&str>) -> TrackedStateIndexValue {
3166
+ let source_ordinal = match snapshot_content {
3167
+ Some("{\"v\":1}") => 1,
3168
+ Some("{\"v\":2}") => 2,
3169
+ Some(_) => 3,
3170
+ None => 0,
3171
+ };
3172
+ TrackedStateIndexValue {
3173
+ change_locator: crate::commit_store::ChangeLocator {
3174
+ source_commit_id: "commit".to_string(),
3175
+ source_pack_id: 0,
3176
+ source_ordinal,
3177
+ change_id: change_id.to_string(),
3178
+ },
3179
+ deleted: snapshot_content.is_none(),
3180
+ snapshot_ref: snapshot_content
3181
+ .map(|content| crate::json_store::JsonRef::for_content(content.as_bytes())),
3182
+ metadata_ref: None,
3183
+ created_at: "2026-01-01T00:00:00Z".to_string(),
3184
+ updated_at: "2026-01-01T00:00:00Z".to_string(),
3185
+ }
3186
+ }
3187
+ }