@lix-js/sdk 0.6.0-preview.2 → 0.6.0-preview.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +46 -8
- package/dist/engine-wasm/wasm/lix_engine.d.ts +25 -1
- package/dist/engine-wasm/wasm/lix_engine.js +60 -2
- package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
- package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +5 -0
- package/dist/generated/builtin-schemas.d.ts +87 -162
- package/dist/generated/builtin-schemas.js +139 -236
- package/dist/open-lix.d.ts +10 -3
- package/dist/open-lix.js +39 -0
- package/dist-engine-src/src/binary_cas/types.rs +0 -6
- package/dist-engine-src/src/catalog/context.rs +412 -0
- package/dist-engine-src/src/catalog/mod.rs +10 -0
- package/dist-engine-src/src/catalog/schema.rs +4 -0
- package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
- package/dist-engine-src/src/cel/mod.rs +1 -1
- package/dist-engine-src/src/cel/provider.rs +1 -1
- package/dist-engine-src/src/commit_graph/context.rs +328 -1015
- package/dist-engine-src/src/commit_graph/mod.rs +2 -3
- package/dist-engine-src/src/commit_graph/types.rs +7 -43
- package/dist-engine-src/src/commit_graph/walker.rs +57 -81
- package/dist-engine-src/src/commit_store/codec.rs +887 -0
- package/dist-engine-src/src/commit_store/context.rs +944 -0
- package/dist-engine-src/src/commit_store/materialization.rs +84 -0
- package/dist-engine-src/src/commit_store/mod.rs +16 -0
- package/dist-engine-src/src/commit_store/storage.rs +600 -0
- package/dist-engine-src/src/commit_store/types.rs +215 -0
- package/dist-engine-src/src/common/identity.rs +15 -5
- package/dist-engine-src/src/common/json_pointer.rs +67 -0
- package/dist-engine-src/src/common/metadata.rs +17 -12
- package/dist-engine-src/src/common/mod.rs +5 -5
- package/dist-engine-src/src/domain.rs +324 -0
- package/dist-engine-src/src/engine.rs +29 -43
- package/dist-engine-src/src/entity_identity.rs +238 -118
- package/dist-engine-src/src/functions/context.rs +17 -52
- package/dist-engine-src/src/functions/deterministic.rs +1 -1
- package/dist-engine-src/src/functions/mod.rs +1 -1
- package/dist-engine-src/src/functions/provider.rs +4 -4
- package/dist-engine-src/src/functions/state.rs +39 -66
- package/dist-engine-src/src/functions/types.rs +1 -1
- package/dist-engine-src/src/init.rs +204 -151
- package/dist-engine-src/src/json_store/context.rs +354 -60
- package/dist-engine-src/src/json_store/encoded.rs +6 -6
- package/dist-engine-src/src/json_store/mod.rs +4 -1
- package/dist-engine-src/src/json_store/store.rs +884 -11
- package/dist-engine-src/src/json_store/types.rs +166 -1
- package/dist-engine-src/src/lib.rs +11 -10
- package/dist-engine-src/src/live_state/context.rs +608 -830
- package/dist-engine-src/src/live_state/mod.rs +3 -3
- package/dist-engine-src/src/live_state/overlay.rs +7 -7
- package/dist-engine-src/src/live_state/reader.rs +5 -5
- package/dist-engine-src/src/live_state/types.rs +19 -36
- package/dist-engine-src/src/live_state/visibility.rs +19 -14
- package/dist-engine-src/src/plugin/archive.rs +3 -6
- package/dist-engine-src/src/plugin/install.rs +0 -18
- package/dist-engine-src/src/plugin/plugin_manifest.json +0 -1
- package/dist-engine-src/src/schema/annotations/defaults.rs +2 -7
- package/dist-engine-src/src/schema/builtin/lix_account.json +0 -1
- package/dist-engine-src/src/schema/builtin/lix_active_account.json +0 -1
- package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +0 -1
- package/dist-engine-src/src/schema/builtin/lix_change.json +11 -10
- package/dist-engine-src/src/schema/builtin/lix_change_author.json +0 -1
- package/dist-engine-src/src/schema/builtin/lix_commit.json +8 -46
- package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +29 -22
- package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +0 -1
- package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +0 -1
- package/dist-engine-src/src/schema/builtin/lix_key_value.json +0 -1
- package/dist-engine-src/src/schema/builtin/lix_label.json +10 -3
- package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
- package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +2 -8
- package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +0 -1
- package/dist-engine-src/src/schema/builtin/lix_version_ref.json +0 -1
- package/dist-engine-src/src/schema/builtin/mod.rs +10 -59
- package/dist-engine-src/src/schema/compatibility.rs +787 -0
- package/dist-engine-src/src/schema/definition.json +47 -17
- package/dist-engine-src/src/schema/definition.rs +202 -96
- package/dist-engine-src/src/schema/key.rs +9 -77
- package/dist-engine-src/src/schema/mod.rs +4 -4
- package/dist-engine-src/src/schema/tests.rs +133 -92
- package/dist-engine-src/src/session/context.rs +86 -48
- package/dist-engine-src/src/session/create_version.rs +22 -14
- package/dist-engine-src/src/session/execute.rs +117 -23
- package/dist-engine-src/src/session/merge/apply.rs +4 -4
- package/dist-engine-src/src/session/merge/conflicts.rs +3 -2
- package/dist-engine-src/src/session/merge/stats.rs +1 -1
- package/dist-engine-src/src/session/merge/version.rs +35 -45
- package/dist-engine-src/src/session/mod.rs +9 -7
- package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
- package/dist-engine-src/src/session/switch_version.rs +17 -28
- package/dist-engine-src/src/session/transaction.rs +76 -0
- package/dist-engine-src/src/sql2/change_provider.rs +14 -20
- package/dist-engine-src/src/sql2/classify.rs +75 -48
- package/dist-engine-src/src/sql2/context.rs +22 -18
- package/dist-engine-src/src/sql2/directory_history_provider.rs +28 -20
- package/dist-engine-src/src/sql2/directory_provider.rs +131 -83
- package/dist-engine-src/src/sql2/entity_history_provider.rs +10 -14
- package/dist-engine-src/src/sql2/entity_provider.rs +680 -169
- package/dist-engine-src/src/sql2/error.rs +24 -5
- package/dist-engine-src/src/sql2/execute.rs +426 -272
- package/dist-engine-src/src/sql2/file_history_provider.rs +29 -21
- package/dist-engine-src/src/sql2/file_provider.rs +533 -108
- package/dist-engine-src/src/sql2/filesystem_planner.rs +58 -94
- package/dist-engine-src/src/sql2/filesystem_visibility.rs +37 -23
- package/dist-engine-src/src/sql2/history_projection.rs +3 -27
- package/dist-engine-src/src/sql2/history_provider.rs +11 -17
- package/dist-engine-src/src/sql2/history_route.rs +22 -8
- package/dist-engine-src/src/sql2/lix_state_provider.rs +178 -96
- package/dist-engine-src/src/sql2/mod.rs +8 -4
- package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
- package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
- package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
- package/dist-engine-src/src/sql2/public_bind/dml.rs +172 -0
- package/dist-engine-src/src/sql2/public_bind/mod.rs +26 -0
- package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
- package/dist-engine-src/src/sql2/read_only.rs +10 -12
- package/dist-engine-src/src/sql2/session.rs +7 -10
- package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
- package/dist-engine-src/src/sql2/udfs/mod.rs +8 -1
- package/dist-engine-src/src/sql2/udfs/public_call.rs +238 -0
- package/dist-engine-src/src/sql2/version_provider.rs +46 -31
- package/dist-engine-src/src/sql2/version_scope.rs +4 -4
- package/dist-engine-src/src/storage_bench.rs +1782 -325
- package/dist-engine-src/src/test_support.rs +183 -36
- package/dist-engine-src/src/tracked_state/by_file_index.rs +20 -24
- package/dist-engine-src/src/tracked_state/codec.rs +1519 -181
- package/dist-engine-src/src/tracked_state/context.rs +1155 -271
- package/dist-engine-src/src/tracked_state/diff.rs +249 -57
- package/dist-engine-src/src/tracked_state/materialization.rs +365 -103
- package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
- package/dist-engine-src/src/tracked_state/merge.rs +37 -19
- package/dist-engine-src/src/tracked_state/mod.rs +8 -7
- package/dist-engine-src/src/tracked_state/storage.rs +138 -6
- package/dist-engine-src/src/tracked_state/tree.rs +695 -252
- package/dist-engine-src/src/tracked_state/types.rs +176 -6
- package/dist-engine-src/src/transaction/commit.rs +695 -435
- package/dist-engine-src/src/transaction/context.rs +551 -310
- package/dist-engine-src/src/transaction/live_state_overlay.rs +9 -8
- package/dist-engine-src/src/transaction/mod.rs +2 -0
- package/dist-engine-src/src/transaction/normalization.rs +311 -447
- package/dist-engine-src/src/transaction/prep.rs +37 -0
- package/dist-engine-src/src/transaction/schema_resolver.rs +93 -71
- package/dist-engine-src/src/transaction/staging.rs +701 -406
- package/dist-engine-src/src/transaction/types.rs +231 -122
- package/dist-engine-src/src/transaction/validation.rs +2717 -1698
- package/dist-engine-src/src/untracked_state/codec.rs +40 -96
- package/dist-engine-src/src/untracked_state/context.rs +21 -5
- package/dist-engine-src/src/untracked_state/materialization.rs +10 -104
- package/dist-engine-src/src/untracked_state/mod.rs +3 -5
- package/dist-engine-src/src/untracked_state/storage.rs +105 -57
- package/dist-engine-src/src/untracked_state/types.rs +63 -13
- package/dist-engine-src/src/version/context.rs +1 -13
- package/dist-engine-src/src/version/lifecycle.rs +221 -0
- package/dist-engine-src/src/version/mod.rs +3 -2
- package/dist-engine-src/src/version/refs.rs +12 -103
- package/dist-engine-src/src/version/stage_rows.rs +15 -19
- package/package.json +1 -1
- package/dist-engine-src/src/changelog/codec.rs +0 -321
- package/dist-engine-src/src/changelog/context.rs +0 -92
- package/dist-engine-src/src/changelog/materialization.rs +0 -121
- package/dist-engine-src/src/changelog/mod.rs +0 -13
- package/dist-engine-src/src/changelog/reader.rs +0 -20
- package/dist-engine-src/src/changelog/storage.rs +0 -220
- package/dist-engine-src/src/changelog/types.rs +0 -38
- package/dist-engine-src/src/schema/builtin/lix_change_set.json +0 -18
- package/dist-engine-src/src/schema/builtin/lix_change_set_element.json +0 -75
- package/dist-engine-src/src/schema/builtin/lix_entity_label.json +0 -63
- package/dist-engine-src/src/schema_registry.rs +0 -294
- package/dist-engine-src/src/sql2/commit_derived_provider.rs +0 -591
- package/dist-engine-src/src/tracked_state/rebuild.rs +0 -771
- package/dist-engine-src/src/tracked_state/tree_types.rs +0 -176
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
use std::any::Any;
|
|
2
|
-
use std::collections::BTreeSet;
|
|
2
|
+
use std::collections::{BTreeMap, BTreeSet};
|
|
3
3
|
use std::sync::Arc;
|
|
4
4
|
|
|
5
5
|
use async_trait::async_trait;
|
|
@@ -14,7 +14,8 @@ use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, Scalar
|
|
|
14
14
|
use datafusion::datasource::TableType;
|
|
15
15
|
use datafusion::execution::TaskContext;
|
|
16
16
|
use datafusion::logical_expr::dml::InsertOp;
|
|
17
|
-
use datafusion::logical_expr::
|
|
17
|
+
use datafusion::logical_expr::expr::InList;
|
|
18
|
+
use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown};
|
|
18
19
|
use datafusion::physical_expr::{create_physical_expr, EquivalenceProperties, PhysicalExpr};
|
|
19
20
|
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType, PlanProperties};
|
|
20
21
|
use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
|
|
@@ -27,11 +28,12 @@ use serde_json::Value as JsonValue;
|
|
|
27
28
|
|
|
28
29
|
use crate::commit_graph::CommitGraphReader;
|
|
29
30
|
use crate::entity_identity::EntityIdentity;
|
|
30
|
-
use crate::live_state::
|
|
31
|
+
use crate::live_state::MaterializedLiveStateRow;
|
|
31
32
|
use crate::live_state::{
|
|
32
33
|
LiveStateFilter, LiveStateProjection, LiveStateReader, LiveStateScanRequest,
|
|
33
34
|
};
|
|
34
35
|
use crate::sql2::dml::{InsertExec, InsertSink};
|
|
36
|
+
use crate::sql2::predicate_typecheck::validate_json_predicate_filters;
|
|
35
37
|
use crate::sql2::read_only::reject_read_only_entity_surface;
|
|
36
38
|
use crate::sql2::version_scope::{
|
|
37
39
|
explicit_version_ids_from_dml_filters, resolve_provider_version_ids,
|
|
@@ -40,23 +42,22 @@ use crate::sql2::version_scope::{
|
|
|
40
42
|
use crate::sql2::write_normalization::{
|
|
41
43
|
InsertCell, InsertColumnIntents, SqlCell, UpdateAssignmentValues, UpdateCell,
|
|
42
44
|
};
|
|
43
|
-
use crate::transaction::types::
|
|
45
|
+
use crate::transaction::types::{TransactionJson, TransactionWriteRow};
|
|
44
46
|
use crate::version::VersionRefReader;
|
|
45
|
-
use crate::{
|
|
47
|
+
use crate::{parse_row_metadata_value, serialize_row_metadata, LixError};
|
|
46
48
|
|
|
47
49
|
use super::entity_history_provider::EntityHistoryProvider;
|
|
48
50
|
use super::history_route::{
|
|
49
51
|
HISTORY_COL_CHANGE_ID, HISTORY_COL_COMMIT_CREATED_AT, HISTORY_COL_DEPTH, HISTORY_COL_ENTITY_ID,
|
|
50
52
|
HISTORY_COL_FILE_ID, HISTORY_COL_METADATA, HISTORY_COL_OBSERVED_COMMIT_ID,
|
|
51
|
-
HISTORY_COL_SCHEMA_KEY,
|
|
52
|
-
HISTORY_COL_START_COMMIT_ID,
|
|
53
|
+
HISTORY_COL_SCHEMA_KEY, HISTORY_COL_SNAPSHOT_CONTENT, HISTORY_COL_START_COMMIT_ID,
|
|
53
54
|
};
|
|
54
55
|
use super::result_metadata::{json_field, mark_json_field};
|
|
55
56
|
use crate::sql2::{
|
|
56
|
-
|
|
57
|
+
SqlCommitStoreQuerySource, SqlWriteContext, WriteAccess, WriteContextLiveStateReader,
|
|
57
58
|
WriteContextVersionRefReader,
|
|
58
59
|
};
|
|
59
|
-
use crate::transaction::types::{
|
|
60
|
+
use crate::transaction::types::{TransactionWrite, TransactionWriteMode};
|
|
60
61
|
|
|
61
62
|
pub(crate) async fn register_entity_providers(
|
|
62
63
|
ctx: &SessionContext,
|
|
@@ -64,7 +65,7 @@ pub(crate) async fn register_entity_providers(
|
|
|
64
65
|
live_state: Arc<dyn LiveStateReader>,
|
|
65
66
|
version_ref: Arc<dyn VersionRefReader>,
|
|
66
67
|
commit_graph: Arc<tokio::sync::Mutex<Box<dyn CommitGraphReader>>>,
|
|
67
|
-
query_source:
|
|
68
|
+
query_source: SqlCommitStoreQuerySource,
|
|
68
69
|
schema_definitions: &[JsonValue],
|
|
69
70
|
) -> Result<(), LixError> {
|
|
70
71
|
for schema in schema_definitions {
|
|
@@ -99,16 +100,18 @@ pub(crate) async fn register_entity_providers(
|
|
|
99
100
|
)
|
|
100
101
|
.map_err(datafusion_error_to_lix_error)?;
|
|
101
102
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
Arc::
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
103
|
+
if schema_exposed_as_entity_history_surface(&spec.schema_key) {
|
|
104
|
+
let history_name = format!("{}_history", spec.schema_key);
|
|
105
|
+
ctx.register_table(
|
|
106
|
+
&history_name,
|
|
107
|
+
Arc::new(EntityHistoryProvider::new(
|
|
108
|
+
Arc::clone(&spec),
|
|
109
|
+
Arc::clone(&commit_graph),
|
|
110
|
+
query_source.clone(),
|
|
111
|
+
)),
|
|
112
|
+
)
|
|
113
|
+
.map_err(datafusion_error_to_lix_error)?;
|
|
114
|
+
}
|
|
112
115
|
}
|
|
113
116
|
|
|
114
117
|
Ok(())
|
|
@@ -177,7 +180,6 @@ pub(super) struct EntitySurfaceColumn {
|
|
|
177
180
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
178
181
|
pub(super) struct EntitySurfaceSpec {
|
|
179
182
|
pub(super) schema_key: String,
|
|
180
|
-
schema_version: Option<String>,
|
|
181
183
|
pub(super) primary_key_paths: Vec<Vec<String>>,
|
|
182
184
|
pub(super) columns: Vec<EntitySurfaceColumn>,
|
|
183
185
|
}
|
|
@@ -300,13 +302,14 @@ impl TableProvider for EntityProvider {
|
|
|
300
302
|
&self,
|
|
301
303
|
filters: &[&Expr],
|
|
302
304
|
) -> Result<Vec<TableProviderFilterPushDown>> {
|
|
305
|
+
let analyzer = EntityPrimaryKeyFilterAnalyzer::new(&self.spec);
|
|
303
306
|
Ok(filters
|
|
304
307
|
.iter()
|
|
305
308
|
.map(|filter| {
|
|
306
|
-
if
|
|
307
|
-
TableProviderFilterPushDown::
|
|
309
|
+
if ExactVersionIdFilterAnalyzer.supports(filter) || analyzer.supports(filter) {
|
|
310
|
+
TableProviderFilterPushDown::Exact
|
|
308
311
|
} else {
|
|
309
|
-
TableProviderFilterPushDown::
|
|
312
|
+
TableProviderFilterPushDown::Unsupported
|
|
310
313
|
}
|
|
311
314
|
})
|
|
312
315
|
.collect())
|
|
@@ -323,6 +326,7 @@ impl TableProvider for EntityProvider {
|
|
|
323
326
|
let mut request = entity_live_state_scan_request(
|
|
324
327
|
&self.spec.schema_key,
|
|
325
328
|
self.version_binding.active_version_id(),
|
|
329
|
+
Some(projected_schema.as_ref()),
|
|
326
330
|
limit,
|
|
327
331
|
);
|
|
328
332
|
if self.write_access.is_write() && matches!(self.version_binding, VersionBinding::Explicit)
|
|
@@ -342,6 +346,8 @@ impl TableProvider for EntityProvider {
|
|
|
342
346
|
)
|
|
343
347
|
.await
|
|
344
348
|
.map_err(lix_error_to_datafusion_error)?;
|
|
349
|
+
apply_exact_version_id_filter(&mut request, exact_version_ids_from_filters(filters)?);
|
|
350
|
+
apply_exact_entity_id_filters(&mut request, &self.spec, filters)?;
|
|
345
351
|
|
|
346
352
|
Ok(Arc::new(EntityScanExec::new(
|
|
347
353
|
Arc::clone(&self.spec),
|
|
@@ -406,6 +412,7 @@ impl TableProvider for EntityProvider {
|
|
|
406
412
|
};
|
|
407
413
|
|
|
408
414
|
let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
|
|
415
|
+
validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
|
|
409
416
|
let physical_filters = filters
|
|
410
417
|
.iter()
|
|
411
418
|
.map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
|
|
@@ -414,16 +421,19 @@ impl TableProvider for EntityProvider {
|
|
|
414
421
|
&self.spec.schema_key,
|
|
415
422
|
version_binding.active_version_id(),
|
|
416
423
|
None,
|
|
424
|
+
None,
|
|
417
425
|
);
|
|
418
426
|
if matches!(version_binding, VersionBinding::Explicit) {
|
|
419
|
-
|
|
420
|
-
if
|
|
427
|
+
let exact_version_ids = exact_version_ids_from_filters(&filters)?;
|
|
428
|
+
if exact_version_ids.is_none() {
|
|
421
429
|
return Err(DataFusionError::Plan(format!(
|
|
422
430
|
"DELETE FROM {}_by_version requires an explicit lixcol_version_id predicate",
|
|
423
431
|
self.spec.schema_key
|
|
424
432
|
)));
|
|
425
433
|
}
|
|
434
|
+
apply_exact_version_id_filter(&mut request, exact_version_ids);
|
|
426
435
|
}
|
|
436
|
+
apply_exact_entity_id_filters(&mut request, &self.spec, &filters)?;
|
|
427
437
|
|
|
428
438
|
Ok(Arc::new(EntityDeleteExec::new(
|
|
429
439
|
Arc::clone(&self.spec),
|
|
@@ -458,6 +468,7 @@ impl TableProvider for EntityProvider {
|
|
|
458
468
|
};
|
|
459
469
|
|
|
460
470
|
let df_schema = DFSchema::try_from(Arc::clone(&self.schema))?;
|
|
471
|
+
validate_json_predicate_filters(self.schema.as_ref(), &filters)?;
|
|
461
472
|
let physical_assignments = assignments
|
|
462
473
|
.iter()
|
|
463
474
|
.map(|(column_name, expr)| {
|
|
@@ -471,11 +482,13 @@ impl TableProvider for EntityProvider {
|
|
|
471
482
|
.iter()
|
|
472
483
|
.map(|expr| create_physical_expr(expr, &df_schema, state.execution_props()))
|
|
473
484
|
.collect::<Result<Vec<_>>>()?;
|
|
474
|
-
let request = entity_live_state_scan_request(
|
|
485
|
+
let mut request = entity_live_state_scan_request(
|
|
475
486
|
&self.spec.schema_key,
|
|
476
487
|
version_binding.active_version_id(),
|
|
477
488
|
None,
|
|
489
|
+
None,
|
|
478
490
|
);
|
|
491
|
+
apply_exact_entity_id_filters(&mut request, &self.spec, &filters)?;
|
|
479
492
|
|
|
480
493
|
Ok(Arc::new(EntityUpdateExec::new(
|
|
481
494
|
Arc::clone(&self.spec),
|
|
@@ -489,6 +502,413 @@ impl TableProvider for EntityProvider {
|
|
|
489
502
|
}
|
|
490
503
|
}
|
|
491
504
|
|
|
505
|
+
fn entity_ids_from_primary_key_filters(
|
|
506
|
+
spec: &EntitySurfaceSpec,
|
|
507
|
+
filters: &[Expr],
|
|
508
|
+
) -> Result<Option<Vec<EntityIdentity>>> {
|
|
509
|
+
let analyzer = EntityPrimaryKeyFilterAnalyzer::new(spec);
|
|
510
|
+
let mut entity_ids: Option<BTreeSet<EntityIdentity>> = None;
|
|
511
|
+
for filter in filters {
|
|
512
|
+
let Some(filter_ids) = analyzer.analyze(filter)? else {
|
|
513
|
+
continue;
|
|
514
|
+
};
|
|
515
|
+
entity_ids = Some(match entity_ids {
|
|
516
|
+
Some(existing_ids) => existing_ids.intersection(&filter_ids).cloned().collect(),
|
|
517
|
+
None => filter_ids,
|
|
518
|
+
});
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
Ok(entity_ids.map(|ids| ids.into_iter().collect()))
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
fn apply_exact_entity_id_filters(
|
|
525
|
+
request: &mut LiveStateScanRequest,
|
|
526
|
+
spec: &EntitySurfaceSpec,
|
|
527
|
+
filters: &[Expr],
|
|
528
|
+
) -> Result<()> {
|
|
529
|
+
if let Some(entity_ids) = entity_ids_from_primary_key_filters(spec, filters)? {
|
|
530
|
+
if entity_ids.is_empty() {
|
|
531
|
+
request.limit = Some(0);
|
|
532
|
+
}
|
|
533
|
+
request.filter.entity_ids = entity_ids;
|
|
534
|
+
}
|
|
535
|
+
Ok(())
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
fn exact_version_ids_from_filters(filters: &[Expr]) -> Result<Option<Vec<String>>> {
|
|
539
|
+
let analyzer = ExactVersionIdFilterAnalyzer;
|
|
540
|
+
let mut version_ids: Option<BTreeSet<String>> = None;
|
|
541
|
+
for filter in filters {
|
|
542
|
+
let Some(filter_ids) = analyzer.analyze(filter)? else {
|
|
543
|
+
continue;
|
|
544
|
+
};
|
|
545
|
+
version_ids = Some(match version_ids {
|
|
546
|
+
Some(existing_ids) => existing_ids.intersection(&filter_ids).cloned().collect(),
|
|
547
|
+
None => filter_ids,
|
|
548
|
+
});
|
|
549
|
+
}
|
|
550
|
+
Ok(version_ids.map(|ids| ids.into_iter().collect()))
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
fn apply_exact_version_id_filter(
|
|
554
|
+
request: &mut LiveStateScanRequest,
|
|
555
|
+
version_ids: Option<Vec<String>>,
|
|
556
|
+
) {
|
|
557
|
+
if let Some(version_ids) = version_ids {
|
|
558
|
+
if version_ids.is_empty() {
|
|
559
|
+
request.limit = Some(0);
|
|
560
|
+
}
|
|
561
|
+
request.filter.version_ids = version_ids;
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
struct EntityPrimaryKeyFilterAnalyzer<'a> {
|
|
566
|
+
primary_key_columns: Vec<&'a str>,
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
struct ExactVersionIdFilterAnalyzer;
|
|
570
|
+
|
|
571
|
+
impl ExactVersionIdFilterAnalyzer {
|
|
572
|
+
fn supports(&self, expr: &Expr) -> bool {
|
|
573
|
+
self.analyze(expr)
|
|
574
|
+
.is_ok_and(|constraint| constraint.is_some())
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
fn analyze(&self, expr: &Expr) -> Result<Option<BTreeSet<String>>> {
|
|
578
|
+
match expr {
|
|
579
|
+
Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
|
|
580
|
+
let Some(left) = self.analyze(&binary_expr.left)? else {
|
|
581
|
+
return Ok(None);
|
|
582
|
+
};
|
|
583
|
+
let Some(right) = self.analyze(&binary_expr.right)? else {
|
|
584
|
+
return Ok(None);
|
|
585
|
+
};
|
|
586
|
+
Ok(Some(left.intersection(&right).cloned().collect()))
|
|
587
|
+
}
|
|
588
|
+
Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
|
|
589
|
+
let Some(mut left) = self.analyze(&binary_expr.left)? else {
|
|
590
|
+
return Ok(None);
|
|
591
|
+
};
|
|
592
|
+
let Some(right) = self.analyze(&binary_expr.right)? else {
|
|
593
|
+
return Ok(None);
|
|
594
|
+
};
|
|
595
|
+
left.extend(right);
|
|
596
|
+
Ok(Some(left))
|
|
597
|
+
}
|
|
598
|
+
Expr::BinaryExpr(binary_expr) => {
|
|
599
|
+
Ok(version_id_from_binary_filter(binary_expr).map(|value| BTreeSet::from([value])))
|
|
600
|
+
}
|
|
601
|
+
Expr::InList(in_list) => {
|
|
602
|
+
Ok(version_ids_from_in_list_filter(in_list)
|
|
603
|
+
.map(|values| values.into_iter().collect()))
|
|
604
|
+
}
|
|
605
|
+
_ => Ok(None),
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
fn version_id_from_binary_filter(binary_expr: &BinaryExpr) -> Option<String> {
|
|
611
|
+
if binary_expr.op != Operator::Eq {
|
|
612
|
+
return None;
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
version_id_from_column_literal_filter(&binary_expr.left, &binary_expr.right)
|
|
616
|
+
.or_else(|| version_id_from_column_literal_filter(&binary_expr.right, &binary_expr.left))
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
fn version_ids_from_in_list_filter(in_list: &InList) -> Option<Vec<String>> {
|
|
620
|
+
if in_list.negated {
|
|
621
|
+
return None;
|
|
622
|
+
}
|
|
623
|
+
let Expr::Column(column) = in_list.expr.as_ref() else {
|
|
624
|
+
return None;
|
|
625
|
+
};
|
|
626
|
+
if column.name != "lixcol_version_id" {
|
|
627
|
+
return None;
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
let values = in_list
|
|
631
|
+
.list
|
|
632
|
+
.iter()
|
|
633
|
+
.map(string_expr_literal)
|
|
634
|
+
.collect::<Option<Vec<_>>>()?;
|
|
635
|
+
if values.is_empty() {
|
|
636
|
+
return None;
|
|
637
|
+
}
|
|
638
|
+
Some(values)
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
fn version_id_from_column_literal_filter(
|
|
642
|
+
column_expr: &Expr,
|
|
643
|
+
literal_expr: &Expr,
|
|
644
|
+
) -> Option<String> {
|
|
645
|
+
let Expr::Column(column) = column_expr else {
|
|
646
|
+
return None;
|
|
647
|
+
};
|
|
648
|
+
if column.name != "lixcol_version_id" {
|
|
649
|
+
return None;
|
|
650
|
+
}
|
|
651
|
+
string_expr_literal(literal_expr)
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
impl<'a> EntityPrimaryKeyFilterAnalyzer<'a> {
|
|
655
|
+
fn new(spec: &'a EntitySurfaceSpec) -> Self {
|
|
656
|
+
Self {
|
|
657
|
+
primary_key_columns: string_primary_key_columns(spec),
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
fn supports(&self, expr: &Expr) -> bool {
|
|
662
|
+
self.analyze(expr)
|
|
663
|
+
.is_ok_and(|constraint| constraint.is_some())
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
fn analyze(&self, expr: &Expr) -> Result<Option<BTreeSet<EntityIdentity>>> {
|
|
667
|
+
if self.primary_key_columns.is_empty() {
|
|
668
|
+
return Ok(None);
|
|
669
|
+
};
|
|
670
|
+
let Some(constraint) = self.analyze_constraint(expr)? else {
|
|
671
|
+
return Ok(None);
|
|
672
|
+
};
|
|
673
|
+
Ok(constraint.into_entity_ids(&self.primary_key_columns))
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
fn analyze_constraint(&self, expr: &Expr) -> Result<Option<EntityIdentityConstraint>> {
|
|
677
|
+
match expr {
|
|
678
|
+
Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::And => {
|
|
679
|
+
let Some(left) = self.analyze_constraint(&binary_expr.left)? else {
|
|
680
|
+
return Ok(None);
|
|
681
|
+
};
|
|
682
|
+
let Some(right) = self.analyze_constraint(&binary_expr.right)? else {
|
|
683
|
+
return Ok(None);
|
|
684
|
+
};
|
|
685
|
+
Ok(Some(left.intersect(right, &self.primary_key_columns)))
|
|
686
|
+
}
|
|
687
|
+
Expr::BinaryExpr(binary_expr) if binary_expr.op == Operator::Or => {
|
|
688
|
+
let Some(left) = self.analyze_constraint(&binary_expr.left)? else {
|
|
689
|
+
return Ok(None);
|
|
690
|
+
};
|
|
691
|
+
let Some(right) = self.analyze_constraint(&binary_expr.right)? else {
|
|
692
|
+
return Ok(None);
|
|
693
|
+
};
|
|
694
|
+
let Some(left_ids) = left.into_entity_ids(&self.primary_key_columns) else {
|
|
695
|
+
return Ok(None);
|
|
696
|
+
};
|
|
697
|
+
let Some(mut right_ids) = right.into_entity_ids(&self.primary_key_columns) else {
|
|
698
|
+
return Ok(None);
|
|
699
|
+
};
|
|
700
|
+
right_ids.extend(left_ids);
|
|
701
|
+
Ok(Some(EntityIdentityConstraint::Full(right_ids)))
|
|
702
|
+
}
|
|
703
|
+
Expr::BinaryExpr(binary_expr) => Ok(entity_identity_constraint_from_binary_filter(
|
|
704
|
+
binary_expr,
|
|
705
|
+
&self.primary_key_columns,
|
|
706
|
+
)),
|
|
707
|
+
Expr::InList(in_list) => Ok(entity_identity_constraint_from_in_list_filter(
|
|
708
|
+
in_list,
|
|
709
|
+
&self.primary_key_columns,
|
|
710
|
+
)),
|
|
711
|
+
_ => Ok(None),
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
717
|
+
enum EntityIdentityConstraint {
|
|
718
|
+
Full(BTreeSet<EntityIdentity>),
|
|
719
|
+
Parts(BTreeMap<String, BTreeSet<String>>),
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
impl EntityIdentityConstraint {
|
|
723
|
+
fn intersect(self, other: Self, primary_key_columns: &[&str]) -> Self {
|
|
724
|
+
match (self, other) {
|
|
725
|
+
(Self::Full(left), Self::Full(right)) => {
|
|
726
|
+
Self::Full(left.intersection(&right).cloned().collect())
|
|
727
|
+
}
|
|
728
|
+
(Self::Full(ids), Self::Parts(parts)) | (Self::Parts(parts), Self::Full(ids)) => {
|
|
729
|
+
Self::Full(
|
|
730
|
+
ids.into_iter()
|
|
731
|
+
.filter(|identity| {
|
|
732
|
+
identity_matches_parts(identity, primary_key_columns, &parts)
|
|
733
|
+
})
|
|
734
|
+
.collect(),
|
|
735
|
+
)
|
|
736
|
+
}
|
|
737
|
+
(Self::Parts(mut left), Self::Parts(right)) => {
|
|
738
|
+
for (column, right_values) in right {
|
|
739
|
+
left.entry(column)
|
|
740
|
+
.and_modify(|left_values| {
|
|
741
|
+
*left_values =
|
|
742
|
+
left_values.intersection(&right_values).cloned().collect();
|
|
743
|
+
})
|
|
744
|
+
.or_insert(right_values);
|
|
745
|
+
}
|
|
746
|
+
Self::Parts(left)
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
fn into_entity_ids(self, primary_key_columns: &[&str]) -> Option<BTreeSet<EntityIdentity>> {
|
|
752
|
+
match self {
|
|
753
|
+
Self::Full(ids) => Some(ids),
|
|
754
|
+
Self::Parts(parts) => entity_ids_from_primary_key_parts(primary_key_columns, parts),
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
fn string_primary_key_columns(spec: &EntitySurfaceSpec) -> Vec<&str> {
|
|
760
|
+
spec.primary_key_paths
|
|
761
|
+
.iter()
|
|
762
|
+
.map(|path| {
|
|
763
|
+
let [column_name] = path.as_slice() else {
|
|
764
|
+
return None;
|
|
765
|
+
};
|
|
766
|
+
let column = spec.visible_column(column_name)?;
|
|
767
|
+
(column.column_type == EntityColumnType::String).then_some(column.name.as_str())
|
|
768
|
+
})
|
|
769
|
+
.collect::<Option<Vec<_>>>()
|
|
770
|
+
.unwrap_or_default()
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
fn entity_identity_constraint_from_binary_filter(
|
|
774
|
+
binary_expr: &BinaryExpr,
|
|
775
|
+
primary_key_columns: &[&str],
|
|
776
|
+
) -> Option<EntityIdentityConstraint> {
|
|
777
|
+
if binary_expr.op != Operator::Eq {
|
|
778
|
+
return None;
|
|
779
|
+
}
|
|
780
|
+
entity_identity_constraint_from_column_literal_filter(
|
|
781
|
+
&binary_expr.left,
|
|
782
|
+
&binary_expr.right,
|
|
783
|
+
primary_key_columns,
|
|
784
|
+
)
|
|
785
|
+
.or_else(|| {
|
|
786
|
+
entity_identity_constraint_from_column_literal_filter(
|
|
787
|
+
&binary_expr.right,
|
|
788
|
+
&binary_expr.left,
|
|
789
|
+
primary_key_columns,
|
|
790
|
+
)
|
|
791
|
+
})
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
fn entity_identity_constraint_from_in_list_filter(
|
|
795
|
+
in_list: &InList,
|
|
796
|
+
primary_key_columns: &[&str],
|
|
797
|
+
) -> Option<EntityIdentityConstraint> {
|
|
798
|
+
if in_list.negated {
|
|
799
|
+
return None;
|
|
800
|
+
}
|
|
801
|
+
let Expr::Column(column) = in_list.expr.as_ref() else {
|
|
802
|
+
return None;
|
|
803
|
+
};
|
|
804
|
+
let values = in_list
|
|
805
|
+
.list
|
|
806
|
+
.iter()
|
|
807
|
+
.map(string_expr_literal)
|
|
808
|
+
.collect::<Option<Vec<_>>>()?;
|
|
809
|
+
if values.is_empty() {
|
|
810
|
+
return None;
|
|
811
|
+
}
|
|
812
|
+
match column.name.as_str() {
|
|
813
|
+
"lixcol_entity_id" => values
|
|
814
|
+
.into_iter()
|
|
815
|
+
.map(|value| EntityIdentity::from_json_array_text(&value).ok())
|
|
816
|
+
.collect::<Option<BTreeSet<_>>>()
|
|
817
|
+
.map(EntityIdentityConstraint::Full),
|
|
818
|
+
column_name if primary_key_columns.contains(&column_name) => {
|
|
819
|
+
Some(EntityIdentityConstraint::Parts(BTreeMap::from([(
|
|
820
|
+
column_name.to_string(),
|
|
821
|
+
values.into_iter().collect(),
|
|
822
|
+
)])))
|
|
823
|
+
}
|
|
824
|
+
_ => None,
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
fn entity_identity_constraint_from_column_literal_filter(
|
|
829
|
+
column_expr: &Expr,
|
|
830
|
+
literal_expr: &Expr,
|
|
831
|
+
primary_key_columns: &[&str],
|
|
832
|
+
) -> Option<EntityIdentityConstraint> {
|
|
833
|
+
let Expr::Column(column) = column_expr else {
|
|
834
|
+
return None;
|
|
835
|
+
};
|
|
836
|
+
let value = string_expr_literal(literal_expr)?;
|
|
837
|
+
match column.name.as_str() {
|
|
838
|
+
"lixcol_entity_id" => EntityIdentity::from_json_array_text(&value)
|
|
839
|
+
.ok()
|
|
840
|
+
.map(|identity| EntityIdentityConstraint::Full(BTreeSet::from([identity]))),
|
|
841
|
+
column_name if primary_key_columns.contains(&column_name) => {
|
|
842
|
+
Some(EntityIdentityConstraint::Parts(BTreeMap::from([(
|
|
843
|
+
column_name.to_string(),
|
|
844
|
+
BTreeSet::from([value]),
|
|
845
|
+
)])))
|
|
846
|
+
}
|
|
847
|
+
_ => None,
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
fn entity_ids_from_primary_key_parts(
|
|
852
|
+
primary_key_columns: &[&str],
|
|
853
|
+
parts: BTreeMap<String, BTreeSet<String>>,
|
|
854
|
+
) -> Option<BTreeSet<EntityIdentity>> {
|
|
855
|
+
if primary_key_columns
|
|
856
|
+
.iter()
|
|
857
|
+
.any(|column| !parts.contains_key(*column))
|
|
858
|
+
{
|
|
859
|
+
return None;
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
let mut identities = BTreeSet::from([Vec::<String>::new()]);
|
|
863
|
+
for column in primary_key_columns {
|
|
864
|
+
let values = parts.get(*column)?;
|
|
865
|
+
identities = identities
|
|
866
|
+
.into_iter()
|
|
867
|
+
.flat_map(|prefix| {
|
|
868
|
+
values.iter().map(move |value| {
|
|
869
|
+
let mut parts = prefix.clone();
|
|
870
|
+
parts.push(value.clone());
|
|
871
|
+
parts
|
|
872
|
+
})
|
|
873
|
+
})
|
|
874
|
+
.collect();
|
|
875
|
+
}
|
|
876
|
+
Some(
|
|
877
|
+
identities
|
|
878
|
+
.into_iter()
|
|
879
|
+
.map(|parts| EntityIdentity { parts })
|
|
880
|
+
.collect(),
|
|
881
|
+
)
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
fn identity_matches_parts(
|
|
885
|
+
identity: &EntityIdentity,
|
|
886
|
+
primary_key_columns: &[&str],
|
|
887
|
+
parts: &BTreeMap<String, BTreeSet<String>>,
|
|
888
|
+
) -> bool {
|
|
889
|
+
let identity_parts = identity.parts.as_slice();
|
|
890
|
+
primary_key_columns
|
|
891
|
+
.iter()
|
|
892
|
+
.zip(identity_parts.iter())
|
|
893
|
+
.all(|(column, value)| {
|
|
894
|
+
parts
|
|
895
|
+
.get(*column)
|
|
896
|
+
.is_none_or(|values| values.contains(value))
|
|
897
|
+
})
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
fn string_expr_literal(expr: &Expr) -> Option<String> {
|
|
901
|
+
let Expr::Literal(literal, _) = expr else {
|
|
902
|
+
return None;
|
|
903
|
+
};
|
|
904
|
+
match literal {
|
|
905
|
+
ScalarValue::Utf8(Some(value))
|
|
906
|
+
| ScalarValue::Utf8View(Some(value))
|
|
907
|
+
| ScalarValue::LargeUtf8(Some(value)) => Some(value.clone()),
|
|
908
|
+
_ => None,
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
|
|
492
912
|
struct EntityInsertSink {
|
|
493
913
|
spec: Arc<EntitySurfaceSpec>,
|
|
494
914
|
insert_column_intents: InsertColumnIntents,
|
|
@@ -552,8 +972,8 @@ impl InsertSink for EntityInsertSink {
|
|
|
552
972
|
.map_err(|_| DataFusionError::Execution("entity INSERT row count overflow".into()))?;
|
|
553
973
|
|
|
554
974
|
self.write_ctx
|
|
555
|
-
.stage_write(
|
|
556
|
-
mode:
|
|
975
|
+
.stage_write(TransactionWrite::Rows {
|
|
976
|
+
mode: TransactionWriteMode::Insert,
|
|
557
977
|
rows,
|
|
558
978
|
})
|
|
559
979
|
.await
|
|
@@ -616,12 +1036,7 @@ impl DisplayAs for EntityDeleteExec {
|
|
|
616
1036
|
fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
617
1037
|
match t {
|
|
618
1038
|
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
|
619
|
-
write!(
|
|
620
|
-
f,
|
|
621
|
-
"EntityDeleteExec(schema_key={}, filters={})",
|
|
622
|
-
self.spec.schema_key,
|
|
623
|
-
self.filters.len()
|
|
624
|
-
)
|
|
1039
|
+
write!(f, "EntityDeleteExec(schema_key={})", self.spec.schema_key)
|
|
625
1040
|
}
|
|
626
1041
|
DisplayFormatType::TreeRender => write!(f, "EntityDeleteExec"),
|
|
627
1042
|
}
|
|
@@ -694,7 +1109,7 @@ impl ExecutionPlan for EntityDeleteExec {
|
|
|
694
1109
|
version_binding.active_version_id(),
|
|
695
1110
|
)?;
|
|
696
1111
|
for row in &mut write_rows {
|
|
697
|
-
row.
|
|
1112
|
+
row.snapshot = None;
|
|
698
1113
|
}
|
|
699
1114
|
let count = u64::try_from(write_rows.len()).map_err(|_| {
|
|
700
1115
|
DataFusionError::Execution("entity DELETE row count overflow".to_string())
|
|
@@ -702,8 +1117,8 @@ impl ExecutionPlan for EntityDeleteExec {
|
|
|
702
1117
|
|
|
703
1118
|
if count > 0 {
|
|
704
1119
|
write_ctx
|
|
705
|
-
.stage_write(
|
|
706
|
-
mode:
|
|
1120
|
+
.stage_write(TransactionWrite::Rows {
|
|
1121
|
+
mode: TransactionWriteMode::Replace,
|
|
707
1122
|
rows: write_rows,
|
|
708
1123
|
})
|
|
709
1124
|
.await
|
|
@@ -781,10 +1196,9 @@ impl DisplayAs for EntityUpdateExec {
|
|
|
781
1196
|
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
|
782
1197
|
write!(
|
|
783
1198
|
f,
|
|
784
|
-
"EntityUpdateExec(schema_key={}, assignments={}
|
|
1199
|
+
"EntityUpdateExec(schema_key={}, assignments={})",
|
|
785
1200
|
self.spec.schema_key,
|
|
786
|
-
self.assignments.len()
|
|
787
|
-
self.filters.len()
|
|
1201
|
+
self.assignments.len()
|
|
788
1202
|
)
|
|
789
1203
|
}
|
|
790
1204
|
DisplayFormatType::TreeRender => write!(f, "EntityUpdateExec"),
|
|
@@ -865,8 +1279,8 @@ impl ExecutionPlan for EntityUpdateExec {
|
|
|
865
1279
|
|
|
866
1280
|
if count > 0 {
|
|
867
1281
|
write_ctx
|
|
868
|
-
.stage_write(
|
|
869
|
-
mode:
|
|
1282
|
+
.stage_write(TransactionWrite::Rows {
|
|
1283
|
+
mode: TransactionWriteMode::Replace,
|
|
870
1284
|
rows: write_rows,
|
|
871
1285
|
})
|
|
872
1286
|
.await
|
|
@@ -953,7 +1367,7 @@ fn entity_update_write_rows_from_batch(
|
|
|
953
1367
|
batch: &RecordBatch,
|
|
954
1368
|
assignments: &[(String, Arc<dyn PhysicalExpr>)],
|
|
955
1369
|
version_binding: Option<&str>,
|
|
956
|
-
) -> Result<Vec<
|
|
1370
|
+
) -> Result<Vec<TransactionWriteRow>> {
|
|
957
1371
|
let assignment_values = UpdateAssignmentValues::evaluate(batch, assignments)?;
|
|
958
1372
|
(0..batch.num_rows())
|
|
959
1373
|
.map(|row_index| {
|
|
@@ -965,19 +1379,10 @@ fn entity_update_write_rows_from_batch(
|
|
|
965
1379
|
&spec.schema_key,
|
|
966
1380
|
)?;
|
|
967
1381
|
|
|
968
|
-
|
|
969
|
-
.or_else(|| spec.schema_version.clone())
|
|
970
|
-
.ok_or_else(|| {
|
|
971
|
-
DataFusionError::Execution(format!(
|
|
972
|
-
"UPDATE entity surface '{}' requires lixcol_schema_version",
|
|
973
|
-
spec.schema_key
|
|
974
|
-
))
|
|
975
|
-
})?;
|
|
976
|
-
|
|
977
|
-
Ok(StageRow {
|
|
1382
|
+
Ok(TransactionWriteRow {
|
|
978
1383
|
entity_id: optional_string_value(batch, row_index, "lixcol_entity_id")?
|
|
979
1384
|
.map(|entity_id| {
|
|
980
|
-
EntityIdentity::
|
|
1385
|
+
EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
|
|
981
1386
|
DataFusionError::Execution(format!(
|
|
982
1387
|
"UPDATE entity surface '{}' has invalid lixcol_entity_id: {error}",
|
|
983
1388
|
spec.schema_key
|
|
@@ -987,12 +1392,18 @@ fn entity_update_write_rows_from_batch(
|
|
|
987
1392
|
.transpose()?,
|
|
988
1393
|
schema_key: spec.schema_key.clone(),
|
|
989
1394
|
file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
1395
|
+
snapshot: Some(
|
|
1396
|
+
TransactionJson::from_value(
|
|
1397
|
+
entity_update_snapshot_content_from_batch(
|
|
1398
|
+
spec,
|
|
1399
|
+
batch,
|
|
1400
|
+
&assignment_values,
|
|
1401
|
+
row_index,
|
|
1402
|
+
)?,
|
|
1403
|
+
&format!("{} update snapshot_content", spec.schema_key),
|
|
1404
|
+
)
|
|
1405
|
+
.map_err(super::error::lix_error_to_datafusion_error)?,
|
|
1406
|
+
),
|
|
996
1407
|
metadata: entity_update_optional_metadata_value(
|
|
997
1408
|
batch,
|
|
998
1409
|
&assignment_values,
|
|
@@ -1001,7 +1412,6 @@ fn entity_update_write_rows_from_batch(
|
|
|
1001
1412
|
&spec.schema_key,
|
|
1002
1413
|
)?,
|
|
1003
1414
|
origin: None,
|
|
1004
|
-
schema_version,
|
|
1005
1415
|
created_at: None,
|
|
1006
1416
|
updated_at: None,
|
|
1007
1417
|
global: scope.global,
|
|
@@ -1020,7 +1430,7 @@ fn entity_update_snapshot_content_from_batch(
|
|
|
1020
1430
|
batch: &RecordBatch,
|
|
1021
1431
|
assignment_values: &UpdateAssignmentValues,
|
|
1022
1432
|
row_index: usize,
|
|
1023
|
-
) -> Result<
|
|
1433
|
+
) -> Result<JsonValue> {
|
|
1024
1434
|
let snapshot_content = optional_string_value(batch, row_index, "lixcol_snapshot_content")?
|
|
1025
1435
|
.ok_or_else(|| {
|
|
1026
1436
|
DataFusionError::Execution(format!(
|
|
@@ -1055,12 +1465,7 @@ fn entity_update_snapshot_content_from_batch(
|
|
|
1055
1465
|
};
|
|
1056
1466
|
object.insert(column.name.clone(), value);
|
|
1057
1467
|
}
|
|
1058
|
-
|
|
1059
|
-
DataFusionError::Execution(format!(
|
|
1060
|
-
"failed to serialize entity surface '{}' snapshot_content: {error}",
|
|
1061
|
-
spec.schema_key
|
|
1062
|
-
))
|
|
1063
|
-
})
|
|
1468
|
+
Ok(JsonValue::Object(object))
|
|
1064
1469
|
}
|
|
1065
1470
|
|
|
1066
1471
|
fn entity_update_optional_string_value(
|
|
@@ -1088,10 +1493,13 @@ fn entity_update_optional_metadata_value(
|
|
|
1088
1493
|
row_index: usize,
|
|
1089
1494
|
column_name: &str,
|
|
1090
1495
|
context: &str,
|
|
1091
|
-
) -> Result<Option<
|
|
1496
|
+
) -> Result<Option<TransactionJson>> {
|
|
1092
1497
|
entity_update_optional_string_value(batch, assignment_values, row_index, column_name)?
|
|
1093
1498
|
.map(|value| {
|
|
1094
|
-
|
|
1499
|
+
let metadata = parse_row_metadata_value(&value, context)
|
|
1500
|
+
.map_err(super::error::lix_error_to_datafusion_error)?;
|
|
1501
|
+
TransactionJson::from_value(metadata, &format!("{context} metadata"))
|
|
1502
|
+
.map_err(super::error::lix_error_to_datafusion_error)
|
|
1095
1503
|
})
|
|
1096
1504
|
.transpose()
|
|
1097
1505
|
}
|
|
@@ -1132,7 +1540,7 @@ fn entity_lix_state_write_rows_from_batch(
|
|
|
1132
1540
|
batch: &RecordBatch,
|
|
1133
1541
|
insert_column_intents: &InsertColumnIntents,
|
|
1134
1542
|
version_binding: Option<&str>,
|
|
1135
|
-
) -> Result<Vec<
|
|
1543
|
+
) -> Result<Vec<TransactionWriteRow>> {
|
|
1136
1544
|
entity_lix_state_write_rows_from_batch_with_options(
|
|
1137
1545
|
spec,
|
|
1138
1546
|
batch,
|
|
@@ -1146,7 +1554,7 @@ fn entity_existing_lix_state_write_rows_from_batch(
|
|
|
1146
1554
|
spec: &EntitySurfaceSpec,
|
|
1147
1555
|
batch: &RecordBatch,
|
|
1148
1556
|
version_binding: Option<&str>,
|
|
1149
|
-
) -> Result<Vec<
|
|
1557
|
+
) -> Result<Vec<TransactionWriteRow>> {
|
|
1150
1558
|
entity_lix_state_write_rows_from_batch_with_options(
|
|
1151
1559
|
spec,
|
|
1152
1560
|
batch,
|
|
@@ -1162,7 +1570,7 @@ fn entity_lix_state_write_rows_from_batch_with_options(
|
|
|
1162
1570
|
insert_column_intents: &InsertColumnIntents,
|
|
1163
1571
|
version_binding: Option<&str>,
|
|
1164
1572
|
reject_read_only_fields: bool,
|
|
1165
|
-
) -> Result<Vec<
|
|
1573
|
+
) -> Result<Vec<TransactionWriteRow>> {
|
|
1166
1574
|
(0..batch.num_rows())
|
|
1167
1575
|
.map(|row_index| {
|
|
1168
1576
|
let scope = resolve_write_version_scope(
|
|
@@ -1194,14 +1602,6 @@ fn entity_lix_state_write_rows_from_batch_with_options(
|
|
|
1194
1602
|
reject_present_entity_insert_field(batch, row_index, "lixcol_commit_id")?;
|
|
1195
1603
|
}
|
|
1196
1604
|
|
|
1197
|
-
let schema_version = optional_string_value(batch, row_index, "lixcol_schema_version")?
|
|
1198
|
-
.or_else(|| spec.schema_version.clone())
|
|
1199
|
-
.ok_or_else(|| {
|
|
1200
|
-
DataFusionError::Execution(format!(
|
|
1201
|
-
"INSERT into entity surface '{}' requires lixcol_schema_version",
|
|
1202
|
-
spec.schema_key
|
|
1203
|
-
))
|
|
1204
|
-
})?;
|
|
1205
1605
|
let snapshot_content =
|
|
1206
1606
|
entity_snapshot_content_from_batch(spec, batch, insert_column_intents, row_index)?;
|
|
1207
1607
|
let explicit_entity_id = optional_string_value(batch, row_index, "lixcol_entity_id")?;
|
|
@@ -1212,7 +1612,7 @@ fn entity_lix_state_write_rows_from_batch_with_options(
|
|
|
1212
1612
|
spec.schema_key
|
|
1213
1613
|
))
|
|
1214
1614
|
})?;
|
|
1215
|
-
Some(EntityIdentity::
|
|
1615
|
+
Some(EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
|
|
1216
1616
|
DataFusionError::Execution(format!(
|
|
1217
1617
|
"INSERT into entity surface '{}' has invalid lixcol_entity_id: {error}",
|
|
1218
1618
|
spec.schema_key
|
|
@@ -1221,7 +1621,7 @@ fn entity_lix_state_write_rows_from_batch_with_options(
|
|
|
1221
1621
|
} else {
|
|
1222
1622
|
explicit_entity_id
|
|
1223
1623
|
.map(|entity_id| {
|
|
1224
|
-
EntityIdentity::
|
|
1624
|
+
EntityIdentity::from_json_array_text(&entity_id).map_err(|error| {
|
|
1225
1625
|
DataFusionError::Execution(format!(
|
|
1226
1626
|
"INSERT into entity surface '{}' has invalid lixcol_entity_id: {error}",
|
|
1227
1627
|
spec.schema_key
|
|
@@ -1231,14 +1631,22 @@ fn entity_lix_state_write_rows_from_batch_with_options(
|
|
|
1231
1631
|
.transpose()?
|
|
1232
1632
|
};
|
|
1233
1633
|
|
|
1234
|
-
Ok(
|
|
1634
|
+
Ok(TransactionWriteRow {
|
|
1235
1635
|
entity_id,
|
|
1236
1636
|
schema_key: spec.schema_key.clone(),
|
|
1237
1637
|
file_id: optional_string_value(batch, row_index, "lixcol_file_id")?,
|
|
1238
|
-
|
|
1239
|
-
|
|
1638
|
+
snapshot: Some(TransactionJson::from_value(
|
|
1639
|
+
snapshot_content,
|
|
1640
|
+
&format!("{} insert snapshot_content", spec.schema_key),
|
|
1641
|
+
)
|
|
1642
|
+
.map_err(super::error::lix_error_to_datafusion_error)?),
|
|
1643
|
+
metadata: optional_metadata_value(
|
|
1644
|
+
batch,
|
|
1645
|
+
row_index,
|
|
1646
|
+
"lixcol_metadata",
|
|
1647
|
+
&spec.schema_key,
|
|
1648
|
+
)?,
|
|
1240
1649
|
origin: None,
|
|
1241
|
-
schema_version: schema_version,
|
|
1242
1650
|
created_at: None,
|
|
1243
1651
|
updated_at: None,
|
|
1244
1652
|
global: scope.global,
|
|
@@ -1257,7 +1665,7 @@ fn entity_snapshot_content_from_batch(
|
|
|
1257
1665
|
batch: &RecordBatch,
|
|
1258
1666
|
insert_column_intents: &InsertColumnIntents,
|
|
1259
1667
|
row_index: usize,
|
|
1260
|
-
) -> Result<
|
|
1668
|
+
) -> Result<JsonValue> {
|
|
1261
1669
|
let mut object = serde_json::Map::new();
|
|
1262
1670
|
for column in &spec.columns {
|
|
1263
1671
|
let value = match insert_column_intents.cell(batch, row_index, &column.name)? {
|
|
@@ -1271,12 +1679,7 @@ fn entity_snapshot_content_from_batch(
|
|
|
1271
1679
|
};
|
|
1272
1680
|
object.insert(column.name.clone(), value);
|
|
1273
1681
|
}
|
|
1274
|
-
|
|
1275
|
-
DataFusionError::Execution(format!(
|
|
1276
|
-
"failed to serialize entity surface '{}' snapshot_content: {error}",
|
|
1277
|
-
spec.schema_key
|
|
1278
|
-
))
|
|
1279
|
-
})
|
|
1682
|
+
Ok(JsonValue::Object(object))
|
|
1280
1683
|
}
|
|
1281
1684
|
|
|
1282
1685
|
fn entity_json_value_from_scalar(
|
|
@@ -1404,10 +1807,13 @@ fn optional_metadata_value(
|
|
|
1404
1807
|
row_index: usize,
|
|
1405
1808
|
column_name: &str,
|
|
1406
1809
|
context: &str,
|
|
1407
|
-
) -> Result<Option<
|
|
1810
|
+
) -> Result<Option<TransactionJson>> {
|
|
1408
1811
|
optional_string_value(batch, row_index, column_name)?
|
|
1409
1812
|
.map(|value| {
|
|
1410
|
-
|
|
1813
|
+
let metadata = parse_row_metadata_value(&value, context)
|
|
1814
|
+
.map_err(super::error::lix_error_to_datafusion_error)?;
|
|
1815
|
+
TransactionJson::from_value(metadata, &format!("{context} metadata"))
|
|
1816
|
+
.map_err(super::error::lix_error_to_datafusion_error)
|
|
1411
1817
|
})
|
|
1412
1818
|
.transpose()
|
|
1413
1819
|
}
|
|
@@ -1573,6 +1979,7 @@ impl ExecutionPlan for EntityScanExec {
|
|
|
1573
1979
|
fn entity_live_state_scan_request(
|
|
1574
1980
|
schema_key: &str,
|
|
1575
1981
|
active_version_id: Option<&str>,
|
|
1982
|
+
projected_schema: Option<&Schema>,
|
|
1576
1983
|
limit: Option<usize>,
|
|
1577
1984
|
) -> LiveStateScanRequest {
|
|
1578
1985
|
LiveStateScanRequest {
|
|
@@ -1583,15 +1990,40 @@ fn entity_live_state_scan_request(
|
|
|
1583
1990
|
.unwrap_or_default(),
|
|
1584
1991
|
..LiveStateFilter::default()
|
|
1585
1992
|
},
|
|
1586
|
-
projection:
|
|
1993
|
+
projection: entity_live_state_projection(projected_schema),
|
|
1587
1994
|
limit,
|
|
1588
1995
|
}
|
|
1589
1996
|
}
|
|
1590
1997
|
|
|
1998
|
+
fn entity_live_state_projection(projected_schema: Option<&Schema>) -> LiveStateProjection {
|
|
1999
|
+
let Some(schema) = projected_schema else {
|
|
2000
|
+
return LiveStateProjection::default();
|
|
2001
|
+
};
|
|
2002
|
+
let mut columns = projection_column_names(schema);
|
|
2003
|
+
if schema
|
|
2004
|
+
.fields()
|
|
2005
|
+
.iter()
|
|
2006
|
+
.any(|field| !field.name().starts_with("lixcol_"))
|
|
2007
|
+
&& !columns.iter().any(|column| column == "snapshot_content")
|
|
2008
|
+
{
|
|
2009
|
+
columns.push("snapshot_content".to_string());
|
|
2010
|
+
}
|
|
2011
|
+
LiveStateProjection { columns }
|
|
2012
|
+
}
|
|
2013
|
+
|
|
2014
|
+
fn projection_column_names(schema: &Schema) -> Vec<String> {
|
|
2015
|
+
schema
|
|
2016
|
+
.fields()
|
|
2017
|
+
.iter()
|
|
2018
|
+
.filter_map(|field| field.name().strip_prefix("lixcol_"))
|
|
2019
|
+
.map(str::to_string)
|
|
2020
|
+
.collect()
|
|
2021
|
+
}
|
|
2022
|
+
|
|
1591
2023
|
fn entity_record_batch(
|
|
1592
2024
|
spec: &EntitySurfaceSpec,
|
|
1593
2025
|
schema: SchemaRef,
|
|
1594
|
-
rows: &[
|
|
2026
|
+
rows: &[MaterializedLiveStateRow],
|
|
1595
2027
|
) -> Result<RecordBatch> {
|
|
1596
2028
|
if schema.fields().is_empty() {
|
|
1597
2029
|
let options = RecordBatchOptions::new().with_row_count(Some(rows.len()));
|
|
@@ -1616,7 +2048,7 @@ fn entity_record_batch(
|
|
|
1616
2048
|
fn entity_column_array(
|
|
1617
2049
|
spec: &EntitySurfaceSpec,
|
|
1618
2050
|
column_name: &str,
|
|
1619
|
-
rows: &[
|
|
2051
|
+
rows: &[MaterializedLiveStateRow],
|
|
1620
2052
|
snapshots: &[Option<JsonValue>],
|
|
1621
2053
|
) -> Result<ArrayRef> {
|
|
1622
2054
|
if let Some(property_name) = column_name.strip_prefix("lixcol_") {
|
|
@@ -1665,13 +2097,16 @@ fn entity_column_array(
|
|
|
1665
2097
|
})
|
|
1666
2098
|
}
|
|
1667
2099
|
|
|
1668
|
-
fn entity_system_column_array(
|
|
2100
|
+
fn entity_system_column_array(
|
|
2101
|
+
column_name: &str,
|
|
2102
|
+
rows: &[MaterializedLiveStateRow],
|
|
2103
|
+
) -> Result<ArrayRef> {
|
|
1669
2104
|
Ok(match column_name {
|
|
1670
2105
|
"entity_id" => Arc::new(StringArray::from(
|
|
1671
2106
|
rows.iter()
|
|
1672
2107
|
.map(|row| {
|
|
1673
2108
|
row.entity_id
|
|
1674
|
-
.
|
|
2109
|
+
.as_json_array_text()
|
|
1675
2110
|
.map(Some)
|
|
1676
2111
|
.map_err(lix_error_to_datafusion_error)
|
|
1677
2112
|
})
|
|
@@ -1685,7 +2120,6 @@ fn entity_system_column_array(column_name: &str, rows: &[LiveStateRow]) -> Resul
|
|
|
1685
2120
|
.map(|row| row.metadata.as_ref().map(serialize_row_metadata))
|
|
1686
2121
|
.collect::<Vec<_>>(),
|
|
1687
2122
|
)) as ArrayRef,
|
|
1688
|
-
"schema_version" => string_array(rows.iter().map(|row| Some(row.schema_version.as_str()))),
|
|
1689
2123
|
"created_at" => string_array(rows.iter().map(|row| Some(row.created_at.as_str()))),
|
|
1690
2124
|
"updated_at" => string_array(rows.iter().map(|row| Some(row.updated_at.as_str()))),
|
|
1691
2125
|
"global" => Arc::new(BooleanArray::from(
|
|
@@ -1801,12 +2235,11 @@ fn arrow_data_type_for_entity_column_type(column_type: EntityColumnType) -> Data
|
|
|
1801
2235
|
pub(super) fn entity_system_fields(variant: EntityProviderVariant) -> Vec<Field> {
|
|
1802
2236
|
if variant == EntityProviderVariant::History {
|
|
1803
2237
|
return vec![
|
|
1804
|
-
|
|
2238
|
+
json_field(HISTORY_COL_ENTITY_ID, false),
|
|
1805
2239
|
Field::new(HISTORY_COL_SCHEMA_KEY, DataType::Utf8, false),
|
|
1806
2240
|
Field::new(HISTORY_COL_FILE_ID, DataType::Utf8, true),
|
|
1807
2241
|
json_field(HISTORY_COL_SNAPSHOT_CONTENT, true),
|
|
1808
2242
|
json_field(HISTORY_COL_METADATA, true),
|
|
1809
|
-
Field::new(HISTORY_COL_SCHEMA_VERSION, DataType::Utf8, false),
|
|
1810
2243
|
Field::new(HISTORY_COL_CHANGE_ID, DataType::Utf8, false),
|
|
1811
2244
|
Field::new(HISTORY_COL_OBSERVED_COMMIT_ID, DataType::Utf8, false),
|
|
1812
2245
|
Field::new(HISTORY_COL_COMMIT_CREATED_AT, DataType::Utf8, false),
|
|
@@ -1816,12 +2249,11 @@ pub(super) fn entity_system_fields(variant: EntityProviderVariant) -> Vec<Field>
|
|
|
1816
2249
|
}
|
|
1817
2250
|
|
|
1818
2251
|
let mut fields = vec![
|
|
1819
|
-
|
|
2252
|
+
json_field("lixcol_entity_id", true),
|
|
1820
2253
|
Field::new("lixcol_schema_key", DataType::Utf8, false),
|
|
1821
2254
|
Field::new("lixcol_file_id", DataType::Utf8, true),
|
|
1822
2255
|
json_field("lixcol_snapshot_content", true),
|
|
1823
2256
|
json_field("lixcol_metadata", true),
|
|
1824
|
-
Field::new("lixcol_schema_version", DataType::Utf8, true),
|
|
1825
2257
|
Field::new("lixcol_created_at", DataType::Utf8, true),
|
|
1826
2258
|
Field::new("lixcol_updated_at", DataType::Utf8, true),
|
|
1827
2259
|
Field::new("lixcol_global", DataType::Boolean, true),
|
|
@@ -1855,11 +2287,6 @@ fn derive_entity_surface_spec_from_schema(
|
|
|
1855
2287
|
)
|
|
1856
2288
|
})?;
|
|
1857
2289
|
|
|
1858
|
-
let schema_version = schema
|
|
1859
|
-
.get("x-lix-version")
|
|
1860
|
-
.and_then(JsonValue::as_str)
|
|
1861
|
-
.map(ToOwned::to_owned);
|
|
1862
|
-
|
|
1863
2290
|
let properties = schema
|
|
1864
2291
|
.get("properties")
|
|
1865
2292
|
.and_then(JsonValue::as_object)
|
|
@@ -1895,7 +2322,6 @@ fn derive_entity_surface_spec_from_schema(
|
|
|
1895
2322
|
|
|
1896
2323
|
Ok(EntitySurfaceSpec {
|
|
1897
2324
|
schema_key: schema_key.to_string(),
|
|
1898
|
-
schema_version,
|
|
1899
2325
|
primary_key_paths,
|
|
1900
2326
|
columns,
|
|
1901
2327
|
})
|
|
@@ -1927,7 +2353,7 @@ fn parse_primary_key_paths(schema: &JsonValue) -> std::result::Result<Vec<Vec<St
|
|
|
1927
2353
|
.collect()
|
|
1928
2354
|
}
|
|
1929
2355
|
|
|
1930
|
-
// TODO(
|
|
2356
|
+
// TODO(engine): share JSON Pointer parsing with schema/canonical validation once
|
|
1931
2357
|
// those helpers have a clean module boundary for SQL providers.
|
|
1932
2358
|
fn parse_json_pointer(pointer: &str) -> std::result::Result<Vec<String>, LixError> {
|
|
1933
2359
|
if pointer.is_empty() {
|
|
@@ -1968,14 +2394,11 @@ fn decode_json_pointer_segment(segment: &str) -> std::result::Result<String, Lix
|
|
|
1968
2394
|
}
|
|
1969
2395
|
|
|
1970
2396
|
fn schema_exposed_as_entity_surface(schema_key: &str) -> bool {
|
|
1971
|
-
!matches!(
|
|
1972
|
-
|
|
1973
|
-
|
|
1974
|
-
|
|
1975
|
-
|
|
1976
|
-
| "lix_change_set"
|
|
1977
|
-
| "lix_change_set_element"
|
|
1978
|
-
)
|
|
2397
|
+
!matches!(schema_key, "lix_active_account" | "lix_change")
|
|
2398
|
+
}
|
|
2399
|
+
|
|
2400
|
+
fn schema_exposed_as_entity_history_surface(schema_key: &str) -> bool {
|
|
2401
|
+
!matches!(schema_key, "lix_commit" | "lix_commit_edge")
|
|
1979
2402
|
}
|
|
1980
2403
|
|
|
1981
2404
|
fn entity_column_type_from_schema(schema: &JsonValue) -> Option<EntityColumnType> {
|
|
@@ -2039,7 +2462,10 @@ mod tests {
|
|
|
2039
2462
|
use datafusion::arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray};
|
|
2040
2463
|
use datafusion::arrow::datatypes::{DataType, Field, Schema};
|
|
2041
2464
|
use datafusion::arrow::record_batch::RecordBatch;
|
|
2465
|
+
use datafusion::common::{Column, ScalarValue};
|
|
2042
2466
|
use datafusion::execution::TaskContext;
|
|
2467
|
+
use datafusion::logical_expr::expr::InList;
|
|
2468
|
+
use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
|
|
2043
2469
|
use serde_json::json;
|
|
2044
2470
|
|
|
2045
2471
|
use super::{
|
|
@@ -2052,12 +2478,15 @@ mod tests {
|
|
|
2052
2478
|
FunctionProvider, FunctionProviderHandle, SharedFunctionProvider, SystemFunctionProvider,
|
|
2053
2479
|
};
|
|
2054
2480
|
use crate::live_state::{
|
|
2055
|
-
LiveStateReader,
|
|
2481
|
+
LiveStateReader, LiveStateRowRequest, LiveStateScanRequest, MaterializedLiveStateRow,
|
|
2056
2482
|
};
|
|
2057
2483
|
use crate::sql2::dml::InsertSink;
|
|
2058
2484
|
use crate::sql2::write_normalization::InsertColumnIntents;
|
|
2059
2485
|
use crate::sql2::{SqlWriteContext, SqlWriteExecutionContext};
|
|
2060
|
-
use crate::transaction::types::{
|
|
2486
|
+
use crate::transaction::types::{
|
|
2487
|
+
TransactionJson, TransactionWrite, TransactionWriteMode, TransactionWriteOutcome,
|
|
2488
|
+
TransactionWriteRow,
|
|
2489
|
+
};
|
|
2061
2490
|
use crate::version::{VersionHead, VersionRefReader};
|
|
2062
2491
|
use crate::LixError;
|
|
2063
2492
|
|
|
@@ -2065,8 +2494,8 @@ mod tests {
|
|
|
2065
2494
|
struct EmptyVersionRefReader;
|
|
2066
2495
|
#[derive(Default)]
|
|
2067
2496
|
struct CapturingWriteContext {
|
|
2068
|
-
rows: Vec<
|
|
2069
|
-
writes: Vec<
|
|
2497
|
+
rows: Vec<MaterializedLiveStateRow>,
|
|
2498
|
+
writes: Vec<TransactionWrite>,
|
|
2070
2499
|
}
|
|
2071
2500
|
|
|
2072
2501
|
#[async_trait]
|
|
@@ -2074,14 +2503,14 @@ mod tests {
|
|
|
2074
2503
|
async fn scan_rows(
|
|
2075
2504
|
&self,
|
|
2076
2505
|
_request: &LiveStateScanRequest,
|
|
2077
|
-
) -> Result<Vec<
|
|
2506
|
+
) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
|
|
2078
2507
|
Ok(vec![])
|
|
2079
2508
|
}
|
|
2080
2509
|
|
|
2081
2510
|
async fn load_row(
|
|
2082
2511
|
&self,
|
|
2083
2512
|
_request: &LiveStateRowRequest,
|
|
2084
|
-
) -> Result<Option<
|
|
2513
|
+
) -> Result<Option<MaterializedLiveStateRow>, LixError> {
|
|
2085
2514
|
Ok(None)
|
|
2086
2515
|
}
|
|
2087
2516
|
}
|
|
@@ -2113,7 +2542,10 @@ mod tests {
|
|
|
2113
2542
|
&self,
|
|
2114
2543
|
hashes: &[crate::binary_cas::BlobHash],
|
|
2115
2544
|
) -> Result<crate::binary_cas::BlobBytesBatch, LixError> {
|
|
2116
|
-
Ok(crate::binary_cas::BlobBytesBatch::
|
|
2545
|
+
Ok(crate::binary_cas::BlobBytesBatch::new(vec![
|
|
2546
|
+
None;
|
|
2547
|
+
hashes.len()
|
|
2548
|
+
]))
|
|
2117
2549
|
}
|
|
2118
2550
|
}
|
|
2119
2551
|
|
|
@@ -2141,7 +2573,7 @@ mod tests {
|
|
|
2141
2573
|
async fn scan_live_state(
|
|
2142
2574
|
&mut self,
|
|
2143
2575
|
_request: &LiveStateScanRequest,
|
|
2144
|
-
) -> Result<Vec<
|
|
2576
|
+
) -> Result<Vec<MaterializedLiveStateRow>, LixError> {
|
|
2145
2577
|
Ok(self.rows.clone())
|
|
2146
2578
|
}
|
|
2147
2579
|
|
|
@@ -2155,14 +2587,17 @@ mod tests {
|
|
|
2155
2587
|
Ok(Some(format!("commit-{version_id}")))
|
|
2156
2588
|
}
|
|
2157
2589
|
|
|
2158
|
-
async fn stage_write(
|
|
2590
|
+
async fn stage_write(
|
|
2591
|
+
&mut self,
|
|
2592
|
+
write: TransactionWrite,
|
|
2593
|
+
) -> Result<TransactionWriteOutcome, LixError> {
|
|
2159
2594
|
self.writes.push(write);
|
|
2160
|
-
Ok(
|
|
2595
|
+
Ok(TransactionWriteOutcome { count: 0 })
|
|
2161
2596
|
}
|
|
2162
2597
|
}
|
|
2163
2598
|
|
|
2164
|
-
fn live_row() ->
|
|
2165
|
-
|
|
2599
|
+
fn live_row() -> MaterializedLiveStateRow {
|
|
2600
|
+
MaterializedLiveStateRow {
|
|
2166
2601
|
entity_id: crate::entity_identity::EntityIdentity::single("entity-1"),
|
|
2167
2602
|
schema_key: "project_message".to_string(),
|
|
2168
2603
|
file_id: None,
|
|
@@ -2170,8 +2605,8 @@ mod tests {
|
|
|
2170
2605
|
"{\"body\":\"hello\",\"rating\":4.5,\"count\":7,\"enabled\":true,\"meta\":{\"x\":1}}"
|
|
2171
2606
|
.to_string(),
|
|
2172
2607
|
),
|
|
2173
|
-
metadata: Some(json!({"source": "test"})),
|
|
2174
|
-
|
|
2608
|
+
metadata: Some(json!({"source": "test"}).to_string()),
|
|
2609
|
+
deleted: false,
|
|
2175
2610
|
version_id: "version-a".to_string(),
|
|
2176
2611
|
change_id: Some("change-a".to_string()),
|
|
2177
2612
|
commit_id: Some("commit-a".to_string()),
|
|
@@ -2186,7 +2621,6 @@ mod tests {
|
|
|
2186
2621
|
Arc::new(
|
|
2187
2622
|
derive_entity_surface_spec_from_schema(&json!({
|
|
2188
2623
|
"x-lix-key": "project_message",
|
|
2189
|
-
"x-lix-version": "1",
|
|
2190
2624
|
"type": "object",
|
|
2191
2625
|
"properties": {
|
|
2192
2626
|
"body": { "type": "string" },
|
|
@@ -2204,7 +2638,6 @@ mod tests {
|
|
|
2204
2638
|
Arc::new(
|
|
2205
2639
|
derive_entity_surface_spec_from_schema(&json!({
|
|
2206
2640
|
"x-lix-key": "project_message",
|
|
2207
|
-
"x-lix-version": "1",
|
|
2208
2641
|
"x-lix-primary-key": ["/id"],
|
|
2209
2642
|
"type": "object",
|
|
2210
2643
|
"properties": {
|
|
@@ -2221,6 +2654,22 @@ mod tests {
|
|
|
2221
2654
|
Arc::new(StringArray::from(values)) as ArrayRef
|
|
2222
2655
|
}
|
|
2223
2656
|
|
|
2657
|
+
fn string_literal(value: &str) -> Expr {
|
|
2658
|
+
Expr::Literal(ScalarValue::Utf8(Some(value.to_string())), None)
|
|
2659
|
+
}
|
|
2660
|
+
|
|
2661
|
+
fn column(name: &str) -> Expr {
|
|
2662
|
+
Expr::Column(Column::from_name(name))
|
|
2663
|
+
}
|
|
2664
|
+
|
|
2665
|
+
fn eq_filter(column_name: &str, value: &str) -> Expr {
|
|
2666
|
+
Expr::BinaryExpr(BinaryExpr::new(
|
|
2667
|
+
Box::new(column(column_name)),
|
|
2668
|
+
Operator::Eq,
|
|
2669
|
+
Box::new(string_literal(value)),
|
|
2670
|
+
))
|
|
2671
|
+
}
|
|
2672
|
+
|
|
2224
2673
|
fn entity_insert_batch(include_version: bool, global: bool) -> RecordBatch {
|
|
2225
2674
|
let mut fields = vec![
|
|
2226
2675
|
Field::new("body", DataType::Utf8, true),
|
|
@@ -2239,7 +2688,7 @@ mod tests {
|
|
|
2239
2688
|
Arc::new(BooleanArray::from(vec![true])) as ArrayRef,
|
|
2240
2689
|
string_column(vec![Some("{\"x\":1}")]),
|
|
2241
2690
|
Arc::new(Float64Array::from(vec![4.5])) as ArrayRef,
|
|
2242
|
-
string_column(vec![Some("entity-1")]),
|
|
2691
|
+
string_column(vec![Some("[\"entity-1\"]")]),
|
|
2243
2692
|
string_column(vec![Some("{\"source\":\"entity\"}")]),
|
|
2244
2693
|
Arc::new(BooleanArray::from(vec![global])) as ArrayRef,
|
|
2245
2694
|
Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
|
|
@@ -2266,7 +2715,7 @@ mod tests {
|
|
|
2266
2715
|
];
|
|
2267
2716
|
if include_entity_id {
|
|
2268
2717
|
fields.push(Field::new("lixcol_entity_id", DataType::Utf8, false));
|
|
2269
|
-
columns.push(string_column(vec![Some("message-1")]));
|
|
2718
|
+
columns.push(string_column(vec![Some("[\"message-1\"]")]));
|
|
2270
2719
|
}
|
|
2271
2720
|
|
|
2272
2721
|
RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
|
|
@@ -2283,7 +2732,6 @@ mod tests {
|
|
|
2283
2732
|
fn derives_entity_surface_spec_from_schema_definition() {
|
|
2284
2733
|
let spec = derive_entity_surface_spec_from_schema(&json!({
|
|
2285
2734
|
"x-lix-key": "project_message",
|
|
2286
|
-
"x-lix-version": "1",
|
|
2287
2735
|
"type": "object",
|
|
2288
2736
|
"properties": {
|
|
2289
2737
|
"body": { "type": "string" },
|
|
@@ -2295,7 +2743,6 @@ mod tests {
|
|
|
2295
2743
|
.expect("schema should derive entity surface spec");
|
|
2296
2744
|
|
|
2297
2745
|
assert_eq!(spec.schema_key, "project_message");
|
|
2298
|
-
assert_eq!(spec.schema_version.as_deref(), Some("1"));
|
|
2299
2746
|
assert_eq!(
|
|
2300
2747
|
spec.visible_column_names().collect::<Vec<_>>(),
|
|
2301
2748
|
vec!["body", "meta", "rating"]
|
|
@@ -2320,7 +2767,6 @@ mod tests {
|
|
|
2320
2767
|
fn entity_surface_spec_rejects_properties_without_projection_type() {
|
|
2321
2768
|
let error = derive_entity_surface_spec_from_schema(&json!({
|
|
2322
2769
|
"x-lix-key": "project_message",
|
|
2323
|
-
"x-lix-version": "1",
|
|
2324
2770
|
"x-lix-primary-key": ["/id"],
|
|
2325
2771
|
"type": "object",
|
|
2326
2772
|
"properties": {
|
|
@@ -2463,7 +2909,7 @@ mod tests {
|
|
|
2463
2909
|
.downcast_ref::<datafusion::arrow::array::StringArray>()
|
|
2464
2910
|
.expect("entity id is string")
|
|
2465
2911
|
.value(0),
|
|
2466
|
-
"entity-1"
|
|
2912
|
+
"[\"entity-1\"]"
|
|
2467
2913
|
);
|
|
2468
2914
|
assert_eq!(
|
|
2469
2915
|
batch
|
|
@@ -2498,6 +2944,80 @@ mod tests {
|
|
|
2498
2944
|
assert!(provider.schema.field_with_name("lixcol_version_id").is_ok());
|
|
2499
2945
|
}
|
|
2500
2946
|
|
|
2947
|
+
#[test]
|
|
2948
|
+
fn primary_key_filters_route_entity_ids_for_string_primary_key() {
|
|
2949
|
+
let spec = entity_insert_spec_with_primary_key();
|
|
2950
|
+
let filters = vec![
|
|
2951
|
+
eq_filter("id", "entity-a"),
|
|
2952
|
+
Expr::InList(InList::new(
|
|
2953
|
+
Box::new(column("id")),
|
|
2954
|
+
vec![string_literal("entity-b"), string_literal("entity-a")],
|
|
2955
|
+
false,
|
|
2956
|
+
)),
|
|
2957
|
+
];
|
|
2958
|
+
|
|
2959
|
+
let entity_ids = super::entity_ids_from_primary_key_filters(&spec, &filters)
|
|
2960
|
+
.expect("primary-key filters should analyze")
|
|
2961
|
+
.expect("primary-key filters should produce a constraint");
|
|
2962
|
+
|
|
2963
|
+
assert_eq!(
|
|
2964
|
+
entity_ids,
|
|
2965
|
+
vec![crate::entity_identity::EntityIdentity::single("entity-a")]
|
|
2966
|
+
);
|
|
2967
|
+
}
|
|
2968
|
+
|
|
2969
|
+
#[test]
|
|
2970
|
+
fn primary_key_filter_analyzer_models_boolean_predicates() {
|
|
2971
|
+
let spec = entity_insert_spec_with_primary_key();
|
|
2972
|
+
let analyzer = super::EntityPrimaryKeyFilterAnalyzer::new(&spec);
|
|
2973
|
+
let disjunction = Expr::BinaryExpr(BinaryExpr::new(
|
|
2974
|
+
Box::new(eq_filter("id", "entity-a")),
|
|
2975
|
+
Operator::Or,
|
|
2976
|
+
Box::new(eq_filter("id", "entity-b")),
|
|
2977
|
+
));
|
|
2978
|
+
let contradiction = Expr::BinaryExpr(BinaryExpr::new(
|
|
2979
|
+
Box::new(eq_filter("id", "entity-a")),
|
|
2980
|
+
Operator::And,
|
|
2981
|
+
Box::new(eq_filter("id", "entity-b")),
|
|
2982
|
+
));
|
|
2983
|
+
|
|
2984
|
+
let disjunction_ids = analyzer
|
|
2985
|
+
.analyze(&disjunction)
|
|
2986
|
+
.expect("OR should analyze")
|
|
2987
|
+
.expect("OR should produce an entity-id set");
|
|
2988
|
+
let contradiction_ids = analyzer
|
|
2989
|
+
.analyze(&contradiction)
|
|
2990
|
+
.expect("AND should analyze")
|
|
2991
|
+
.expect("AND should produce an entity-id set");
|
|
2992
|
+
|
|
2993
|
+
assert_eq!(
|
|
2994
|
+
disjunction_ids.into_iter().collect::<Vec<_>>(),
|
|
2995
|
+
vec![
|
|
2996
|
+
crate::entity_identity::EntityIdentity::single("entity-a"),
|
|
2997
|
+
crate::entity_identity::EntityIdentity::single("entity-b"),
|
|
2998
|
+
]
|
|
2999
|
+
);
|
|
3000
|
+
assert!(contradiction_ids.is_empty());
|
|
3001
|
+
}
|
|
3002
|
+
|
|
3003
|
+
#[test]
|
|
3004
|
+
fn primary_key_filters_ignore_non_key_and_negated_predicates() {
|
|
3005
|
+
let spec = entity_insert_spec_with_primary_key();
|
|
3006
|
+
let filters = vec![
|
|
3007
|
+
eq_filter("body", "hello"),
|
|
3008
|
+
Expr::InList(InList::new(
|
|
3009
|
+
Box::new(column("id")),
|
|
3010
|
+
vec![string_literal("entity-a")],
|
|
3011
|
+
true,
|
|
3012
|
+
)),
|
|
3013
|
+
];
|
|
3014
|
+
|
|
3015
|
+
assert!(super::entity_ids_from_primary_key_filters(&spec, &filters)
|
|
3016
|
+
.expect("ignored filters should analyze")
|
|
3017
|
+
.unwrap_or_default()
|
|
3018
|
+
.is_empty());
|
|
3019
|
+
}
|
|
3020
|
+
|
|
2501
3021
|
#[test]
|
|
2502
3022
|
fn decodes_by_version_entity_insert_into_lix_state_write_row() {
|
|
2503
3023
|
let spec = entity_insert_spec();
|
|
@@ -2515,22 +3035,17 @@ mod tests {
|
|
|
2515
3035
|
Some(&crate::entity_identity::EntityIdentity::single("entity-1"))
|
|
2516
3036
|
);
|
|
2517
3037
|
assert_eq!(rows[0].schema_key, "project_message");
|
|
2518
|
-
assert_eq!(rows[0].schema_version.as_str(), "1");
|
|
2519
3038
|
assert_eq!(rows[0].version_id, "version-a");
|
|
2520
3039
|
assert_eq!(
|
|
2521
3040
|
rows[0].metadata.as_ref(),
|
|
2522
|
-
Some(&
|
|
3041
|
+
Some(&TransactionJson::from_value_for_test(
|
|
3042
|
+
json!({"source": "entity"})
|
|
3043
|
+
))
|
|
2523
3044
|
);
|
|
2524
3045
|
assert!(!rows[0].global);
|
|
2525
3046
|
assert_eq!(
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
.snapshot_content
|
|
2529
|
-
.as_deref()
|
|
2530
|
-
.expect("snapshot_content")
|
|
2531
|
-
)
|
|
2532
|
-
.expect("snapshot_content JSON"),
|
|
2533
|
-
json!({
|
|
3047
|
+
rows[0].snapshot.as_ref().expect("snapshot_content"),
|
|
3048
|
+
&json!({
|
|
2534
3049
|
"body": "hello",
|
|
2535
3050
|
"count": 7,
|
|
2536
3051
|
"enabled": true,
|
|
@@ -2554,14 +3069,8 @@ mod tests {
|
|
|
2554
3069
|
assert_eq!(rows.len(), 1);
|
|
2555
3070
|
assert_eq!(rows[0].entity_id, None);
|
|
2556
3071
|
assert_eq!(
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
.snapshot_content
|
|
2560
|
-
.as_deref()
|
|
2561
|
-
.expect("snapshot_content")
|
|
2562
|
-
)
|
|
2563
|
-
.expect("snapshot_content JSON"),
|
|
2564
|
-
json!({
|
|
3072
|
+
rows[0].snapshot.as_ref().expect("snapshot_content"),
|
|
3073
|
+
&json!({
|
|
2565
3074
|
"body": "hello",
|
|
2566
3075
|
"id": "message-1"
|
|
2567
3076
|
})
|
|
@@ -2675,17 +3184,19 @@ mod tests {
|
|
|
2675
3184
|
assert_eq!(count, 1);
|
|
2676
3185
|
assert_eq!(
|
|
2677
3186
|
write_context.writes.as_slice(),
|
|
2678
|
-
&[
|
|
3187
|
+
&[TransactionWrite::Rows {
|
|
3188
|
+
mode: TransactionWriteMode::Insert,
|
|
3189
|
+
rows: vec![TransactionWriteRow {
|
|
2679
3190
|
entity_id: Some(crate::entity_identity::EntityIdentity::single("entity-1")),
|
|
2680
3191
|
schema_key: "project_message".to_string(),
|
|
2681
3192
|
file_id: None,
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
3193
|
+
snapshot: Some(TransactionJson::from_value_for_test(
|
|
3194
|
+
json!({"body":"hello","count":7,"enabled":true,"meta":{"x":1},"rating":4.5})
|
|
3195
|
+
)),
|
|
3196
|
+
metadata: Some(TransactionJson::from_value_for_test(
|
|
3197
|
+
json!({"source": "entity"})
|
|
3198
|
+
)),
|
|
2687
3199
|
origin: None,
|
|
2688
|
-
schema_version: "1".to_string(),
|
|
2689
3200
|
created_at: None,
|
|
2690
3201
|
updated_at: None,
|
|
2691
3202
|
global: false,
|