floe-python 0.3.10__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {floe_python-0.3.10 → floe_python-0.4.0}/Cargo.lock +56 -3
- {floe_python-0.3.10 → floe_python-0.4.0}/PKG-INFO +1 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/Cargo.toml +3 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/mismatch.rs +4 -3
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/parse.rs +37 -6
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/storage.rs +1 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/types.rs +27 -38
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/validate.rs +39 -51
- floe_python-0.4.0/crates/floe-core/src/errors.rs +27 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/format.rs +33 -36
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/mod.rs +42 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/ops/inputs.rs +5 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/adls.rs +121 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/gcs.rs +123 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/local.rs +110 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/s3.rs +113 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/unique_seed/mod.rs +15 -41
- floe_python-0.4.0/crates/floe-core/src/io/write/accepted.rs +1 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta/unity.rs +16 -4
- floe_python-0.4.0/crates/floe-core/src/io/write/delta/unity_tests.rs +209 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta.rs +73 -30
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/rest.rs +6 -4
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg.rs +280 -44
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/mod.rs +1 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/parquet.rs +93 -30
- floe_python-0.4.0/crates/floe-core/src/io/write/sink_format.rs +66 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +2 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +15 -15
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/merge/shared.rs +55 -112
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/mod.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/lineage/mod.rs +85 -20
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/manifest/builder.rs +24 -7
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/manifest/model.rs +8 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/profile/parse.rs +66 -2
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/profile/types.rs +21 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/profile/validate.rs +24 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/report/entity.rs +35 -14
- floe_python-0.4.0/crates/floe-core/src/run/entity/accepted_write.rs +156 -0
- floe_python-0.4.0/crates/floe-core/src/run/entity/incremental.rs +281 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/mod.rs +53 -52
- floe_python-0.4.0/crates/floe-core/src/run/entity/pii.rs +446 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/precheck.rs +3 -2
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/validate_split.rs +12 -16
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/mod.rs +40 -37
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/output.rs +13 -16
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/runtime.rs +5 -4
- floe_python-0.4.0/crates/floe-core/src/state/mod.rs +730 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/delta_run.rs +2 -2
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/add_entity.rs +2 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/adls_validation.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/catalogs.rs +2 -2
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/config_validation.rs +4 -4
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/lineage_validation.rs +57 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/parse.rs +4 -4
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/inputs.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/local.rs +48 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/delta_write.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/object_store.rs +8 -8
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/manifest/mod.rs +22 -3
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/profile/parse.rs +28 -4
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/profile/validate.rs +36 -1
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/entity/incremental.rs +39 -0
- floe_python-0.4.0/crates/floe-core/tests/unit/run/lineage.rs +210 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/mod.rs +1 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/state/mod.rs +136 -77
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/Cargo.toml +2 -2
- {floe_python-0.3.10 → floe_python-0.4.0}/pyproject.toml +1 -1
- floe_python-0.3.10/crates/floe-core/src/errors.rs +0 -57
- floe_python-0.3.10/crates/floe-core/src/io/unique_seed/delta.rs +0 -47
- floe_python-0.3.10/crates/floe-core/src/io/unique_seed/iceberg.rs +0 -299
- floe_python-0.3.10/crates/floe-core/src/io/unique_seed/parquet.rs +0 -80
- floe_python-0.3.10/crates/floe-core/src/io/write/accepted.rs +0 -56
- floe_python-0.3.10/crates/floe-core/src/run/entity/accepted_write.rs +0 -242
- floe_python-0.3.10/crates/floe-core/src/run/entity/incremental.rs +0 -177
- floe_python-0.3.10/crates/floe-core/src/run/entity/pii.rs +0 -182
- floe_python-0.3.10/crates/floe-core/src/state/mod.rs +0 -356
- {floe_python-0.3.10 → floe_python-0.4.0}/Cargo.toml +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/README.md +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/README.md +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/add_entity.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/cast.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/normalize.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/not_null.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/unique.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/catalog.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/location.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/template.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/yaml_decode.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/avro.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/csv.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/json.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/json_selector.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/orc.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/parquet.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/xlsx.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/xml.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/object_store.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/target.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/csv.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta/options.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/context.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/metrics.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/parts.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/lib.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/log.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/manifest/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/profile/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/report/build.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/report/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/report/output.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/context.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/process.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/resolve.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/events.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/file.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/perf.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/runner/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/runner/outcome.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/vars/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/vars/resolve.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/warnings.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/archive_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/dry_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/iceberg_glue_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/iceberg_s3_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/local_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/common.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/adls_storage.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/local_storage.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/remote_base.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/templating.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/format.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/report/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/report/storage.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/checks.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/pii.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/report.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/.gitignore +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/README.md +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/functions.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/lib.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/observer.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/types/config.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/types/errors.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/types/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/types/outcome.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/fixtures/config.yml +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/fixtures/profile.yml +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/test_floe.py +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/python/floe/__init__.py +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/python/floe/_floe.pyi +0 -0
- {floe_python-0.3.10 → floe_python-0.4.0}/python/floe/py.typed +0 -0
|
@@ -476,6 +476,16 @@ version = "0.3.2"
|
|
|
476
476
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
477
477
|
checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063"
|
|
478
478
|
|
|
479
|
+
[[package]]
|
|
480
|
+
name = "assert-json-diff"
|
|
481
|
+
version = "2.0.2"
|
|
482
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
483
|
+
checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12"
|
|
484
|
+
dependencies = [
|
|
485
|
+
"serde",
|
|
486
|
+
"serde_json",
|
|
487
|
+
]
|
|
488
|
+
|
|
479
489
|
[[package]]
|
|
480
490
|
name = "assert_cmd"
|
|
481
491
|
version = "2.1.2"
|
|
@@ -1708,6 +1718,15 @@ version = "1.0.4"
|
|
|
1708
1718
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1709
1719
|
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
|
1710
1720
|
|
|
1721
|
+
[[package]]
|
|
1722
|
+
name = "colored"
|
|
1723
|
+
version = "3.1.1"
|
|
1724
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1725
|
+
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
|
|
1726
|
+
dependencies = [
|
|
1727
|
+
"windows-sys 0.61.2",
|
|
1728
|
+
]
|
|
1729
|
+
|
|
1711
1730
|
[[package]]
|
|
1712
1731
|
name = "comfy-table"
|
|
1713
1732
|
version = "7.2.2"
|
|
@@ -3380,7 +3399,7 @@ dependencies = [
|
|
|
3380
3399
|
|
|
3381
3400
|
[[package]]
|
|
3382
3401
|
name = "floe-cli"
|
|
3383
|
-
version = "0.3.10"
|
|
3402
|
+
version = "0.4.0"
|
|
3384
3403
|
dependencies = [
|
|
3385
3404
|
"assert_cmd",
|
|
3386
3405
|
"clap",
|
|
@@ -3393,7 +3412,7 @@ dependencies = [
|
|
|
3393
3412
|
|
|
3394
3413
|
[[package]]
|
|
3395
3414
|
name = "floe-core"
|
|
3396
|
-
version = "0.3.10"
|
|
3415
|
+
version = "0.4.0"
|
|
3397
3416
|
dependencies = [
|
|
3398
3417
|
"apache-avro 0.16.0",
|
|
3399
3418
|
"arrow",
|
|
@@ -3414,6 +3433,7 @@ dependencies = [
|
|
|
3414
3433
|
"iceberg",
|
|
3415
3434
|
"iceberg-catalog-rest",
|
|
3416
3435
|
"iceberg-storage-opendal",
|
|
3436
|
+
"mockito",
|
|
3417
3437
|
"orc-rust",
|
|
3418
3438
|
"polars",
|
|
3419
3439
|
"rayon",
|
|
@@ -3425,6 +3445,7 @@ dependencies = [
|
|
|
3425
3445
|
"serde_yaml",
|
|
3426
3446
|
"sha2",
|
|
3427
3447
|
"tempfile",
|
|
3448
|
+
"thiserror 1.0.69",
|
|
3428
3449
|
"time",
|
|
3429
3450
|
"tokio",
|
|
3430
3451
|
"url",
|
|
@@ -3434,7 +3455,7 @@ dependencies = [
|
|
|
3434
3455
|
|
|
3435
3456
|
[[package]]
|
|
3436
3457
|
name = "floe-python"
|
|
3437
|
-
version = "0.3.10"
|
|
3458
|
+
version = "0.4.0"
|
|
3438
3459
|
dependencies = [
|
|
3439
3460
|
"floe-core",
|
|
3440
3461
|
"pyo3",
|
|
@@ -4046,6 +4067,7 @@ dependencies = [
|
|
|
4046
4067
|
"http 1.4.0",
|
|
4047
4068
|
"http-body 1.0.1",
|
|
4048
4069
|
"httparse",
|
|
4070
|
+
"httpdate",
|
|
4049
4071
|
"itoa",
|
|
4050
4072
|
"pin-project-lite",
|
|
4051
4073
|
"pin-utils",
|
|
@@ -4861,6 +4883,31 @@ dependencies = [
|
|
|
4861
4883
|
"windows-sys 0.61.2",
|
|
4862
4884
|
]
|
|
4863
4885
|
|
|
4886
|
+
[[package]]
|
|
4887
|
+
name = "mockito"
|
|
4888
|
+
version = "1.7.2"
|
|
4889
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4890
|
+
checksum = "90820618712cab19cfc46b274c6c22546a82affcb3c3bdf0f29e3db8e1bb92c0"
|
|
4891
|
+
dependencies = [
|
|
4892
|
+
"assert-json-diff",
|
|
4893
|
+
"bytes",
|
|
4894
|
+
"colored",
|
|
4895
|
+
"futures-core",
|
|
4896
|
+
"http 1.4.0",
|
|
4897
|
+
"http-body 1.0.1",
|
|
4898
|
+
"http-body-util",
|
|
4899
|
+
"hyper 1.8.1",
|
|
4900
|
+
"hyper-util",
|
|
4901
|
+
"log",
|
|
4902
|
+
"pin-project-lite",
|
|
4903
|
+
"rand 0.9.4",
|
|
4904
|
+
"regex",
|
|
4905
|
+
"serde_json",
|
|
4906
|
+
"serde_urlencoded",
|
|
4907
|
+
"similar",
|
|
4908
|
+
"tokio",
|
|
4909
|
+
]
|
|
4910
|
+
|
|
4864
4911
|
[[package]]
|
|
4865
4912
|
name = "moka"
|
|
4866
4913
|
version = "0.12.13"
|
|
@@ -7420,6 +7467,12 @@ version = "0.1.5"
|
|
|
7420
7467
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
7421
7468
|
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
|
|
7422
7469
|
|
|
7470
|
+
[[package]]
|
|
7471
|
+
name = "similar"
|
|
7472
|
+
version = "2.7.0"
|
|
7473
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
7474
|
+
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
|
|
7475
|
+
|
|
7423
7476
|
[[package]]
|
|
7424
7477
|
name = "simple_asn1"
|
|
7425
7478
|
version = "0.6.3"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "floe-core"
|
|
3
|
-
version = "0.3.10"
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
description = "Core library for Floe, a YAML-driven technical ingestion tool."
|
|
6
6
|
license = "MIT"
|
|
@@ -48,9 +48,11 @@ orc-rust = "0.7.1"
|
|
|
48
48
|
reqwest = { version = "0.12", default-features = true, features = ["native-tls-vendored", "json", "blocking"] }
|
|
49
49
|
sha2 = "0.10"
|
|
50
50
|
hex = "0.4"
|
|
51
|
+
thiserror = "1"
|
|
51
52
|
|
|
52
53
|
[dev-dependencies]
|
|
53
54
|
rust_xlsxwriter = "0.67"
|
|
55
|
+
mockito = "1"
|
|
54
56
|
|
|
55
57
|
[features]
|
|
56
58
|
vendored-openssl = []
|
|
@@ -2,6 +2,7 @@ use std::collections::HashMap;
|
|
|
2
2
|
|
|
3
3
|
use polars::prelude::{DataFrame, DataType, Series};
|
|
4
4
|
|
|
5
|
+
use crate::config::PolicySeverity;
|
|
5
6
|
use crate::errors::RunError;
|
|
6
7
|
use crate::{config, report, ConfigError, FloeResult};
|
|
7
8
|
|
|
@@ -124,7 +125,7 @@ pub fn plan_schema_mismatch(
|
|
|
124
125
|
let mut warning = None;
|
|
125
126
|
let rejection_requested = (effective_missing == "reject_file" && !missing.is_empty())
|
|
126
127
|
|| (effective_extra == "reject_file" && !extra.is_empty());
|
|
127
|
-
if rejection_requested && entity.policy.severity == "warn" {
|
|
128
|
+
if rejection_requested && entity.policy.severity == PolicySeverity::Warn {
|
|
128
129
|
warning = Some(format!(
|
|
129
130
|
"entity.name={} schema mismatch requested reject_file but policy.severity=warn; continuing",
|
|
130
131
|
entity.name
|
|
@@ -139,10 +140,10 @@ pub fn plan_schema_mismatch(
|
|
|
139
140
|
if (effective_missing == "reject_file" && !missing.is_empty())
|
|
140
141
|
|| (effective_extra == "reject_file" && !extra.is_empty())
|
|
141
142
|
{
|
|
142
|
-
if entity.policy.severity == "abort" {
|
|
143
|
+
if entity.policy.severity == PolicySeverity::Abort {
|
|
143
144
|
aborted = true;
|
|
144
145
|
action = report::MismatchAction::Aborted;
|
|
145
|
-
} else if entity.policy.severity == "reject" {
|
|
146
|
+
} else if entity.policy.severity == PolicySeverity::Reject {
|
|
146
147
|
rejected = true;
|
|
147
148
|
action = report::MismatchAction::RejectedFile;
|
|
148
149
|
}
|
|
@@ -15,8 +15,8 @@ use crate::config::{
|
|
|
15
15
|
DomainConfig, EntityConfig, EntityMetadata, EntityStateConfig, EnvConfig,
|
|
16
16
|
IcebergPartitionFieldConfig, IcebergSinkTargetConfig, IncrementalMode, LineageConfig,
|
|
17
17
|
MergeOptionsConfig, MergeScd2OptionsConfig, NormalizeColumnsConfig, PiiColumnConfig, PiiConfig,
|
|
18
|
-
PiiStrategy, PolicyConfig, ProjectMetadata, ReportConfig, RootConfig,
|
|
19
|
-
SchemaEvolutionConfig, SchemaEvolutionIncompatibleAction, SchemaEvolutionMode,
|
|
18
|
+
PiiStrategy, PolicyConfig, PolicySeverity, ProjectMetadata, ReportConfig, RootConfig,
|
|
19
|
+
SchemaConfig, SchemaEvolutionConfig, SchemaEvolutionIncompatibleAction, SchemaEvolutionMode,
|
|
20
20
|
SchemaMismatchConfig, SinkConfig, SinkOptions, SinkTarget, SourceConfig, SourceOptions,
|
|
21
21
|
StorageDefinition, StoragesConfig, WriteMode,
|
|
22
22
|
};
|
|
@@ -830,9 +830,18 @@ fn parse_archive_target(value: &Yaml) -> FloeResult<ArchiveTarget> {
|
|
|
830
830
|
fn parse_policy(value: &Yaml) -> FloeResult<PolicyConfig> {
|
|
831
831
|
let hash = yaml_hash(value, "policy")?;
|
|
832
832
|
validate_known_keys(hash, "policy", &["severity"])?;
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
833
|
+
let severity_str = get_string(hash, "severity", "policy")?;
|
|
834
|
+
let severity = match severity_str.as_str() {
|
|
835
|
+
"warn" => PolicySeverity::Warn,
|
|
836
|
+
"reject" => PolicySeverity::Reject,
|
|
837
|
+
"abort" => PolicySeverity::Abort,
|
|
838
|
+
other => {
|
|
839
|
+
return Err(Box::new(ConfigError(format!(
|
|
840
|
+
"policy.severity={other} is unsupported (allowed: warn, reject, abort)"
|
|
841
|
+
))))
|
|
842
|
+
}
|
|
843
|
+
};
|
|
844
|
+
Ok(PolicyConfig { severity })
|
|
836
845
|
}
|
|
837
846
|
|
|
838
847
|
fn parse_schema(value: &Yaml) -> FloeResult<SchemaConfig> {
|
|
@@ -1061,6 +1070,20 @@ fn opt_u64(hash: &Hash, key: &str, ctx: &str) -> FloeResult<Option<u64>> {
|
|
|
1061
1070
|
}
|
|
1062
1071
|
}
|
|
1063
1072
|
|
|
1073
|
+
fn opt_u32(hash: &Hash, key: &str, ctx: &str) -> FloeResult<Option<u32>> {
|
|
1074
|
+
match opt_u64(hash, key, ctx)? {
|
|
1075
|
+
None => Ok(None),
|
|
1076
|
+
Some(v) => {
|
|
1077
|
+
if v > u32::MAX as u64 {
|
|
1078
|
+
return Err(Box::new(ConfigError(format!(
|
|
1079
|
+
"value at {ctx}.{key} exceeds maximum allowed value"
|
|
1080
|
+
))));
|
|
1081
|
+
}
|
|
1082
|
+
Ok(Some(v as u32))
|
|
1083
|
+
}
|
|
1084
|
+
}
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1064
1087
|
fn parse_pii_config(value: &Yaml) -> FloeResult<PiiConfig> {
|
|
1065
1088
|
let hash = yaml_hash(value, "pii")?;
|
|
1066
1089
|
validate_known_keys(hash, "pii", &["columns"])?;
|
|
@@ -1111,7 +1134,14 @@ fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
|
|
|
1111
1134
|
validate_known_keys(
|
|
1112
1135
|
hash,
|
|
1113
1136
|
"lineage",
|
|
1114
|
-
&[
|
|
1137
|
+
&[
|
|
1138
|
+
"url",
|
|
1139
|
+
"api_key",
|
|
1140
|
+
"timeout_secs",
|
|
1141
|
+
"namespace",
|
|
1142
|
+
"producer",
|
|
1143
|
+
"max_failures",
|
|
1144
|
+
],
|
|
1115
1145
|
)?;
|
|
1116
1146
|
Ok(LineageConfig {
|
|
1117
1147
|
url: get_string(hash, "url", "lineage")?,
|
|
@@ -1119,5 +1149,6 @@ fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
|
|
|
1119
1149
|
timeout_secs: opt_u64(hash, "timeout_secs", "lineage")?,
|
|
1120
1150
|
namespace: get_string(hash, "namespace", "lineage")?,
|
|
1121
1151
|
producer: opt_string(hash, "producer", "lineage")?,
|
|
1152
|
+
max_failures: opt_u32(hash, "max_failures", "lineage")?,
|
|
1122
1153
|
})
|
|
1123
1154
|
}
|
|
@@ -3,7 +3,7 @@ use std::path::Path;
|
|
|
3
3
|
|
|
4
4
|
use polars::polars_utils::pl_str::PlSmallStr;
|
|
5
5
|
use polars::prelude::{
|
|
6
|
-
CsvEncoding, CsvParseOptions, CsvReadOptions, DataType, NullValues,
|
|
6
|
+
CsvEncoding, CsvParseOptions, CsvReadOptions, DataType, NullValues, TimeUnit,
|
|
7
7
|
};
|
|
8
8
|
|
|
9
9
|
use crate::{ConfigError, FloeResult};
|
|
@@ -28,6 +28,7 @@ pub struct LineageConfig {
|
|
|
28
28
|
pub timeout_secs: Option<u64>,
|
|
29
29
|
pub namespace: String,
|
|
30
30
|
pub producer: Option<String>,
|
|
31
|
+
pub max_failures: Option<u32>,
|
|
31
32
|
}
|
|
32
33
|
|
|
33
34
|
#[derive(Debug)]
|
|
@@ -261,12 +262,6 @@ pub struct SinkConfig {
|
|
|
261
262
|
pub archive: Option<ArchiveTarget>,
|
|
262
263
|
}
|
|
263
264
|
|
|
264
|
-
impl SinkConfig {
|
|
265
|
-
pub fn resolved_write_mode(&self) -> WriteMode {
|
|
266
|
-
self.write_mode
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
|
|
270
265
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
|
271
266
|
pub enum WriteMode {
|
|
272
267
|
#[default]
|
|
@@ -445,9 +440,33 @@ pub struct ArchiveTarget {
|
|
|
445
440
|
pub storage: Option<String>,
|
|
446
441
|
}
|
|
447
442
|
|
|
443
|
+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
|
444
|
+
pub enum PolicySeverity {
|
|
445
|
+
#[default]
|
|
446
|
+
Warn,
|
|
447
|
+
Reject,
|
|
448
|
+
Abort,
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
impl PolicySeverity {
|
|
452
|
+
pub fn as_str(self) -> &'static str {
|
|
453
|
+
match self {
|
|
454
|
+
Self::Warn => "warn",
|
|
455
|
+
Self::Reject => "reject",
|
|
456
|
+
Self::Abort => "abort",
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
impl std::fmt::Display for PolicySeverity {
|
|
462
|
+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
463
|
+
f.write_str(self.as_str())
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
|
|
448
467
|
#[derive(Debug)]
|
|
449
468
|
pub struct PolicyConfig {
|
|
450
|
-
pub severity:
|
|
469
|
+
pub severity: PolicySeverity,
|
|
451
470
|
}
|
|
452
471
|
|
|
453
472
|
#[derive(Debug)]
|
|
@@ -464,36 +483,6 @@ impl SchemaConfig {
|
|
|
464
483
|
pub fn resolved_schema_evolution(&self) -> SchemaEvolutionConfig {
|
|
465
484
|
self.schema_evolution.unwrap_or_default()
|
|
466
485
|
}
|
|
467
|
-
|
|
468
|
-
pub fn to_polars_schema(&self) -> FloeResult<Schema> {
|
|
469
|
-
let mut schema = Schema::with_capacity(self.columns.len());
|
|
470
|
-
for column in &self.columns {
|
|
471
|
-
let dtype = parse_data_type(&column.column_type)?;
|
|
472
|
-
if schema.insert(column.name.as_str().into(), dtype).is_some() {
|
|
473
|
-
return Err(Box::new(ConfigError(format!(
|
|
474
|
-
"duplicate column name in schema: {}",
|
|
475
|
-
column.name
|
|
476
|
-
))));
|
|
477
|
-
}
|
|
478
|
-
}
|
|
479
|
-
Ok(schema)
|
|
480
|
-
}
|
|
481
|
-
|
|
482
|
-
pub fn to_polars_string_schema(&self) -> FloeResult<Schema> {
|
|
483
|
-
let mut schema = Schema::with_capacity(self.columns.len());
|
|
484
|
-
for column in &self.columns {
|
|
485
|
-
if schema
|
|
486
|
-
.insert(column.name.as_str().into(), DataType::String)
|
|
487
|
-
.is_some()
|
|
488
|
-
{
|
|
489
|
-
return Err(Box::new(ConfigError(format!(
|
|
490
|
-
"duplicate column name in schema: {}",
|
|
491
|
-
column.name
|
|
492
|
-
))));
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
Ok(schema)
|
|
496
|
-
}
|
|
497
486
|
}
|
|
498
487
|
|
|
499
488
|
#[derive(Debug)]
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
use std::collections::HashSet;
|
|
2
2
|
|
|
3
3
|
use crate::config::{
|
|
4
|
-
CatalogDefinition, CatalogTypeConfig, EntityConfig, IncrementalMode,
|
|
5
|
-
StorageDefinition,
|
|
4
|
+
CatalogDefinition, CatalogTypeConfig, EntityConfig, IncrementalMode, PolicySeverity,
|
|
5
|
+
RootConfig, SourceOptions, StorageDefinition,
|
|
6
6
|
};
|
|
7
7
|
use crate::io::format;
|
|
8
8
|
use crate::io::read::json_selector::parse_selector;
|
|
9
9
|
use crate::io::read::xml_selector;
|
|
10
|
+
use crate::io::write::sink_format::sink_format;
|
|
10
11
|
use crate::{warnings, ConfigError, FloeResult};
|
|
11
12
|
|
|
12
13
|
const ALLOWED_COLUMN_TYPES: &[&str] = &["string", "number", "boolean", "datetime", "date", "time"];
|
|
13
14
|
const ALLOWED_CAST_MODES: &[&str] = &["strict", "coerce"];
|
|
14
15
|
const ALLOWED_NORMALIZE_STRATEGIES: &[&str] = &["snake_case", "lower", "camel_case", "none"];
|
|
15
|
-
const ALLOWED_POLICY_SEVERITIES: &[&str] = &["warn", "reject", "abort"];
|
|
16
16
|
const ALLOWED_MISSING_POLICIES: &[&str] = &["reject_file", "fill_nulls"];
|
|
17
17
|
const ALLOWED_EXTRA_POLICIES: &[&str] = &["reject_file", "ignore"];
|
|
18
18
|
const ALLOWED_STORAGE_TYPES: &[&str] = &["local", "s3", "adls", "gcs"];
|
|
@@ -103,6 +103,11 @@ fn validate_lineage(lineage: &crate::config::LineageConfig) -> FloeResult<()> {
|
|
|
103
103
|
"lineage.namespace must not be empty".to_string(),
|
|
104
104
|
)));
|
|
105
105
|
}
|
|
106
|
+
if lineage.max_failures == Some(0) {
|
|
107
|
+
return Err(Box::new(ConfigError(
|
|
108
|
+
"lineage.max_failures must be at least 1".to_string(),
|
|
109
|
+
)));
|
|
110
|
+
}
|
|
106
111
|
Ok(())
|
|
107
112
|
}
|
|
108
113
|
|
|
@@ -134,7 +139,6 @@ fn validate_entity(
|
|
|
134
139
|
) -> FloeResult<()> {
|
|
135
140
|
validate_source(entity, storages)?;
|
|
136
141
|
validate_state(entity)?;
|
|
137
|
-
validate_policy(entity)?;
|
|
138
142
|
validate_sink(entity, storages, catalogs)?;
|
|
139
143
|
validate_schema(entity, config_version)?;
|
|
140
144
|
if let Some(pii) = &entity.pii {
|
|
@@ -147,7 +151,7 @@ fn validate_pii(entity: &EntityConfig, pii: &crate::config::PiiConfig) -> FloeRe
|
|
|
147
151
|
use crate::config::PiiStrategy;
|
|
148
152
|
// Abort severity writes the raw input file to the rejected sink without
|
|
149
153
|
// loading a DataFrame, bypassing masking entirely.
|
|
150
|
-
if entity.policy.severity ==
|
|
154
|
+
if entity.policy.severity == PolicySeverity::Abort {
|
|
151
155
|
return Err(Box::new(ConfigError(format!(
|
|
152
156
|
"entity.name={} pii: masking is not applied when policy.severity=abort \
|
|
153
157
|
because the raw file is written to sink.rejected without DataFrame processing",
|
|
@@ -197,7 +201,7 @@ fn validate_pii(entity: &EntityConfig, pii: &crate::config::PiiConfig) -> FloeRe
|
|
|
197
201
|
.flatten()
|
|
198
202
|
.map(|s| s.as_str())
|
|
199
203
|
.collect();
|
|
200
|
-
let write_mode = entity.sink.
|
|
204
|
+
let write_mode = entity.sink.write_mode;
|
|
201
205
|
let is_merge_mode = matches!(
|
|
202
206
|
write_mode,
|
|
203
207
|
crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
|
|
@@ -515,7 +519,7 @@ fn validate_sink(
|
|
|
515
519
|
entity.sink.accepted.options.as_ref(),
|
|
516
520
|
)?;
|
|
517
521
|
|
|
518
|
-
if entity.policy.severity ==
|
|
522
|
+
if entity.policy.severity == PolicySeverity::Reject && entity.sink.rejected.is_none() {
|
|
519
523
|
return Err(Box::new(ConfigError(format!(
|
|
520
524
|
"entity.name={} sink.rejected is required when policy.severity=reject",
|
|
521
525
|
entity.name
|
|
@@ -534,28 +538,14 @@ fn validate_sink(
|
|
|
534
538
|
entity.sink.accepted.storage.as_deref(),
|
|
535
539
|
)?;
|
|
536
540
|
storages.validate_reference(entity, "sink.accepted.storage", &accepted_storage)?;
|
|
537
|
-
if
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
"entity.name={} sink.accepted.format=delta is only supported on local, s3, adls, or gcs storage (got {})",
|
|
546
|
-
entity.name, storage_type
|
|
547
|
-
))));
|
|
548
|
-
}
|
|
549
|
-
}
|
|
550
|
-
}
|
|
551
|
-
if entity.sink.accepted.format == "iceberg" {
|
|
552
|
-
if let Some(storage_type) = storages.definition_type(&accepted_storage) {
|
|
553
|
-
if storage_type != "local" && storage_type != "s3" && storage_type != "gcs" {
|
|
554
|
-
return Err(Box::new(ConfigError(format!(
|
|
555
|
-
"entity.name={} sink.accepted.format=iceberg is only supported on local, s3, or gcs storage for now (got {})",
|
|
556
|
-
entity.name, storage_type
|
|
557
|
-
))));
|
|
558
|
-
}
|
|
541
|
+
if let Some(storage_type) = storages.definition_type(&accepted_storage) {
|
|
542
|
+
let fmt = sink_format(entity.sink.accepted.format.as_str())?;
|
|
543
|
+
if !fmt.supported_storages().contains(&storage_type) {
|
|
544
|
+
let supported = fmt.supported_storages().join(", ");
|
|
545
|
+
return Err(Box::new(ConfigError(format!(
|
|
546
|
+
"entity.name={} sink.accepted.format={} is not supported on {} storage (supported: {})",
|
|
547
|
+
entity.name, entity.sink.accepted.format, storage_type, supported
|
|
548
|
+
))));
|
|
559
549
|
}
|
|
560
550
|
}
|
|
561
551
|
validate_iceberg_catalog_binding(entity, storages, catalogs, &accepted_storage)?;
|
|
@@ -589,20 +579,23 @@ fn validate_sink(
|
|
|
589
579
|
}
|
|
590
580
|
|
|
591
581
|
fn validate_sink_write_mode(entity: &EntityConfig) -> FloeResult<()> {
|
|
592
|
-
let write_mode = entity.sink.
|
|
582
|
+
let write_mode = entity.sink.write_mode;
|
|
583
|
+
let fmt = sink_format(entity.sink.accepted.format.as_str())?;
|
|
584
|
+
if !fmt.supported_modes().contains(&write_mode) {
|
|
585
|
+
return Err(Box::new(ConfigError(format!(
|
|
586
|
+
"entity.name={} sink.write_mode={} is not supported by sink.accepted.format={}",
|
|
587
|
+
entity.name,
|
|
588
|
+
write_mode.as_str(),
|
|
589
|
+
entity.sink.accepted.format
|
|
590
|
+
))));
|
|
591
|
+
}
|
|
592
|
+
|
|
593
593
|
let is_merge_mode = matches!(
|
|
594
594
|
write_mode,
|
|
595
595
|
crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
|
|
596
596
|
);
|
|
597
597
|
if is_merge_mode {
|
|
598
598
|
let mode_name = write_mode.as_str();
|
|
599
|
-
if entity.sink.accepted.format != "delta" {
|
|
600
|
-
return Err(Box::new(ConfigError(format!(
|
|
601
|
-
"entity.name={} sink.write_mode={} requires sink.accepted.format=delta",
|
|
602
|
-
entity.name, mode_name
|
|
603
|
-
))));
|
|
604
|
-
}
|
|
605
|
-
|
|
606
599
|
let primary_key = entity.schema.primary_key.as_ref().ok_or_else(|| {
|
|
607
600
|
Box::new(ConfigError(format!(
|
|
608
601
|
"entity.name={} sink.write_mode={} requires schema.primary_key",
|
|
@@ -628,9 +621,16 @@ fn validate_merge_options(
|
|
|
628
621
|
return Ok(());
|
|
629
622
|
};
|
|
630
623
|
|
|
631
|
-
|
|
624
|
+
let fmt = sink_format(entity.sink.accepted.format.as_str())?;
|
|
625
|
+
let supports_merge = fmt.supported_modes().iter().any(|m| {
|
|
626
|
+
matches!(
|
|
627
|
+
m,
|
|
628
|
+
crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
|
|
629
|
+
)
|
|
630
|
+
});
|
|
631
|
+
if !supports_merge {
|
|
632
632
|
return Err(Box::new(ConfigError(format!(
|
|
633
|
-
"entity.name={} sink.accepted.merge is only supported when sink.accepted.format
|
|
633
|
+
"entity.name={} sink.accepted.merge is only supported when sink.accepted.format supports merge (e.g. delta)",
|
|
634
634
|
entity.name
|
|
635
635
|
))));
|
|
636
636
|
}
|
|
@@ -1063,18 +1063,6 @@ fn validate_sink_partitioning(entity: &EntityConfig) -> FloeResult<()> {
|
|
|
1063
1063
|
Ok(())
|
|
1064
1064
|
}
|
|
1065
1065
|
|
|
1066
|
-
fn validate_policy(entity: &EntityConfig) -> FloeResult<()> {
|
|
1067
|
-
if !ALLOWED_POLICY_SEVERITIES.contains(&entity.policy.severity.as_str()) {
|
|
1068
|
-
return Err(Box::new(ConfigError(format!(
|
|
1069
|
-
"entity.name={} policy.severity={} is unsupported (allowed: {})",
|
|
1070
|
-
entity.name,
|
|
1071
|
-
entity.policy.severity,
|
|
1072
|
-
ALLOWED_POLICY_SEVERITIES.join(", ")
|
|
1073
|
-
))));
|
|
1074
|
-
}
|
|
1075
|
-
Ok(())
|
|
1076
|
-
}
|
|
1077
|
-
|
|
1078
1066
|
fn validate_schema(entity: &EntityConfig, config_version: ConfigVersion) -> FloeResult<()> {
|
|
1079
1067
|
if entity.source.format == "json" && entity.schema.columns.len() > MAX_JSON_COLUMNS {
|
|
1080
1068
|
return Err(Box::new(ConfigError(format!(
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
use crate::log::emit_log;
|
|
2
|
+
|
|
3
|
+
#[derive(Debug, thiserror::Error)]
|
|
4
|
+
#[error("{0}")]
|
|
5
|
+
pub struct ConfigError(pub String);
|
|
6
|
+
|
|
7
|
+
#[derive(Debug, thiserror::Error)]
|
|
8
|
+
#[error("{0}")]
|
|
9
|
+
pub struct RunError(pub String);
|
|
10
|
+
|
|
11
|
+
#[derive(Debug, thiserror::Error)]
|
|
12
|
+
#[error("{0}")]
|
|
13
|
+
pub struct StorageError(pub String);
|
|
14
|
+
|
|
15
|
+
#[derive(Debug, thiserror::Error)]
|
|
16
|
+
#[error("{0}")]
|
|
17
|
+
pub struct IoError(pub String);
|
|
18
|
+
|
|
19
|
+
pub fn emit(
|
|
20
|
+
run_id: &str,
|
|
21
|
+
entity: Option<&str>,
|
|
22
|
+
input: Option<&str>,
|
|
23
|
+
code: Option<&str>,
|
|
24
|
+
message: &str,
|
|
25
|
+
) {
|
|
26
|
+
emit_log("error", run_id, entity, input, code, message);
|
|
27
|
+
}
|
|
@@ -46,7 +46,7 @@ pub enum ReadInput {
|
|
|
46
46
|
},
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
#[derive(Debug, Clone)]
|
|
49
|
+
#[derive(Debug, Clone, Default)]
|
|
50
50
|
pub struct AcceptedWriteMetrics {
|
|
51
51
|
pub total_bytes_written: Option<u64>,
|
|
52
52
|
pub avg_file_size_mb: Option<f64>,
|
|
@@ -85,6 +85,26 @@ pub struct AcceptedMergeMetrics {
|
|
|
85
85
|
}
|
|
86
86
|
|
|
87
87
|
#[derive(Debug, Clone)]
|
|
88
|
+
pub enum CatalogRegistration {
|
|
89
|
+
UnityDelta {
|
|
90
|
+
catalog_name: String,
|
|
91
|
+
schema: String,
|
|
92
|
+
table: String,
|
|
93
|
+
},
|
|
94
|
+
IcebergGlue {
|
|
95
|
+
catalog_name: String,
|
|
96
|
+
database: Option<String>,
|
|
97
|
+
namespace: String,
|
|
98
|
+
table: String,
|
|
99
|
+
},
|
|
100
|
+
IcebergRest {
|
|
101
|
+
catalog_name: String,
|
|
102
|
+
namespace: String,
|
|
103
|
+
table: String,
|
|
104
|
+
},
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
#[derive(Debug, Clone, Default)]
|
|
88
108
|
pub struct AcceptedWriteOutput {
|
|
89
109
|
pub files_written: Option<u64>,
|
|
90
110
|
pub parts_written: u64,
|
|
@@ -92,13 +112,7 @@ pub struct AcceptedWriteOutput {
|
|
|
92
112
|
pub table_version: Option<i64>,
|
|
93
113
|
pub snapshot_id: Option<i64>,
|
|
94
114
|
pub table_root_uri: Option<String>,
|
|
95
|
-
pub
|
|
96
|
-
pub iceberg_database: Option<String>,
|
|
97
|
-
pub iceberg_namespace: Option<String>,
|
|
98
|
-
pub iceberg_table: Option<String>,
|
|
99
|
-
pub delta_catalog_name: Option<String>,
|
|
100
|
-
pub delta_catalog_schema: Option<String>,
|
|
101
|
-
pub delta_catalog_table: Option<String>,
|
|
115
|
+
pub catalog: Option<CatalogRegistration>,
|
|
102
116
|
pub metrics: AcceptedWriteMetrics,
|
|
103
117
|
pub merge: Option<AcceptedMergeMetrics>,
|
|
104
118
|
pub schema_evolution: AcceptedSchemaEvolution,
|
|
@@ -163,20 +177,16 @@ pub trait InputAdapter: Send + Sync {
|
|
|
163
177
|
}
|
|
164
178
|
}
|
|
165
179
|
|
|
166
|
-
pub
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
resolver: &config::StorageResolver,
|
|
177
|
-
catalogs: &config::CatalogResolver,
|
|
178
|
-
entity: &config::EntityConfig,
|
|
179
|
-
) -> FloeResult<AcceptedWriteOutput>;
|
|
180
|
+
pub struct AcceptedWriteRequest<'a> {
|
|
181
|
+
pub target: &'a Target,
|
|
182
|
+
pub df: &'a mut DataFrame,
|
|
183
|
+
pub mode: config::WriteMode,
|
|
184
|
+
pub output_stem: &'a str,
|
|
185
|
+
pub temp_dir: Option<&'a Path>,
|
|
186
|
+
pub cloud: &'a mut io::storage::CloudClient,
|
|
187
|
+
pub resolver: &'a config::StorageResolver,
|
|
188
|
+
pub catalogs: &'a config::CatalogResolver,
|
|
189
|
+
pub entity: &'a config::EntityConfig,
|
|
180
190
|
}
|
|
181
191
|
|
|
182
192
|
pub struct RejectedWriteRequest<'a> {
|
|
@@ -247,7 +257,7 @@ pub fn ensure_input_format(entity_name: &str, format: &str) -> FloeResult<()> {
|
|
|
247
257
|
}
|
|
248
258
|
|
|
249
259
|
pub fn ensure_accepted_sink_format(entity_name: &str, format: &str) -> FloeResult<()> {
|
|
250
|
-
if
|
|
260
|
+
if crate::io::write::sink_format::sink_format(format).is_err() {
|
|
251
261
|
return Err(Box::new(unsupported_format_error(
|
|
252
262
|
FormatKind::SinkAccepted,
|
|
253
263
|
format,
|
|
@@ -371,19 +381,6 @@ pub fn input_adapter(format: &str) -> FloeResult<&'static dyn InputAdapter> {
|
|
|
371
381
|
}
|
|
372
382
|
}
|
|
373
383
|
|
|
374
|
-
pub fn accepted_sink_adapter(format: &str) -> FloeResult<&'static dyn AcceptedSinkAdapter> {
|
|
375
|
-
match format {
|
|
376
|
-
"parquet" => Ok(io::write::parquet::parquet_accepted_adapter()),
|
|
377
|
-
"delta" => Ok(io::write::delta::delta_accepted_adapter()),
|
|
378
|
-
"iceberg" => Ok(io::write::iceberg::iceberg_accepted_adapter()),
|
|
379
|
-
_ => Err(Box::new(unsupported_format_error(
|
|
380
|
-
FormatKind::SinkAccepted,
|
|
381
|
-
format,
|
|
382
|
-
None,
|
|
383
|
-
))),
|
|
384
|
-
}
|
|
385
|
-
}
|
|
386
|
-
|
|
387
384
|
pub fn rejected_sink_adapter(format: &str) -> FloeResult<&'static dyn RejectedSinkAdapter> {
|
|
388
385
|
match format {
|
|
389
386
|
"csv" => Ok(io::write::csv::csv_rejected_adapter()),
|