floe-python 0.3.10__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {floe_python-0.3.10 → floe_python-0.4.1}/Cargo.lock +56 -3
- {floe_python-0.3.10 → floe_python-0.4.1}/PKG-INFO +1 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/Cargo.toml +4 -2
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/mismatch.rs +4 -3
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/mod.rs +4 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/parse.rs +39 -8
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/storage.rs +2 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/types.rs +27 -38
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/validate.rs +46 -51
- floe_python-0.4.1/crates/floe-core/src/errors.rs +27 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/format.rs +33 -36
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/mod.rs +42 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/ops/inputs.rs +5 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/adls.rs +121 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/gcs.rs +123 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/local.rs +110 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/s3.rs +113 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/unique_seed/mod.rs +15 -41
- floe_python-0.4.1/crates/floe-core/src/io/write/accepted.rs +1 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta/unity.rs +16 -4
- floe_python-0.4.1/crates/floe-core/src/io/write/delta/unity_tests.rs +209 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta.rs +73 -30
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/rest.rs +6 -4
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg.rs +314 -44
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/mod.rs +1 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/parquet.rs +93 -30
- floe_python-0.4.1/crates/floe-core/src/io/write/sink_format.rs +66 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +2 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +15 -15
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/merge/shared.rs +55 -112
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/mod.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/lib.rs +26 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/lineage/mod.rs +85 -20
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/manifest/builder.rs +24 -7
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/manifest/model.rs +8 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/profile/parse.rs +80 -2
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/profile/types.rs +24 -2
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/profile/validate.rs +126 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/report/entity.rs +35 -14
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/context.rs +14 -0
- floe_python-0.4.1/crates/floe-core/src/run/entity/accepted_write.rs +156 -0
- floe_python-0.4.1/crates/floe-core/src/run/entity/incremental.rs +281 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/mod.rs +53 -52
- floe_python-0.4.1/crates/floe-core/src/run/entity/pii.rs +446 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/precheck.rs +3 -2
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/validate_split.rs +12 -16
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/mod.rs +48 -37
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/output.rs +13 -16
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/runtime.rs +5 -4
- floe_python-0.4.1/crates/floe-core/src/state/mod.rs +730 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/delta_run.rs +2 -2
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/add_entity.rs +2 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/adls_validation.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/catalogs.rs +2 -2
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/config_validation.rs +36 -4
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/lineage_validation.rs +57 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/parse.rs +4 -4
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/inputs.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/local.rs +48 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/delta_write.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/object_store.rs +8 -8
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +1 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/manifest/mod.rs +22 -3
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/profile/parse.rs +28 -4
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/profile/validate.rs +36 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/entity/incremental.rs +39 -0
- floe_python-0.4.1/crates/floe-core/tests/unit/run/lineage.rs +210 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/mod.rs +1 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/state/mod.rs +136 -77
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/Cargo.toml +2 -2
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/functions.rs +5 -1
- {floe_python-0.3.10 → floe_python-0.4.1}/pyproject.toml +1 -1
- floe_python-0.3.10/crates/floe-core/src/errors.rs +0 -57
- floe_python-0.3.10/crates/floe-core/src/io/unique_seed/delta.rs +0 -47
- floe_python-0.3.10/crates/floe-core/src/io/unique_seed/iceberg.rs +0 -299
- floe_python-0.3.10/crates/floe-core/src/io/unique_seed/parquet.rs +0 -80
- floe_python-0.3.10/crates/floe-core/src/io/write/accepted.rs +0 -56
- floe_python-0.3.10/crates/floe-core/src/run/entity/accepted_write.rs +0 -242
- floe_python-0.3.10/crates/floe-core/src/run/entity/incremental.rs +0 -177
- floe_python-0.3.10/crates/floe-core/src/run/entity/pii.rs +0 -182
- floe_python-0.3.10/crates/floe-core/src/state/mod.rs +0 -356
- {floe_python-0.3.10 → floe_python-0.4.1}/Cargo.toml +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/README.md +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/README.md +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/add_entity.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/cast.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/normalize.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/not_null.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/unique.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/catalog.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/location.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/template.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/yaml_decode.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/avro.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/csv.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/json.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/json_selector.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/orc.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/parquet.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/xlsx.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/xml.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/object_store.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/target.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/csv.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta/options.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/context.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/metrics.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/parts.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/log.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/manifest/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/profile/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/report/build.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/report/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/report/output.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/process.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/resolve.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/events.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/file.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/perf.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/runner/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/runner/outcome.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/vars/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/vars/resolve.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/warnings.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/archive_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/dry_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/iceberg_glue_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/iceberg_s3_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/local_run.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/common.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/adls_storage.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/local_storage.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/remote_base.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/templating.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/format.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/report/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/report/storage.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/checks.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/pii.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/report.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/.gitignore +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/README.md +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/lib.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/observer.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/types/config.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/types/errors.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/types/mod.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/types/outcome.rs +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/fixtures/config.yml +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/fixtures/profile.yml +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/test_floe.py +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/python/floe/__init__.py +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/python/floe/_floe.pyi +0 -0
- {floe_python-0.3.10 → floe_python-0.4.1}/python/floe/py.typed +0 -0
|
@@ -476,6 +476,16 @@ version = "0.3.2"
|
|
|
476
476
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
477
477
|
checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063"
|
|
478
478
|
|
|
479
|
+
[[package]]
|
|
480
|
+
name = "assert-json-diff"
|
|
481
|
+
version = "2.0.2"
|
|
482
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
483
|
+
checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12"
|
|
484
|
+
dependencies = [
|
|
485
|
+
"serde",
|
|
486
|
+
"serde_json",
|
|
487
|
+
]
|
|
488
|
+
|
|
479
489
|
[[package]]
|
|
480
490
|
name = "assert_cmd"
|
|
481
491
|
version = "2.1.2"
|
|
@@ -1708,6 +1718,15 @@ version = "1.0.4"
|
|
|
1708
1718
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1709
1719
|
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
|
1710
1720
|
|
|
1721
|
+
[[package]]
|
|
1722
|
+
name = "colored"
|
|
1723
|
+
version = "3.1.1"
|
|
1724
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1725
|
+
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
|
|
1726
|
+
dependencies = [
|
|
1727
|
+
"windows-sys 0.61.2",
|
|
1728
|
+
]
|
|
1729
|
+
|
|
1711
1730
|
[[package]]
|
|
1712
1731
|
name = "comfy-table"
|
|
1713
1732
|
version = "7.2.2"
|
|
@@ -3380,7 +3399,7 @@ dependencies = [
|
|
|
3380
3399
|
|
|
3381
3400
|
[[package]]
|
|
3382
3401
|
name = "floe-cli"
|
|
3383
|
-
version = "0.3.10"
|
|
3402
|
+
version = "0.4.1"
|
|
3384
3403
|
dependencies = [
|
|
3385
3404
|
"assert_cmd",
|
|
3386
3405
|
"clap",
|
|
@@ -3393,7 +3412,7 @@ dependencies = [
|
|
|
3393
3412
|
|
|
3394
3413
|
[[package]]
|
|
3395
3414
|
name = "floe-core"
|
|
3396
|
-
version = "0.3.10"
|
|
3415
|
+
version = "0.4.1"
|
|
3397
3416
|
dependencies = [
|
|
3398
3417
|
"apache-avro 0.16.0",
|
|
3399
3418
|
"arrow",
|
|
@@ -3414,6 +3433,7 @@ dependencies = [
|
|
|
3414
3433
|
"iceberg",
|
|
3415
3434
|
"iceberg-catalog-rest",
|
|
3416
3435
|
"iceberg-storage-opendal",
|
|
3436
|
+
"mockito",
|
|
3417
3437
|
"orc-rust",
|
|
3418
3438
|
"polars",
|
|
3419
3439
|
"rayon",
|
|
@@ -3425,6 +3445,7 @@ dependencies = [
|
|
|
3425
3445
|
"serde_yaml",
|
|
3426
3446
|
"sha2",
|
|
3427
3447
|
"tempfile",
|
|
3448
|
+
"thiserror 1.0.69",
|
|
3428
3449
|
"time",
|
|
3429
3450
|
"tokio",
|
|
3430
3451
|
"url",
|
|
@@ -3434,7 +3455,7 @@ dependencies = [
|
|
|
3434
3455
|
|
|
3435
3456
|
[[package]]
|
|
3436
3457
|
name = "floe-python"
|
|
3437
|
-
version = "0.3.10"
|
|
3458
|
+
version = "0.4.1"
|
|
3438
3459
|
dependencies = [
|
|
3439
3460
|
"floe-core",
|
|
3440
3461
|
"pyo3",
|
|
@@ -4046,6 +4067,7 @@ dependencies = [
|
|
|
4046
4067
|
"http 1.4.0",
|
|
4047
4068
|
"http-body 1.0.1",
|
|
4048
4069
|
"httparse",
|
|
4070
|
+
"httpdate",
|
|
4049
4071
|
"itoa",
|
|
4050
4072
|
"pin-project-lite",
|
|
4051
4073
|
"pin-utils",
|
|
@@ -4861,6 +4883,31 @@ dependencies = [
|
|
|
4861
4883
|
"windows-sys 0.61.2",
|
|
4862
4884
|
]
|
|
4863
4885
|
|
|
4886
|
+
[[package]]
|
|
4887
|
+
name = "mockito"
|
|
4888
|
+
version = "1.7.2"
|
|
4889
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4890
|
+
checksum = "90820618712cab19cfc46b274c6c22546a82affcb3c3bdf0f29e3db8e1bb92c0"
|
|
4891
|
+
dependencies = [
|
|
4892
|
+
"assert-json-diff",
|
|
4893
|
+
"bytes",
|
|
4894
|
+
"colored",
|
|
4895
|
+
"futures-core",
|
|
4896
|
+
"http 1.4.0",
|
|
4897
|
+
"http-body 1.0.1",
|
|
4898
|
+
"http-body-util",
|
|
4899
|
+
"hyper 1.8.1",
|
|
4900
|
+
"hyper-util",
|
|
4901
|
+
"log",
|
|
4902
|
+
"pin-project-lite",
|
|
4903
|
+
"rand 0.9.4",
|
|
4904
|
+
"regex",
|
|
4905
|
+
"serde_json",
|
|
4906
|
+
"serde_urlencoded",
|
|
4907
|
+
"similar",
|
|
4908
|
+
"tokio",
|
|
4909
|
+
]
|
|
4910
|
+
|
|
4864
4911
|
[[package]]
|
|
4865
4912
|
name = "moka"
|
|
4866
4913
|
version = "0.12.13"
|
|
@@ -7420,6 +7467,12 @@ version = "0.1.5"
|
|
|
7420
7467
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
7421
7468
|
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
|
|
7422
7469
|
|
|
7470
|
+
[[package]]
|
|
7471
|
+
name = "similar"
|
|
7472
|
+
version = "2.7.0"
|
|
7473
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
7474
|
+
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
|
|
7475
|
+
|
|
7423
7476
|
[[package]]
|
|
7424
7477
|
name = "simple_asn1"
|
|
7425
7478
|
version = "0.6.3"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "floe-core"
|
|
3
|
-
version = "0.3.10"
|
|
3
|
+
version = "0.4.1"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
description = "Core library for Floe, a YAML-driven technical ingestion tool."
|
|
6
6
|
license = "MIT"
|
|
@@ -41,16 +41,18 @@ uuid = "1"
|
|
|
41
41
|
arrow = "57"
|
|
42
42
|
iceberg = "0.9.0"
|
|
43
43
|
iceberg-catalog-rest = "0.9.0"
|
|
44
|
-
iceberg-storage-opendal = { version = "0.9.1", features = ["opendal-gcs"] }
|
|
44
|
+
iceberg-storage-opendal = { version = "0.9.1", features = ["opendal-s3", "opendal-gcs"] }
|
|
45
45
|
df-interchange = { version = "0.3.2", features = ["arrow_57", "polars_0_52"] }
|
|
46
46
|
orc-rust = "0.7.1"
|
|
47
47
|
|
|
48
48
|
reqwest = { version = "0.12", default-features = true, features = ["native-tls-vendored", "json", "blocking"] }
|
|
49
49
|
sha2 = "0.10"
|
|
50
50
|
hex = "0.4"
|
|
51
|
+
thiserror = "1"
|
|
51
52
|
|
|
52
53
|
[dev-dependencies]
|
|
53
54
|
rust_xlsxwriter = "0.67"
|
|
55
|
+
mockito = "1"
|
|
54
56
|
|
|
55
57
|
[features]
|
|
56
58
|
vendored-openssl = []
|
|
@@ -2,6 +2,7 @@ use std::collections::HashMap;
|
|
|
2
2
|
|
|
3
3
|
use polars::prelude::{DataFrame, DataType, Series};
|
|
4
4
|
|
|
5
|
+
use crate::config::PolicySeverity;
|
|
5
6
|
use crate::errors::RunError;
|
|
6
7
|
use crate::{config, report, ConfigError, FloeResult};
|
|
7
8
|
|
|
@@ -124,7 +125,7 @@ pub fn plan_schema_mismatch(
|
|
|
124
125
|
let mut warning = None;
|
|
125
126
|
let rejection_requested = (effective_missing == "reject_file" && !missing.is_empty())
|
|
126
127
|
|| (effective_extra == "reject_file" && !extra.is_empty());
|
|
127
|
-
if rejection_requested && entity.policy.severity == "warn" {
|
|
128
|
+
if rejection_requested && entity.policy.severity == PolicySeverity::Warn {
|
|
128
129
|
warning = Some(format!(
|
|
129
130
|
"entity.name={} schema mismatch requested reject_file but policy.severity=warn; continuing",
|
|
130
131
|
entity.name
|
|
@@ -139,10 +140,10 @@ pub fn plan_schema_mismatch(
|
|
|
139
140
|
if (effective_missing == "reject_file" && !missing.is_empty())
|
|
140
141
|
|| (effective_extra == "reject_file" && !extra.is_empty())
|
|
141
142
|
{
|
|
142
|
-
if entity.policy.severity == "abort" {
|
|
143
|
+
if entity.policy.severity == PolicySeverity::Abort {
|
|
143
144
|
aborted = true;
|
|
144
145
|
action = report::MismatchAction::Aborted;
|
|
145
|
-
} else if entity.policy.severity == "reject" {
|
|
146
|
+
} else if entity.policy.severity == PolicySeverity::Reject {
|
|
146
147
|
rejected = true;
|
|
147
148
|
action = report::MismatchAction::RejectedFile;
|
|
148
149
|
}
|
|
@@ -13,6 +13,9 @@ pub use storage::{resolve_local_path, ConfigBase, ResolvedPath, StorageResolver}
|
|
|
13
13
|
pub use types::*;
|
|
14
14
|
|
|
15
15
|
pub use parse::extract_raw_env_vars;
|
|
16
|
-
pub(crate) use parse::{parse_catalogs_with_context, parse_config, parse_config_with_vars};
|
|
16
|
+
pub(crate) use parse::{
|
|
17
|
+
parse_catalogs_with_context, parse_config, parse_config_with_vars, parse_lineage_config,
|
|
18
|
+
parse_storages,
|
|
19
|
+
};
|
|
17
20
|
pub(crate) use template::apply_templates_with_vars;
|
|
18
21
|
pub(crate) use validate::{extract_first_n, extract_last_n, validate_config};
|
|
@@ -15,8 +15,8 @@ use crate::config::{
|
|
|
15
15
|
DomainConfig, EntityConfig, EntityMetadata, EntityStateConfig, EnvConfig,
|
|
16
16
|
IcebergPartitionFieldConfig, IcebergSinkTargetConfig, IncrementalMode, LineageConfig,
|
|
17
17
|
MergeOptionsConfig, MergeScd2OptionsConfig, NormalizeColumnsConfig, PiiColumnConfig, PiiConfig,
|
|
18
|
-
PiiStrategy, PolicyConfig, ProjectMetadata, ReportConfig, RootConfig, SchemaConfig,
|
|
19
|
-
SchemaEvolutionConfig, SchemaEvolutionIncompatibleAction, SchemaEvolutionMode,
|
|
18
|
+
PiiStrategy, PolicyConfig, PolicySeverity, ProjectMetadata, ReportConfig, RootConfig,
|
|
19
|
+
SchemaConfig, SchemaEvolutionConfig, SchemaEvolutionIncompatibleAction, SchemaEvolutionMode,
|
|
20
20
|
SchemaMismatchConfig, SinkConfig, SinkOptions, SinkTarget, SourceConfig, SourceOptions,
|
|
21
21
|
StorageDefinition, StoragesConfig, WriteMode,
|
|
22
22
|
};
|
|
@@ -658,7 +658,7 @@ fn parse_sink_delta_options(value: &Yaml, ctx: &str) -> FloeResult<DeltaSinkTarg
|
|
|
658
658
|
})
|
|
659
659
|
}
|
|
660
660
|
|
|
661
|
-
fn parse_storages(value: &Yaml) -> FloeResult<StoragesConfig> {
|
|
661
|
+
pub(crate) fn parse_storages(value: &Yaml) -> FloeResult<StoragesConfig> {
|
|
662
662
|
let hash = yaml_hash(value, "storages")?;
|
|
663
663
|
validate_known_keys(hash, "storages", &["default", "definitions"])?;
|
|
664
664
|
let definitions_yaml = match hash_get(hash, "definitions") {
|
|
@@ -830,9 +830,18 @@ fn parse_archive_target(value: &Yaml) -> FloeResult<ArchiveTarget> {
|
|
|
830
830
|
fn parse_policy(value: &Yaml) -> FloeResult<PolicyConfig> {
|
|
831
831
|
let hash = yaml_hash(value, "policy")?;
|
|
832
832
|
validate_known_keys(hash, "policy", &["severity"])?;
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
833
|
+
let severity_str = get_string(hash, "severity", "policy")?;
|
|
834
|
+
let severity = match severity_str.as_str() {
|
|
835
|
+
"warn" => PolicySeverity::Warn,
|
|
836
|
+
"reject" => PolicySeverity::Reject,
|
|
837
|
+
"abort" => PolicySeverity::Abort,
|
|
838
|
+
other => {
|
|
839
|
+
return Err(Box::new(ConfigError(format!(
|
|
840
|
+
"policy.severity={other} is unsupported (allowed: warn, reject, abort)"
|
|
841
|
+
))))
|
|
842
|
+
}
|
|
843
|
+
};
|
|
844
|
+
Ok(PolicyConfig { severity })
|
|
836
845
|
}
|
|
837
846
|
|
|
838
847
|
fn parse_schema(value: &Yaml) -> FloeResult<SchemaConfig> {
|
|
@@ -1061,6 +1070,20 @@ fn opt_u64(hash: &Hash, key: &str, ctx: &str) -> FloeResult<Option<u64>> {
|
|
|
1061
1070
|
}
|
|
1062
1071
|
}
|
|
1063
1072
|
|
|
1073
|
+
fn opt_u32(hash: &Hash, key: &str, ctx: &str) -> FloeResult<Option<u32>> {
|
|
1074
|
+
match opt_u64(hash, key, ctx)? {
|
|
1075
|
+
None => Ok(None),
|
|
1076
|
+
Some(v) => {
|
|
1077
|
+
if v > u32::MAX as u64 {
|
|
1078
|
+
return Err(Box::new(ConfigError(format!(
|
|
1079
|
+
"value at {ctx}.{key} exceeds maximum allowed value"
|
|
1080
|
+
))));
|
|
1081
|
+
}
|
|
1082
|
+
Ok(Some(v as u32))
|
|
1083
|
+
}
|
|
1084
|
+
}
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1064
1087
|
fn parse_pii_config(value: &Yaml) -> FloeResult<PiiConfig> {
|
|
1065
1088
|
let hash = yaml_hash(value, "pii")?;
|
|
1066
1089
|
validate_known_keys(hash, "pii", &["columns"])?;
|
|
@@ -1106,12 +1129,19 @@ fn parse_pii_column(value: &Yaml) -> FloeResult<PiiColumnConfig> {
|
|
|
1106
1129
|
})
|
|
1107
1130
|
}
|
|
1108
1131
|
|
|
1109
|
-
fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
|
|
1132
|
+
pub(crate) fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
|
|
1110
1133
|
let hash = yaml_hash(value, "lineage")?;
|
|
1111
1134
|
validate_known_keys(
|
|
1112
1135
|
hash,
|
|
1113
1136
|
"lineage",
|
|
1114
|
-
&[
|
|
1137
|
+
&[
|
|
1138
|
+
"url",
|
|
1139
|
+
"api_key",
|
|
1140
|
+
"timeout_secs",
|
|
1141
|
+
"namespace",
|
|
1142
|
+
"producer",
|
|
1143
|
+
"max_failures",
|
|
1144
|
+
],
|
|
1115
1145
|
)?;
|
|
1116
1146
|
Ok(LineageConfig {
|
|
1117
1147
|
url: get_string(hash, "url", "lineage")?,
|
|
@@ -1119,5 +1149,6 @@ fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
|
|
|
1119
1149
|
timeout_secs: opt_u64(hash, "timeout_secs", "lineage")?,
|
|
1120
1150
|
namespace: get_string(hash, "namespace", "lineage")?,
|
|
1121
1151
|
producer: opt_string(hash, "producer", "lineage")?,
|
|
1152
|
+
max_failures: opt_u32(hash, "max_failures", "lineage")?,
|
|
1122
1153
|
})
|
|
1123
1154
|
}
|
|
@@ -126,6 +126,7 @@ pub struct ResolvedPath {
|
|
|
126
126
|
pub local_path: Option<PathBuf>,
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
+
#[derive(Clone)]
|
|
129
130
|
pub struct StorageResolver {
|
|
130
131
|
config_base: ConfigBase,
|
|
131
132
|
default_name: String,
|
|
@@ -614,7 +615,7 @@ fn parent_prefix(key: &str) -> String {
|
|
|
614
615
|
}
|
|
615
616
|
}
|
|
616
617
|
|
|
617
|
-
fn is_remote_uri(value: &str) -> bool {
|
|
618
|
+
/// Returns `true` when `value` starts with one of the remote URI schemes
/// `s3://`, `gs://` or `abfs://`.
///
/// The comparison is a case-sensitive prefix match; anything else, including
/// the empty string, is reported as not remote.
pub(crate) fn is_remote_uri(value: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 3] = ["s3://", "gs://", "abfs://"];
    REMOTE_PREFIXES
        .iter()
        .any(|prefix| value.starts_with(*prefix))
}
|
|
620
621
|
|
|
@@ -3,7 +3,7 @@ use std::path::Path;
|
|
|
3
3
|
|
|
4
4
|
use polars::polars_utils::pl_str::PlSmallStr;
|
|
5
5
|
use polars::prelude::{
|
|
6
|
-
CsvEncoding, CsvParseOptions, CsvReadOptions, DataType, NullValues,
|
|
6
|
+
CsvEncoding, CsvParseOptions, CsvReadOptions, DataType, NullValues, TimeUnit,
|
|
7
7
|
};
|
|
8
8
|
|
|
9
9
|
use crate::{ConfigError, FloeResult};
|
|
@@ -28,6 +28,7 @@ pub struct LineageConfig {
|
|
|
28
28
|
pub timeout_secs: Option<u64>,
|
|
29
29
|
pub namespace: String,
|
|
30
30
|
pub producer: Option<String>,
|
|
31
|
+
pub max_failures: Option<u32>,
|
|
31
32
|
}
|
|
32
33
|
|
|
33
34
|
#[derive(Debug)]
|
|
@@ -261,12 +262,6 @@ pub struct SinkConfig {
|
|
|
261
262
|
pub archive: Option<ArchiveTarget>,
|
|
262
263
|
}
|
|
263
264
|
|
|
264
|
-
impl SinkConfig {
|
|
265
|
-
pub fn resolved_write_mode(&self) -> WriteMode {
|
|
266
|
-
self.write_mode
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
|
|
270
265
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
|
271
266
|
pub enum WriteMode {
|
|
272
267
|
#[default]
|
|
@@ -445,9 +440,33 @@ pub struct ArchiveTarget {
|
|
|
445
440
|
pub storage: Option<String>,
|
|
446
441
|
}
|
|
447
442
|
|
|
443
|
+
/// Severity configured under `policy.severity`.
///
/// The configuration spellings are `warn`, `reject` and `abort`; `Warn` is
/// the `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PolicySeverity {
    /// Spelled `warn` in configuration.
    #[default]
    Warn,
    /// Spelled `reject` in configuration.
    Reject,
    /// Spelled `abort` in configuration.
    Abort,
}

impl PolicySeverity {
    /// Returns the lowercase configuration spelling of this severity.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Warn => "warn",
            Self::Reject => "reject",
            Self::Abort => "abort",
        }
    }
}

impl std::fmt::Display for PolicySeverity {
    /// Writes the same text as [`PolicySeverity::as_str`].
    ///
    /// `Formatter::pad` is used instead of `write_str` so that width,
    /// alignment and precision flags (for example `{:>8}`) are honoured, as
    /// they are for plain string slices. Output for a bare `{}` is unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}
|
|
466
|
+
|
|
448
467
|
#[derive(Debug)]
|
|
449
468
|
pub struct PolicyConfig {
|
|
450
|
-
pub severity:
|
|
469
|
+
pub severity: PolicySeverity,
|
|
451
470
|
}
|
|
452
471
|
|
|
453
472
|
#[derive(Debug)]
|
|
@@ -464,36 +483,6 @@ impl SchemaConfig {
|
|
|
464
483
|
pub fn resolved_schema_evolution(&self) -> SchemaEvolutionConfig {
|
|
465
484
|
self.schema_evolution.unwrap_or_default()
|
|
466
485
|
}
|
|
467
|
-
|
|
468
|
-
pub fn to_polars_schema(&self) -> FloeResult<Schema> {
|
|
469
|
-
let mut schema = Schema::with_capacity(self.columns.len());
|
|
470
|
-
for column in &self.columns {
|
|
471
|
-
let dtype = parse_data_type(&column.column_type)?;
|
|
472
|
-
if schema.insert(column.name.as_str().into(), dtype).is_some() {
|
|
473
|
-
return Err(Box::new(ConfigError(format!(
|
|
474
|
-
"duplicate column name in schema: {}",
|
|
475
|
-
column.name
|
|
476
|
-
))));
|
|
477
|
-
}
|
|
478
|
-
}
|
|
479
|
-
Ok(schema)
|
|
480
|
-
}
|
|
481
|
-
|
|
482
|
-
pub fn to_polars_string_schema(&self) -> FloeResult<Schema> {
|
|
483
|
-
let mut schema = Schema::with_capacity(self.columns.len());
|
|
484
|
-
for column in &self.columns {
|
|
485
|
-
if schema
|
|
486
|
-
.insert(column.name.as_str().into(), DataType::String)
|
|
487
|
-
.is_some()
|
|
488
|
-
{
|
|
489
|
-
return Err(Box::new(ConfigError(format!(
|
|
490
|
-
"duplicate column name in schema: {}",
|
|
491
|
-
column.name
|
|
492
|
-
))));
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
Ok(schema)
|
|
496
|
-
}
|
|
497
486
|
}
|
|
498
487
|
|
|
499
488
|
#[derive(Debug)]
|
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
use std::collections::HashSet;
|
|
2
2
|
|
|
3
|
+
use crate::config::storage::is_remote_uri;
|
|
3
4
|
use crate::config::{
|
|
4
|
-
CatalogDefinition, CatalogTypeConfig, EntityConfig, IncrementalMode,
|
|
5
|
-
StorageDefinition,
|
|
5
|
+
CatalogDefinition, CatalogTypeConfig, EntityConfig, IncrementalMode, PolicySeverity,
|
|
6
|
+
RootConfig, SourceOptions, StorageDefinition,
|
|
6
7
|
};
|
|
7
8
|
use crate::io::format;
|
|
8
9
|
use crate::io::read::json_selector::parse_selector;
|
|
9
10
|
use crate::io::read::xml_selector;
|
|
11
|
+
use crate::io::write::sink_format::sink_format;
|
|
10
12
|
use crate::{warnings, ConfigError, FloeResult};
|
|
11
13
|
|
|
12
14
|
const ALLOWED_COLUMN_TYPES: &[&str] = &["string", "number", "boolean", "datetime", "date", "time"];
|
|
13
15
|
const ALLOWED_CAST_MODES: &[&str] = &["strict", "coerce"];
|
|
14
16
|
const ALLOWED_NORMALIZE_STRATEGIES: &[&str] = &["snake_case", "lower", "camel_case", "none"];
|
|
15
|
-
const ALLOWED_POLICY_SEVERITIES: &[&str] = &["warn", "reject", "abort"];
|
|
16
17
|
const ALLOWED_MISSING_POLICIES: &[&str] = &["reject_file", "fill_nulls"];
|
|
17
18
|
const ALLOWED_EXTRA_POLICIES: &[&str] = &["reject_file", "ignore"];
|
|
18
19
|
const ALLOWED_STORAGE_TYPES: &[&str] = &["local", "s3", "adls", "gcs"];
|
|
@@ -103,6 +104,11 @@ fn validate_lineage(lineage: &crate::config::LineageConfig) -> FloeResult<()> {
|
|
|
103
104
|
"lineage.namespace must not be empty".to_string(),
|
|
104
105
|
)));
|
|
105
106
|
}
|
|
107
|
+
if lineage.max_failures == Some(0) {
|
|
108
|
+
return Err(Box::new(ConfigError(
|
|
109
|
+
"lineage.max_failures must be at least 1".to_string(),
|
|
110
|
+
)));
|
|
111
|
+
}
|
|
106
112
|
Ok(())
|
|
107
113
|
}
|
|
108
114
|
|
|
@@ -123,6 +129,12 @@ fn validate_report(
|
|
|
123
129
|
) -> FloeResult<()> {
|
|
124
130
|
let storage_name = storages.resolve_report_name(report.storage.as_deref())?;
|
|
125
131
|
storages.validate_report_reference("report.storage", &storage_name)?;
|
|
132
|
+
if storages.definition_type(&storage_name) == Some("local") && is_remote_uri(&report.path) {
|
|
133
|
+
return Err(Box::new(ConfigError(format!(
|
|
134
|
+
"report.path must be a local path (got {})",
|
|
135
|
+
report.path
|
|
136
|
+
))));
|
|
137
|
+
}
|
|
126
138
|
Ok(())
|
|
127
139
|
}
|
|
128
140
|
|
|
@@ -134,7 +146,6 @@ fn validate_entity(
|
|
|
134
146
|
) -> FloeResult<()> {
|
|
135
147
|
validate_source(entity, storages)?;
|
|
136
148
|
validate_state(entity)?;
|
|
137
|
-
validate_policy(entity)?;
|
|
138
149
|
validate_sink(entity, storages, catalogs)?;
|
|
139
150
|
validate_schema(entity, config_version)?;
|
|
140
151
|
if let Some(pii) = &entity.pii {
|
|
@@ -147,7 +158,7 @@ fn validate_pii(entity: &EntityConfig, pii: &crate::config::PiiConfig) -> FloeRe
|
|
|
147
158
|
use crate::config::PiiStrategy;
|
|
148
159
|
// Abort severity writes the raw input file to the rejected sink without
|
|
149
160
|
// loading a DataFrame, bypassing masking entirely.
|
|
150
|
-
if entity.policy.severity ==
|
|
161
|
+
if entity.policy.severity == PolicySeverity::Abort {
|
|
151
162
|
return Err(Box::new(ConfigError(format!(
|
|
152
163
|
"entity.name={} pii: masking is not applied when policy.severity=abort \
|
|
153
164
|
because the raw file is written to sink.rejected without DataFrame processing",
|
|
@@ -197,7 +208,7 @@ fn validate_pii(entity: &EntityConfig, pii: &crate::config::PiiConfig) -> FloeRe
|
|
|
197
208
|
.flatten()
|
|
198
209
|
.map(|s| s.as_str())
|
|
199
210
|
.collect();
|
|
200
|
-
let write_mode = entity.sink.
|
|
211
|
+
let write_mode = entity.sink.write_mode;
|
|
201
212
|
let is_merge_mode = matches!(
|
|
202
213
|
write_mode,
|
|
203
214
|
crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
|
|
@@ -515,7 +526,7 @@ fn validate_sink(
|
|
|
515
526
|
entity.sink.accepted.options.as_ref(),
|
|
516
527
|
)?;
|
|
517
528
|
|
|
518
|
-
if entity.policy.severity ==
|
|
529
|
+
if entity.policy.severity == PolicySeverity::Reject && entity.sink.rejected.is_none() {
|
|
519
530
|
return Err(Box::new(ConfigError(format!(
|
|
520
531
|
"entity.name={} sink.rejected is required when policy.severity=reject",
|
|
521
532
|
entity.name
|
|
@@ -534,28 +545,14 @@ fn validate_sink(
|
|
|
534
545
|
entity.sink.accepted.storage.as_deref(),
|
|
535
546
|
)?;
|
|
536
547
|
storages.validate_reference(entity, "sink.accepted.storage", &accepted_storage)?;
|
|
537
|
-
if
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
"entity.name={} sink.accepted.format=delta is only supported on local, s3, adls, or gcs storage (got {})",
|
|
546
|
-
entity.name, storage_type
|
|
547
|
-
))));
|
|
548
|
-
}
|
|
549
|
-
}
|
|
550
|
-
}
|
|
551
|
-
if entity.sink.accepted.format == "iceberg" {
|
|
552
|
-
if let Some(storage_type) = storages.definition_type(&accepted_storage) {
|
|
553
|
-
if storage_type != "local" && storage_type != "s3" && storage_type != "gcs" {
|
|
554
|
-
return Err(Box::new(ConfigError(format!(
|
|
555
|
-
"entity.name={} sink.accepted.format=iceberg is only supported on local, s3, or gcs storage for now (got {})",
|
|
556
|
-
entity.name, storage_type
|
|
557
|
-
))));
|
|
558
|
-
}
|
|
548
|
+
if let Some(storage_type) = storages.definition_type(&accepted_storage) {
|
|
549
|
+
let fmt = sink_format(entity.sink.accepted.format.as_str())?;
|
|
550
|
+
if !fmt.supported_storages().contains(&storage_type) {
|
|
551
|
+
let supported = fmt.supported_storages().join(", ");
|
|
552
|
+
return Err(Box::new(ConfigError(format!(
|
|
553
|
+
"entity.name={} sink.accepted.format={} is not supported on {} storage (supported: {})",
|
|
554
|
+
entity.name, entity.sink.accepted.format, storage_type, supported
|
|
555
|
+
))));
|
|
559
556
|
}
|
|
560
557
|
}
|
|
561
558
|
validate_iceberg_catalog_binding(entity, storages, catalogs, &accepted_storage)?;
|
|
@@ -589,20 +586,23 @@ fn validate_sink(
|
|
|
589
586
|
}
|
|
590
587
|
|
|
591
588
|
fn validate_sink_write_mode(entity: &EntityConfig) -> FloeResult<()> {
|
|
592
|
-
let write_mode = entity.sink.
|
|
589
|
+
let write_mode = entity.sink.write_mode;
|
|
590
|
+
let fmt = sink_format(entity.sink.accepted.format.as_str())?;
|
|
591
|
+
if !fmt.supported_modes().contains(&write_mode) {
|
|
592
|
+
return Err(Box::new(ConfigError(format!(
|
|
593
|
+
"entity.name={} sink.write_mode={} is not supported by sink.accepted.format={}",
|
|
594
|
+
entity.name,
|
|
595
|
+
write_mode.as_str(),
|
|
596
|
+
entity.sink.accepted.format
|
|
597
|
+
))));
|
|
598
|
+
}
|
|
599
|
+
|
|
593
600
|
let is_merge_mode = matches!(
|
|
594
601
|
write_mode,
|
|
595
602
|
crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
|
|
596
603
|
);
|
|
597
604
|
if is_merge_mode {
|
|
598
605
|
let mode_name = write_mode.as_str();
|
|
599
|
-
if entity.sink.accepted.format != "delta" {
|
|
600
|
-
return Err(Box::new(ConfigError(format!(
|
|
601
|
-
"entity.name={} sink.write_mode={} requires sink.accepted.format=delta",
|
|
602
|
-
entity.name, mode_name
|
|
603
|
-
))));
|
|
604
|
-
}
|
|
605
|
-
|
|
606
606
|
let primary_key = entity.schema.primary_key.as_ref().ok_or_else(|| {
|
|
607
607
|
Box::new(ConfigError(format!(
|
|
608
608
|
"entity.name={} sink.write_mode={} requires schema.primary_key",
|
|
@@ -628,9 +628,16 @@ fn validate_merge_options(
|
|
|
628
628
|
return Ok(());
|
|
629
629
|
};
|
|
630
630
|
|
|
631
|
-
|
|
631
|
+
let fmt = sink_format(entity.sink.accepted.format.as_str())?;
|
|
632
|
+
let supports_merge = fmt.supported_modes().iter().any(|m| {
|
|
633
|
+
matches!(
|
|
634
|
+
m,
|
|
635
|
+
crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
|
|
636
|
+
)
|
|
637
|
+
});
|
|
638
|
+
if !supports_merge {
|
|
632
639
|
return Err(Box::new(ConfigError(format!(
|
|
633
|
-
"entity.name={} sink.accepted.merge is only supported when sink.accepted.format
|
|
640
|
+
"entity.name={} sink.accepted.merge is only supported when sink.accepted.format supports merge (e.g. delta)",
|
|
634
641
|
entity.name
|
|
635
642
|
))));
|
|
636
643
|
}
|
|
@@ -1063,18 +1070,6 @@ fn validate_sink_partitioning(entity: &EntityConfig) -> FloeResult<()> {
|
|
|
1063
1070
|
Ok(())
|
|
1064
1071
|
}
|
|
1065
1072
|
|
|
1066
|
-
fn validate_policy(entity: &EntityConfig) -> FloeResult<()> {
|
|
1067
|
-
if !ALLOWED_POLICY_SEVERITIES.contains(&entity.policy.severity.as_str()) {
|
|
1068
|
-
return Err(Box::new(ConfigError(format!(
|
|
1069
|
-
"entity.name={} policy.severity={} is unsupported (allowed: {})",
|
|
1070
|
-
entity.name,
|
|
1071
|
-
entity.policy.severity,
|
|
1072
|
-
ALLOWED_POLICY_SEVERITIES.join(", ")
|
|
1073
|
-
))));
|
|
1074
|
-
}
|
|
1075
|
-
Ok(())
|
|
1076
|
-
}
|
|
1077
|
-
|
|
1078
1073
|
fn validate_schema(entity: &EntityConfig, config_version: ConfigVersion) -> FloeResult<()> {
|
|
1079
1074
|
if entity.source.format == "json" && entity.schema.columns.len() > MAX_JSON_COLUMNS {
|
|
1080
1075
|
return Err(Box::new(ConfigError(format!(
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
use crate::log::emit_log;
|
|
2
|
+
|
|
3
|
+
#[derive(Debug, thiserror::Error)]
|
|
4
|
+
#[error("{0}")]
|
|
5
|
+
pub struct ConfigError(pub String);
|
|
6
|
+
|
|
7
|
+
#[derive(Debug, thiserror::Error)]
|
|
8
|
+
#[error("{0}")]
|
|
9
|
+
pub struct RunError(pub String);
|
|
10
|
+
|
|
11
|
+
#[derive(Debug, thiserror::Error)]
|
|
12
|
+
#[error("{0}")]
|
|
13
|
+
pub struct StorageError(pub String);
|
|
14
|
+
|
|
15
|
+
#[derive(Debug, thiserror::Error)]
|
|
16
|
+
#[error("{0}")]
|
|
17
|
+
pub struct IoError(pub String);
|
|
18
|
+
|
|
19
|
+
pub fn emit(
|
|
20
|
+
run_id: &str,
|
|
21
|
+
entity: Option<&str>,
|
|
22
|
+
input: Option<&str>,
|
|
23
|
+
code: Option<&str>,
|
|
24
|
+
message: &str,
|
|
25
|
+
) {
|
|
26
|
+
emit_log("error", run_id, entity, input, code, message);
|
|
27
|
+
}
|