floe-python 0.4.4__tar.gz → 0.4.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {floe_python-0.4.4 → floe_python-0.4.6}/Cargo.lock +3 -3
- {floe_python-0.4.4 → floe_python-0.4.6}/PKG-INFO +1 -1
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/Cargo.toml +2 -2
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/location.rs +41 -1
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/mod.rs +3 -1
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/parse.rs +4 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/storage.rs +47 -2
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/types.rs +2 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/format.rs +176 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/mod.rs +6 -1
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/object_store.rs +9 -2
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/s3.rs +18 -6
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/context.rs +2 -1
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/rest.rs +108 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/parquet.rs +31 -9
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/lib.rs +6 -2
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/manifest/builder.rs +204 -40
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/manifest/mod.rs +1 -1
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/manifest/model.rs +28 -9
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/profile/parse.rs +49 -5
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/profile/types.rs +8 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/context.rs +84 -5
- floe_python-0.4.6/crates/floe-core/src/run/entity/accepted_buffer.rs +251 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/accepted_write.rs +7 -48
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/mod.rs +27 -17
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/validate_split.rs +72 -8
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/events.rs +5 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/mod.rs +21 -8
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/archive_run.rs +93 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_glue_run.rs +1 -1
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_s3_run.rs +1 -1
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/local_run.rs +97 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/adls_storage.rs +4 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/adls_validation.rs +4 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/catalogs.rs +8 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/local_storage.rs +2 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/mod.rs +1 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/remote_base.rs +8 -0
- floe_python-0.4.6/crates/floe-core/tests/unit/config/storage_resolver_uri.rs +136 -0
- floe_python-0.4.6/crates/floe-core/tests/unit/io/format.rs +357 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +2 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/object_store.rs +10 -0
- floe_python-0.4.6/crates/floe-core/tests/unit/manifest/mod.rs +793 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/parse.rs +140 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/storage.rs +8 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +27 -13
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/Cargo.toml +2 -2
- {floe_python-0.4.4 → floe_python-0.4.6}/pyproject.toml +1 -1
- floe_python-0.4.4/crates/floe-core/tests/unit/io/format.rs +0 -19
- floe_python-0.4.4/crates/floe-core/tests/unit/manifest/mod.rs +0 -306
- {floe_python-0.4.4 → floe_python-0.4.6}/Cargo.toml +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/README.md +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/README.md +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/add_entity.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/cast.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/mismatch.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/normalize.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/not_null.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/unique.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/catalog.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/template.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/validate.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/yaml_decode.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/errors.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/avro.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/csv.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/json.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/json_selector.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/orc.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/parquet.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/xlsx.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/xml.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/inputs.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/adls.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/gcs.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/local.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/target.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/unique_seed/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/accepted.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/csv.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/options.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/unity.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/unity_tests.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/metrics.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/parts.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/sink_format.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/shared.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/lineage/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/log.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/manifest/reconstruct.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/profile/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/profile/validate.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/report/build.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/report/entity.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/report/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/report/output.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/incremental.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/pii.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/precheck.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/process.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/resolve.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/file.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/output.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/perf.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/runner/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/runner/outcome.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/runtime.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/state/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/vars/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/vars/resolve.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/warnings.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/delta_run.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/dry_run.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/common.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/add_entity.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/config_validation.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/lineage_validation.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/parse.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/templating.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/inputs.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/local.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/delta_write.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/validate.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/checks.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/incremental.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/lineage.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/pii.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/report.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/state/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/.gitignore +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/README.md +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/functions.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/lib.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/observer.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/types/config.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/types/errors.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/types/mod.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/types/outcome.rs +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/config.yml +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/profile.yml +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/test_floe.py +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/python/floe/__init__.py +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/python/floe/_floe.pyi +0 -0
- {floe_python-0.4.4 → floe_python-0.4.6}/python/floe/py.typed +0 -0
|
@@ -3399,7 +3399,7 @@ dependencies = [
|
|
|
3399
3399
|
|
|
3400
3400
|
[[package]]
|
|
3401
3401
|
name = "floe-cli"
|
|
3402
|
-
version = "0.4.4"
|
|
3402
|
+
version = "0.4.6"
|
|
3403
3403
|
dependencies = [
|
|
3404
3404
|
"assert_cmd",
|
|
3405
3405
|
"clap",
|
|
@@ -3412,7 +3412,7 @@ dependencies = [
|
|
|
3412
3412
|
|
|
3413
3413
|
[[package]]
|
|
3414
3414
|
name = "floe-core"
|
|
3415
|
-
version = "0.4.4"
|
|
3415
|
+
version = "0.4.6"
|
|
3416
3416
|
dependencies = [
|
|
3417
3417
|
"apache-avro 0.16.0",
|
|
3418
3418
|
"arrow",
|
|
@@ -3455,7 +3455,7 @@ dependencies = [
|
|
|
3455
3455
|
|
|
3456
3456
|
[[package]]
|
|
3457
3457
|
name = "floe-python"
|
|
3458
|
-
version = "0.4.4"
|
|
3458
|
+
version = "0.4.6"
|
|
3459
3459
|
dependencies = [
|
|
3460
3460
|
"floe-core",
|
|
3461
3461
|
"pyo3",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "floe-core"
|
|
3
|
-
version = "0.4.4"
|
|
3
|
+
version = "0.4.6"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
description = "Core library for Floe, a YAML-driven technical ingestion tool."
|
|
6
6
|
license = "MIT"
|
|
@@ -14,7 +14,7 @@ path = "src/lib.rs"
|
|
|
14
14
|
|
|
15
15
|
[dependencies]
|
|
16
16
|
yaml-rust2 = "0.11"
|
|
17
|
-
polars = { version = "0.52.0", features = ["csv", "parquet", "lazy", "timezones", "dtype-date", "dtype-datetime", "dtype-time", "polars-ops", "is_unique", "is_first_distinct"] }
|
|
17
|
+
polars = { version = "0.52.0", features = ["csv", "parquet", "lazy", "new_streaming", "timezones", "dtype-date", "dtype-datetime", "dtype-time", "polars-ops", "is_unique", "is_first_distinct"] }
|
|
18
18
|
calamine = "0.24"
|
|
19
19
|
rayon = "1"
|
|
20
20
|
deltalake = { version = "0.30.1", features = ["datafusion", "s3", "azure", "gcs"] }
|
|
@@ -45,7 +45,7 @@ pub fn resolve_config_location(input: &str) -> FloeResult<ConfigLocation> {
|
|
|
45
45
|
fn download_remote_config(uri: &str, temp_dir: &Path) -> FloeResult<PathBuf> {
|
|
46
46
|
if uri.starts_with("s3://") {
|
|
47
47
|
let location = storage::s3::parse_s3_uri(uri)?;
|
|
48
|
-
let client = storage::s3::S3Client::new(location.bucket, None)?;
|
|
48
|
+
let client = storage::s3::S3Client::new(location.bucket, None, None, None)?;
|
|
49
49
|
return client.download_to_temp(uri, temp_dir);
|
|
50
50
|
}
|
|
51
51
|
if uri.starts_with("gs://") {
|
|
@@ -63,6 +63,8 @@ fn download_remote_config(uri: &str, temp_dir: &Path) -> FloeResult<PathBuf> {
|
|
|
63
63
|
account: Some(location.account),
|
|
64
64
|
container: Some(location.container),
|
|
65
65
|
prefix: None,
|
|
66
|
+
endpoint: None,
|
|
67
|
+
path_style_access: None,
|
|
66
68
|
};
|
|
67
69
|
let client = storage::adls::AdlsClient::new(&definition)?;
|
|
68
70
|
return client.download_to_temp(uri, temp_dir);
|
|
@@ -70,6 +72,44 @@ fn download_remote_config(uri: &str, temp_dir: &Path) -> FloeResult<PathBuf> {
|
|
|
70
72
|
Err(format!("unsupported config uri: {}", uri).into())
|
|
71
73
|
}
|
|
72
74
|
|
|
75
|
+
/// Write `bytes` to a remote URI by staging them in a temp file then uploading.
|
|
76
|
+
pub fn write_bytes_to_remote_uri(bytes: &[u8], uri: &str) -> FloeResult<()> {
|
|
77
|
+
let temp_dir = TempDir::new()?;
|
|
78
|
+
let local_path = temp_dir.path().join("upload");
|
|
79
|
+
std::fs::write(&local_path, bytes)?;
|
|
80
|
+
upload_to_remote_uri(&local_path, uri)
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
pub fn upload_to_remote_uri(local_path: &Path, uri: &str) -> FloeResult<()> {
|
|
84
|
+
if uri.starts_with("s3://") {
|
|
85
|
+
let location = storage::s3::parse_s3_uri(uri)?;
|
|
86
|
+
let client = storage::s3::S3Client::new(location.bucket, None, None, None)?;
|
|
87
|
+
return client.upload_from_path(local_path, uri);
|
|
88
|
+
}
|
|
89
|
+
if uri.starts_with("gs://") {
|
|
90
|
+
let location = storage::gcs::parse_gcs_uri(uri)?;
|
|
91
|
+
let client = storage::gcs::GcsClient::new(location.bucket)?;
|
|
92
|
+
return client.upload_from_path(local_path, uri);
|
|
93
|
+
}
|
|
94
|
+
if uri.starts_with("abfs://") {
|
|
95
|
+
let location = storage::adls::parse_adls_uri(uri)?;
|
|
96
|
+
let definition = StorageDefinition {
|
|
97
|
+
name: "manifest".to_string(),
|
|
98
|
+
fs_type: "adls".to_string(),
|
|
99
|
+
bucket: None,
|
|
100
|
+
region: None,
|
|
101
|
+
account: Some(location.account),
|
|
102
|
+
container: Some(location.container),
|
|
103
|
+
prefix: None,
|
|
104
|
+
endpoint: None,
|
|
105
|
+
path_style_access: None,
|
|
106
|
+
};
|
|
107
|
+
let client = storage::adls::AdlsClient::new(&definition)?;
|
|
108
|
+
return client.upload_from_path(local_path, uri);
|
|
109
|
+
}
|
|
110
|
+
Err(format!("unsupported manifest output uri: {uri}").into())
|
|
111
|
+
}
|
|
112
|
+
|
|
73
113
|
pub(crate) fn is_remote_uri(value: &str) -> bool {
|
|
74
114
|
value.starts_with("s3://") || value.starts_with("gs://") || value.starts_with("abfs://")
|
|
75
115
|
}
|
|
@@ -9,7 +9,9 @@ pub(crate) mod yaml_decode;
|
|
|
9
9
|
|
|
10
10
|
pub use catalog::{CatalogResolver, ResolvedDeltaCatalogTarget, ResolvedIcebergCatalogTarget};
|
|
11
11
|
pub(crate) use location::is_remote_uri;
|
|
12
|
-
pub use location::{resolve_config_location, ConfigLocation};
|
|
12
|
+
pub use location::{
|
|
13
|
+
resolve_config_location, upload_to_remote_uri, write_bytes_to_remote_uri, ConfigLocation,
|
|
14
|
+
};
|
|
13
15
|
pub use storage::{resolve_local_path, ConfigBase, ResolvedPath, StorageResolver};
|
|
14
16
|
pub use types::*;
|
|
15
17
|
|
|
@@ -695,6 +695,8 @@ fn parse_storage_definition(value: &Yaml) -> FloeResult<StorageDefinition> {
|
|
|
695
695
|
"account",
|
|
696
696
|
"container",
|
|
697
697
|
"prefix",
|
|
698
|
+
"endpoint",
|
|
699
|
+
"path_style_access",
|
|
698
700
|
],
|
|
699
701
|
)?;
|
|
700
702
|
Ok(StorageDefinition {
|
|
@@ -705,6 +707,8 @@ fn parse_storage_definition(value: &Yaml) -> FloeResult<StorageDefinition> {
|
|
|
705
707
|
account: opt_string(hash, "account", "storages.definitions")?,
|
|
706
708
|
container: opt_string(hash, "container", "storages.definitions")?,
|
|
707
709
|
prefix: opt_string(hash, "prefix", "storages.definitions")?,
|
|
710
|
+
endpoint: opt_string(hash, "endpoint", "storages.definitions")?,
|
|
711
|
+
path_style_access: opt_bool(hash, "path_style_access", "storages.definitions")?,
|
|
708
712
|
})
|
|
709
713
|
}
|
|
710
714
|
|
|
@@ -215,7 +215,7 @@ impl StorageResolver {
|
|
|
215
215
|
raw_path: &str,
|
|
216
216
|
) -> FloeResult<ResolvedPath> {
|
|
217
217
|
let name = storage_name.unwrap_or(self.default_name.as_str());
|
|
218
|
-
if !self.has_config && name != "local" {
|
|
218
|
+
if !self.has_config && name != "local" && !self.definitions.contains_key(name) {
|
|
219
219
|
return Err(Box::new(ConfigError(format!(
|
|
220
220
|
"entity.name={} {field} references unknown storage {} (no storages block)",
|
|
221
221
|
entity_name, name
|
|
@@ -238,6 +238,8 @@ impl StorageResolver {
|
|
|
238
238
|
account: None,
|
|
239
239
|
container: None,
|
|
240
240
|
prefix: None,
|
|
241
|
+
endpoint: None,
|
|
242
|
+
path_style_access: None,
|
|
241
243
|
}
|
|
242
244
|
};
|
|
243
245
|
|
|
@@ -304,7 +306,7 @@ impl StorageResolver {
|
|
|
304
306
|
raw_path: &str,
|
|
305
307
|
) -> FloeResult<ResolvedPath> {
|
|
306
308
|
let name = storage_name.unwrap_or(self.default_name.as_str());
|
|
307
|
-
if !self.has_config && name != "local" {
|
|
309
|
+
if !self.has_config && name != "local" && !self.definitions.contains_key(name) {
|
|
308
310
|
return Err(Box::new(ConfigError(format!(
|
|
309
311
|
"report.storage references unknown storage {} (no storages block)",
|
|
310
312
|
name
|
|
@@ -327,6 +329,8 @@ impl StorageResolver {
|
|
|
327
329
|
account: None,
|
|
328
330
|
container: None,
|
|
329
331
|
prefix: None,
|
|
332
|
+
endpoint: None,
|
|
333
|
+
path_style_access: None,
|
|
330
334
|
}
|
|
331
335
|
};
|
|
332
336
|
|
|
@@ -386,9 +390,48 @@ impl StorageResolver {
|
|
|
386
390
|
}
|
|
387
391
|
}
|
|
388
392
|
|
|
393
|
+
/// Scan definitions for the first one whose scheme and bucket/account match `uri`.
|
|
394
|
+
/// Used in manifest mode to resolve a bare report URI back to a named definition.
|
|
395
|
+
pub fn find_definition_name_for_uri(&self, uri: &str) -> Option<String> {
|
|
396
|
+
for (name, def) in &self.definitions {
|
|
397
|
+
if uri.starts_with("s3://") && def.fs_type == "s3" {
|
|
398
|
+
if let Some(b) = &def.bucket {
|
|
399
|
+
if uri.starts_with(&format!("s3://{b}/")) || uri == format!("s3://{b}") {
|
|
400
|
+
return Some(name.clone());
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
if uri.starts_with("gs://") && def.fs_type == "gcs" {
|
|
405
|
+
if let Some(b) = &def.bucket {
|
|
406
|
+
if uri.starts_with(&format!("gs://{b}/")) || uri == format!("gs://{b}") {
|
|
407
|
+
return Some(name.clone());
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
if uri.starts_with("abfs://") && def.fs_type == "adls" {
|
|
412
|
+
if let (Some(c), Some(a)) = (&def.container, &def.account) {
|
|
413
|
+
if uri.starts_with(&format!("abfs://{c}@{a}.dfs.core.windows.net")) {
|
|
414
|
+
return Some(name.clone());
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
None
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
/// Register a synthetic `StorageDefinition` into this resolver.
|
|
423
|
+
/// Used in manifest mode when the report URI has no matching definition in the config.
|
|
424
|
+
/// Does NOT flip `has_config`; entity resolution keeps its implicit-local fallback.
|
|
425
|
+
pub fn register_definition(&mut self, definition: StorageDefinition) {
|
|
426
|
+
self.definitions.insert(definition.name.clone(), definition);
|
|
427
|
+
}
|
|
428
|
+
|
|
389
429
|
pub fn definition(&self, name: &str) -> Option<StorageDefinition> {
|
|
390
430
|
if self.has_config {
|
|
391
431
|
self.definitions.get(name).cloned()
|
|
432
|
+
} else if let Some(def) = self.definitions.get(name) {
|
|
433
|
+
// Synthetic definition registered by register_definition (e.g. report target).
|
|
434
|
+
Some(def.clone())
|
|
392
435
|
} else if name == "local" {
|
|
393
436
|
Some(StorageDefinition {
|
|
394
437
|
name: "local".to_string(),
|
|
@@ -398,6 +441,8 @@ impl StorageResolver {
|
|
|
398
441
|
account: None,
|
|
399
442
|
container: None,
|
|
400
443
|
prefix: None,
|
|
444
|
+
endpoint: None,
|
|
445
|
+
path_style_access: None,
|
|
401
446
|
})
|
|
402
447
|
} else {
|
|
403
448
|
None
|
|
@@ -366,6 +366,8 @@ pub struct StorageDefinition {
|
|
|
366
366
|
pub account: Option<String>,
|
|
367
367
|
pub container: Option<String>,
|
|
368
368
|
pub prefix: Option<String>,
|
|
369
|
+
pub endpoint: Option<String>,
|
|
370
|
+
pub path_style_access: Option<bool>,
|
|
369
371
|
}
|
|
370
372
|
|
|
371
373
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
@@ -119,6 +119,182 @@ pub struct AcceptedWriteOutput {
|
|
|
119
119
|
pub perf: Option<AcceptedWritePerfBreakdown>,
|
|
120
120
|
}
|
|
121
121
|
|
|
122
|
+
/// Upper bound on the number of `part_files` entries kept in a report.
///
/// Individual write sinks already truncate their per-write list to this
/// length (see `parquet.rs`). The reducer reuses the same bound when folding
/// flushes together, so a high-fanout entity cannot inflate the run report
/// to N × 50 entries.
pub const MAX_REPORTED_PART_FILES: usize = 50;
|
|
126
|
+
|
|
127
|
+
impl AcceptedWriteOutput {
|
|
128
|
+
/// Fold a later flush's output into this one. The receiver represents the
|
|
129
|
+
/// running total across N completed flushes; `next` is the output of the
|
|
130
|
+
/// (N+1)th flush.
|
|
131
|
+
///
|
|
132
|
+
/// Field semantics across flushes:
|
|
133
|
+
/// - `parts_written` (always known, the count of successful sink writes)
|
|
134
|
+
/// sums.
|
|
135
|
+
/// - `files_written` and the `Option<u64>` metric fields
|
|
136
|
+
/// (`total_bytes_written`, `small_files_count`, perf entries) sum
|
|
137
|
+
/// when *both* sides are `Some`; if either side is `None` the merged
|
|
138
|
+
/// result is `None`. "Unknown poisons" matches the per-flush
|
|
139
|
+
/// semantics: when any single flush could not determine its file
|
|
140
|
+
/// count (for example a remote Delta commit whose post-commit log
|
|
141
|
+
/// could not be read), reporting a partial sum would silently
|
|
142
|
+
/// under-count the total. The run report instead surfaces the value
|
|
143
|
+
/// as unknown.
|
|
144
|
+
/// - `part_files` concatenates and is capped at `MAX_REPORTED_PART_FILES`
|
|
145
|
+
/// so the reducer preserves the same cap the individual sink writers
|
|
146
|
+
/// apply per-flush.
|
|
147
|
+
/// - `table_version` / `snapshot_id` take the latest (Delta commit /
|
|
148
|
+
/// Iceberg snapshot move forward with every commit; the final state
|
|
149
|
+
/// is what readers see).
|
|
150
|
+
/// - `table_root_uri`, `catalog`, `schema_evolution` take the first
|
|
151
|
+
/// non-default value seen — table location and catalog registration
|
|
152
|
+
/// are established by the first write; schema evolution only fires on
|
|
153
|
+
/// the first (Overwrite) write because subsequent flushes are Append.
|
|
154
|
+
/// - `avg_file_size_mb` is recomputed from `total_bytes_written` divided
|
|
155
|
+
/// by `files_written` when available (so it matches the per-flush
|
|
156
|
+
/// semantics: for Parquet/Iceberg `files == parts`, but for Delta one
|
|
157
|
+
/// commit can write multiple `add` files and `parts != files`).
|
|
158
|
+
/// Falls back to `parts_written` when `files_written` is unknown.
|
|
159
|
+
/// - `perf` accumulates by summing each `Option<u64>` field.
|
|
160
|
+
/// - `merge` is unreachable in the buffered path (merge modes use the
|
|
161
|
+
/// legacy accumulate-then-write code path); the running value is
|
|
162
|
+
/// preserved if anything ever does pass one.
|
|
163
|
+
pub fn merge_in(&mut self, next: AcceptedWriteOutput) {
|
|
164
|
+
let AcceptedWriteOutput {
|
|
165
|
+
files_written,
|
|
166
|
+
parts_written,
|
|
167
|
+
part_files,
|
|
168
|
+
table_version,
|
|
169
|
+
snapshot_id,
|
|
170
|
+
table_root_uri,
|
|
171
|
+
catalog,
|
|
172
|
+
metrics,
|
|
173
|
+
merge,
|
|
174
|
+
schema_evolution,
|
|
175
|
+
perf,
|
|
176
|
+
} = next;
|
|
177
|
+
|
|
178
|
+
// `parts_written == 0` on the receiver means no prior flush has been
|
|
179
|
+
// merged. In that case `Option<u64>` fields on `self` start at `None`
|
|
180
|
+
// not because a flush returned unknown but because nothing has been
|
|
181
|
+
// recorded yet — distinguishing "vacuous" from "poisoned" matters
|
|
182
|
+
// because adopting the next flush's value verbatim on the first merge
|
|
183
|
+
// is correct, while applying poison-on-unknown semantics from `None`
|
|
184
|
+
// would always poison the very first merge.
|
|
185
|
+
let first_merge = self.parts_written == 0;
|
|
186
|
+
|
|
187
|
+
self.files_written = merge_option_u64(self.files_written, files_written, first_merge);
|
|
188
|
+
self.parts_written += parts_written;
|
|
189
|
+
let remaining = MAX_REPORTED_PART_FILES.saturating_sub(self.part_files.len());
|
|
190
|
+
if remaining > 0 {
|
|
191
|
+
self.part_files
|
|
192
|
+
.extend(part_files.into_iter().take(remaining));
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
if table_version.is_some() {
|
|
196
|
+
self.table_version = table_version;
|
|
197
|
+
}
|
|
198
|
+
if snapshot_id.is_some() {
|
|
199
|
+
self.snapshot_id = snapshot_id;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
if self.table_root_uri.is_none() {
|
|
203
|
+
self.table_root_uri = table_root_uri;
|
|
204
|
+
}
|
|
205
|
+
if self.catalog.is_none() {
|
|
206
|
+
self.catalog = catalog;
|
|
207
|
+
}
|
|
208
|
+
if !self.schema_evolution.enabled
|
|
209
|
+
&& !self.schema_evolution.applied
|
|
210
|
+
&& self.schema_evolution.added_columns.is_empty()
|
|
211
|
+
&& !self.schema_evolution.incompatible_changes_detected
|
|
212
|
+
&& self.schema_evolution.mode.is_empty()
|
|
213
|
+
{
|
|
214
|
+
self.schema_evolution = schema_evolution;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
self.metrics.total_bytes_written = merge_option_u64(
|
|
218
|
+
self.metrics.total_bytes_written,
|
|
219
|
+
metrics.total_bytes_written,
|
|
220
|
+
first_merge,
|
|
221
|
+
);
|
|
222
|
+
self.metrics.small_files_count = merge_option_u64(
|
|
223
|
+
self.metrics.small_files_count,
|
|
224
|
+
metrics.small_files_count,
|
|
225
|
+
first_merge,
|
|
226
|
+
);
|
|
227
|
+
self.metrics.avg_file_size_mb = recompute_avg_file_size_mb(
|
|
228
|
+
self.metrics.total_bytes_written,
|
|
229
|
+
self.files_written,
|
|
230
|
+
self.parts_written,
|
|
231
|
+
);
|
|
232
|
+
|
|
233
|
+
if self.merge.is_none() {
|
|
234
|
+
self.merge = merge;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
match (self.perf.take(), perf) {
|
|
238
|
+
(Some(a), Some(b)) => self.perf = Some(sum_perf_breakdown(a, b)),
|
|
239
|
+
(Some(a), None) => self.perf = Some(a),
|
|
240
|
+
(None, Some(b)) => self.perf = Some(b),
|
|
241
|
+
(None, None) => self.perf = None,
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/// Adds two `Option<u64>` values with poison-on-unknown semantics.
///
/// Returns `Some(a + b)` only when both operands are `Some` and the sum fits
/// in a `u64`. If either side is `None` the result is `None`: reporting a
/// partial sum as if it were the total would silently under-count whenever
/// one flush could not determine the underlying count (e.g. remote Delta
/// commit-log read failures).
///
/// A sum that overflows `u64` is likewise reported as `None` (unknown)
/// instead of panicking in debug builds or wrapping in release builds.
fn sum_option_u64(a: Option<u64>, b: Option<u64>) -> Option<u64> {
    match (a, b) {
        (Some(a), Some(b)) => a.checked_add(b),
        _ => None,
    }
}
|
|
257
|
+
|
|
258
|
+
/// Progressive `Option<u64>` merge used by `merge_in`. On the first merge
|
|
259
|
+
/// (when the accumulator has no flush recorded yet) the next flush's value is
|
|
260
|
+
/// taken verbatim; on subsequent merges `sum_option_u64`'s poison-on-unknown
|
|
261
|
+
/// semantics apply.
|
|
262
|
+
fn merge_option_u64(acc: Option<u64>, next: Option<u64>, first_merge: bool) -> Option<u64> {
|
|
263
|
+
if first_merge {
|
|
264
|
+
next
|
|
265
|
+
} else {
|
|
266
|
+
sum_option_u64(acc, next)
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
/// Derives the average file size in MiB from merged totals.
///
/// The divisor is `files_written` when known, otherwise `parts_written`.
/// Returns `None` when the byte total is unknown or the divisor is zero.
fn recompute_avg_file_size_mb(
    total_bytes: Option<u64>,
    files_written: Option<u64>,
    parts_written: u64,
) -> Option<f64> {
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

    let file_count = match files_written {
        Some(known) => known,
        None => parts_written,
    };
    match (total_bytes, file_count) {
        (None, _) | (_, 0) => None,
        (Some(total), count) => Some((total as f64) / (count as f64) / BYTES_PER_MB),
    }
}
|
|
283
|
+
|
|
284
|
+
fn sum_perf_breakdown(
|
|
285
|
+
a: AcceptedWritePerfBreakdown,
|
|
286
|
+
b: AcceptedWritePerfBreakdown,
|
|
287
|
+
) -> AcceptedWritePerfBreakdown {
|
|
288
|
+
AcceptedWritePerfBreakdown {
|
|
289
|
+
conversion_ms: sum_option_u64(a.conversion_ms, b.conversion_ms),
|
|
290
|
+
source_df_build_ms: sum_option_u64(a.source_df_build_ms, b.source_df_build_ms),
|
|
291
|
+
merge_exec_ms: sum_option_u64(a.merge_exec_ms, b.merge_exec_ms),
|
|
292
|
+
data_write_ms: sum_option_u64(a.data_write_ms, b.data_write_ms),
|
|
293
|
+
commit_ms: sum_option_u64(a.commit_ms, b.commit_ms),
|
|
294
|
+
metrics_read_ms: sum_option_u64(a.metrics_read_ms, b.metrics_read_ms),
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
122
298
|
pub trait InputAdapter: Send + Sync {
|
|
123
299
|
fn format(&self) -> &'static str;
|
|
124
300
|
|
|
@@ -129,7 +129,12 @@ fn build_client(definition: &config::StorageDefinition) -> FloeResult<Box<dyn St
|
|
|
129
129
|
"s3" => {
|
|
130
130
|
let bucket =
|
|
131
131
|
validation::require_field(definition, definition.bucket.as_ref(), "bucket", "s3")?;
|
|
132
|
-
Box::new(s3::S3Client::new(
|
|
132
|
+
Box::new(s3::S3Client::new(
|
|
133
|
+
bucket,
|
|
134
|
+
definition.region.as_deref(),
|
|
135
|
+
definition.endpoint.as_deref(),
|
|
136
|
+
definition.path_style_access,
|
|
137
|
+
)?)
|
|
133
138
|
}
|
|
134
139
|
"adls" => Box::new(adls::AdlsClient::new(definition)?),
|
|
135
140
|
"gcs" => {
|
|
@@ -111,9 +111,16 @@ pub fn iceberg_store_config(
|
|
|
111
111
|
Target::S3 { storage, uri, .. } => {
|
|
112
112
|
let mut file_io_props = HashMap::new();
|
|
113
113
|
if let Some(definition) = resolver.definition(storage) {
|
|
114
|
-
if let Some(region) = definition.region {
|
|
114
|
+
if let Some(region) = &definition.region {
|
|
115
115
|
file_io_props.insert(S3_REGION.to_string(), region.clone());
|
|
116
|
-
file_io_props.insert(CLIENT_REGION.to_string(), region);
|
|
116
|
+
file_io_props.insert(CLIENT_REGION.to_string(), region.clone());
|
|
117
|
+
}
|
|
118
|
+
if let Some(endpoint) = &definition.endpoint {
|
|
119
|
+
file_io_props.insert("s3.endpoint".to_string(), endpoint.clone());
|
|
120
|
+
}
|
|
121
|
+
if let Some(path_style) = definition.path_style_access {
|
|
122
|
+
file_io_props
|
|
123
|
+
.insert("s3.path-style-access".to_string(), path_style.to_string());
|
|
117
124
|
}
|
|
118
125
|
}
|
|
119
126
|
Ok(IcebergStoreConfig {
|
|
@@ -19,23 +19,35 @@ pub struct S3Client {
|
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
impl S3Client {
|
|
22
|
-
pub fn new(
|
|
22
|
+
pub fn new(
|
|
23
|
+
bucket: String,
|
|
24
|
+
region: Option<&str>,
|
|
25
|
+
endpoint: Option<&str>,
|
|
26
|
+
path_style_access: Option<bool>,
|
|
27
|
+
) -> FloeResult<Self> {
|
|
23
28
|
let runtime = tokio::runtime::Builder::new_current_thread()
|
|
24
29
|
.enable_all()
|
|
25
30
|
.build()
|
|
26
31
|
.map_err(|err| Box::new(StorageError(format!("failed to build aws runtime: {err}"))))?;
|
|
32
|
+
let endpoint = endpoint.map(ToOwned::to_owned);
|
|
27
33
|
let config = runtime.block_on(async {
|
|
28
34
|
let region_provider = match region {
|
|
29
35
|
Some(region) => RegionProviderChain::first_try(Region::new(region.to_string()))
|
|
30
36
|
.or_default_provider(),
|
|
31
37
|
None => RegionProviderChain::default_provider(),
|
|
32
38
|
};
|
|
33
|
-
|
|
34
|
-
.region(region_provider)
|
|
35
|
-
|
|
36
|
-
.
|
|
39
|
+
let mut builder =
|
|
40
|
+
aws_config::defaults(aws_config::BehaviorVersion::latest()).region(region_provider);
|
|
41
|
+
if let Some(ep) = endpoint {
|
|
42
|
+
builder = builder.endpoint_url(ep);
|
|
43
|
+
}
|
|
44
|
+
builder.load().await
|
|
37
45
|
});
|
|
38
|
-
let
|
|
46
|
+
let mut s3_builder = aws_sdk_s3::config::Builder::from(&config);
|
|
47
|
+
if path_style_access.unwrap_or(false) {
|
|
48
|
+
s3_builder = s3_builder.force_path_style(true);
|
|
49
|
+
}
|
|
50
|
+
let client = Client::from_conf(s3_builder.build());
|
|
39
51
|
Ok(Self {
|
|
40
52
|
bucket,
|
|
41
53
|
client,
|
|
@@ -76,7 +76,8 @@ pub(super) fn build_iceberg_write_context(
|
|
|
76
76
|
latest_s3_metadata_location(client, base_key)?
|
|
77
77
|
}
|
|
78
78
|
None => {
|
|
79
|
-
let mut client =
|
|
79
|
+
let mut client =
|
|
80
|
+
io::storage::s3::S3Client::new(bucket.clone(), None, None, None)?;
|
|
80
81
|
latest_s3_metadata_location(&mut client, base_key)?
|
|
81
82
|
}
|
|
82
83
|
}
|
|
@@ -45,6 +45,7 @@ pub(crate) async fn build_rest_catalog(
|
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
if let Some(credential) = rest_cfg.credential.as_deref() {
|
|
48
|
+
let credential = expand_env_refs(credential, &rest_cfg.catalog_name)?;
|
|
48
49
|
if let Some(token_value) = credential.strip_prefix("token:") {
|
|
49
50
|
// Bearer PAT (Unity Catalog / Nessie)
|
|
50
51
|
props.insert("token".to_string(), token_value.to_string());
|
|
@@ -140,6 +141,39 @@ impl RestIcebergCatalogConfig {
|
|
|
140
141
|
}
|
|
141
142
|
}
|
|
142
143
|
|
|
144
|
+
fn expand_env_refs(value: &str, catalog_name: &str) -> FloeResult<String> {
|
|
145
|
+
if !value.contains("${") {
|
|
146
|
+
return Ok(value.to_string());
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
let mut parts = Vec::new();
|
|
150
|
+
for part in value.split(':') {
|
|
151
|
+
parts.push(expand_env_ref_part(part, catalog_name)?);
|
|
152
|
+
}
|
|
153
|
+
Ok(parts.join(":"))
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
fn expand_env_ref_part(part: &str, catalog_name: &str) -> FloeResult<String> {
|
|
157
|
+
let Some(inner) = part.strip_prefix("${") else {
|
|
158
|
+
return Ok(part.to_string());
|
|
159
|
+
};
|
|
160
|
+
let Some(name) = inner.strip_suffix('}') else {
|
|
161
|
+
return Err(Box::new(RunError(format!(
|
|
162
|
+
"rest iceberg catalog {catalog_name} credential has unclosed env placeholder"
|
|
163
|
+
))));
|
|
164
|
+
};
|
|
165
|
+
if name.is_empty() || name.contains('{') || name.contains('}') {
|
|
166
|
+
return Err(Box::new(RunError(format!(
|
|
167
|
+
"rest iceberg catalog {catalog_name} credential has invalid env placeholder"
|
|
168
|
+
))));
|
|
169
|
+
}
|
|
170
|
+
std::env::var(name).map_err(|_| {
|
|
171
|
+
Box::new(RunError(format!(
|
|
172
|
+
"rest iceberg catalog {catalog_name} credential references env var {name} which is not set"
|
|
173
|
+
))) as Box<dyn std::error::Error + Send + Sync>
|
|
174
|
+
})
|
|
175
|
+
}
|
|
176
|
+
|
|
143
177
|
pub(crate) async fn write_via_rest_catalog(
|
|
144
178
|
rest_cfg: &RestIcebergCatalogConfig,
|
|
145
179
|
table_root_uri: String,
|
|
@@ -342,3 +376,77 @@ async fn create_rest_table(
|
|
|
342
376
|
.await
|
|
343
377
|
.map_err(map_iceberg_err("rest catalog create_table failed"))
|
|
344
378
|
}
|
|
379
|
+
|
|
380
|
+
#[cfg(test)]
mod tests {
    use super::expand_env_refs;

    // Each test touches only its own `FLOE_TEST_REST_*` variable: the process
    // environment is shared across concurrently running tests, and a generic
    // name could clobber a variable the developer or CI actually relies on.

    /// Placeholders in separate `:`-delimited segments are each expanded.
    #[test]
    fn expands_partial_env_refs_in_client_credentials() {
        std::env::set_var("FLOE_TEST_REST_CLIENT_ID", "client-id");
        std::env::set_var("FLOE_TEST_REST_CLIENT_SECRET", "client-secret");

        let expanded = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_CLIENT_ID}:${FLOE_TEST_REST_CLIENT_SECRET}",
            "polaris",
        )
        .expect("expand credential");

        assert_eq!(expanded, "client_credentials:client-id:client-secret");
        std::env::remove_var("FLOE_TEST_REST_CLIENT_ID");
        std::env::remove_var("FLOE_TEST_REST_CLIENT_SECRET");
    }

    /// A segment that is exactly one placeholder is replaced by the variable.
    #[test]
    fn expands_exact_env_ref_in_token_credential() {
        std::env::set_var("FLOE_TEST_REST_TOKEN", "pat-token");

        let expanded =
            expand_env_refs("token:${FLOE_TEST_REST_TOKEN}", "nessie").expect("expand token");

        assert_eq!(expanded, "token:pat-token");
        std::env::remove_var("FLOE_TEST_REST_TOKEN");
    }

    /// Placeholder-like text embedded inside a literal segment is untouched.
    #[test]
    fn preserves_literal_credential_text_that_contains_env_ref_syntax() {
        let expanded =
            expand_env_refs("token:abc${def}ghi", "nessie").expect("preserve literal credential");

        assert_eq!(expanded, "token:abc${def}ghi");
    }

    /// A placeholder naming an unset variable is an error that names it.
    #[test]
    fn errors_when_env_ref_is_missing() {
        std::env::remove_var("FLOE_TEST_REST_MISSING");

        let err = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_MISSING}:secret",
            "polaris",
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "rest iceberg catalog polaris credential references env var FLOE_TEST_REST_MISSING which is not set"
        );
    }

    /// An unclosed placeholder is rejected without echoing credential text.
    #[test]
    fn errors_on_malformed_env_ref() {
        // The first placeholder must resolve so expansion reaches the
        // malformed segment at the end.
        std::env::set_var("FLOE_TEST_REST_MALFORMED_ID", "client-id");

        let err = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_MALFORMED_ID}:literal-secret:${UNCLOSED",
            "polaris",
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "rest iceberg catalog polaris credential has unclosed env placeholder"
        );
        assert!(!err.to_string().contains("literal-secret"));
        std::env::remove_var("FLOE_TEST_REST_MALFORMED_ID");
    }
}
|