floe-python 0.4.5__tar.gz → 0.4.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {floe_python-0.4.5 → floe_python-0.4.6}/Cargo.lock +3 -3
- {floe_python-0.4.5 → floe_python-0.4.6}/PKG-INFO +1 -1
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/Cargo.toml +2 -2
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/format.rs +176 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/rest.rs +108 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/parquet.rs +31 -9
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/manifest/builder.rs +17 -5
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/manifest/model.rs +10 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/profile/parse.rs +49 -5
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/profile/types.rs +8 -0
- floe_python-0.4.6/crates/floe-core/src/run/entity/accepted_buffer.rs +251 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/accepted_write.rs +7 -48
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/mod.rs +27 -17
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/validate_split.rs +72 -8
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/archive_run.rs +93 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/local_run.rs +97 -0
- floe_python-0.4.6/crates/floe-core/tests/unit/io/format.rs +357 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/manifest/mod.rs +162 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/parse.rs +140 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +27 -13
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/Cargo.toml +2 -2
- {floe_python-0.4.5 → floe_python-0.4.6}/pyproject.toml +1 -1
- floe_python-0.4.5/crates/floe-core/tests/unit/io/format.rs +0 -19
- {floe_python-0.4.5 → floe_python-0.4.6}/Cargo.toml +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/README.md +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/README.md +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/add_entity.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/cast.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/mismatch.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/normalize.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/not_null.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/unique.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/catalog.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/location.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/parse.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/storage.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/template.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/types.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/validate.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/yaml_decode.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/errors.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/avro.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/csv.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/json.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/json_selector.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/orc.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/parquet.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/xlsx.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/xml.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/object_store.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/inputs.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/adls.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/gcs.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/local.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/s3.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/target.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/unique_seed/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/accepted.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/csv.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/options.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/unity.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/unity_tests.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/context.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/metrics.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/parts.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/sink_format.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/shared.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/lib.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/lineage/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/log.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/manifest/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/manifest/reconstruct.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/profile/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/profile/validate.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/report/build.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/report/entity.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/report/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/report/output.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/context.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/incremental.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/pii.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/precheck.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/process.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/resolve.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/events.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/file.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/output.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/perf.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/runner/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/runner/outcome.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/runtime.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/state/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/vars/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/vars/resolve.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/warnings.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/delta_run.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/dry_run.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_glue_run.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_s3_run.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/common.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/add_entity.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/adls_storage.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/adls_validation.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/catalogs.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/config_validation.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/lineage_validation.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/local_storage.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/parse.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/remote_base.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/storage_resolver_uri.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/templating.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/inputs.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/local.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/delta_write.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/object_store.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/validate.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/storage.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/checks.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/incremental.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/lineage.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/pii.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/report.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/state/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/.gitignore +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/README.md +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/functions.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/lib.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/observer.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/types/config.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/types/errors.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/types/mod.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/types/outcome.rs +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/config.yml +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/profile.yml +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/test_floe.py +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/python/floe/__init__.py +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/python/floe/_floe.pyi +0 -0
- {floe_python-0.4.5 → floe_python-0.4.6}/python/floe/py.typed +0 -0
|
@@ -3399,7 +3399,7 @@ dependencies = [
|
|
|
3399
3399
|
|
|
3400
3400
|
[[package]]
|
|
3401
3401
|
name = "floe-cli"
|
|
3402
|
-
version = "0.4.5"
|
|
3402
|
+
version = "0.4.6"
|
|
3403
3403
|
dependencies = [
|
|
3404
3404
|
"assert_cmd",
|
|
3405
3405
|
"clap",
|
|
@@ -3412,7 +3412,7 @@ dependencies = [
|
|
|
3412
3412
|
|
|
3413
3413
|
[[package]]
|
|
3414
3414
|
name = "floe-core"
|
|
3415
|
-
version = "0.4.5"
|
|
3415
|
+
version = "0.4.6"
|
|
3416
3416
|
dependencies = [
|
|
3417
3417
|
"apache-avro 0.16.0",
|
|
3418
3418
|
"arrow",
|
|
@@ -3455,7 +3455,7 @@ dependencies = [
|
|
|
3455
3455
|
|
|
3456
3456
|
[[package]]
|
|
3457
3457
|
name = "floe-python"
|
|
3458
|
-
version = "0.4.5"
|
|
3458
|
+
version = "0.4.6"
|
|
3459
3459
|
dependencies = [
|
|
3460
3460
|
"floe-core",
|
|
3461
3461
|
"pyo3",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "floe-core"
|
|
3
|
-
version = "0.4.5"
|
|
3
|
+
version = "0.4.6"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
description = "Core library for Floe, a YAML-driven technical ingestion tool."
|
|
6
6
|
license = "MIT"
|
|
@@ -14,7 +14,7 @@ path = "src/lib.rs"
|
|
|
14
14
|
|
|
15
15
|
[dependencies]
|
|
16
16
|
yaml-rust2 = "0.11"
|
|
17
|
-
polars = { version = "0.52.0", features = ["csv", "parquet", "lazy", "timezones", "dtype-date", "dtype-datetime", "dtype-time", "polars-ops", "is_unique", "is_first_distinct"] }
|
|
17
|
+
polars = { version = "0.52.0", features = ["csv", "parquet", "lazy", "new_streaming", "timezones", "dtype-date", "dtype-datetime", "dtype-time", "polars-ops", "is_unique", "is_first_distinct"] }
|
|
18
18
|
calamine = "0.24"
|
|
19
19
|
rayon = "1"
|
|
20
20
|
deltalake = { version = "0.30.1", features = ["datafusion", "s3", "azure", "gcs"] }
|
|
@@ -119,6 +119,182 @@ pub struct AcceptedWriteOutput {
|
|
|
119
119
|
pub perf: Option<AcceptedWritePerfBreakdown>,
|
|
120
120
|
}
|
|
121
121
|
|
|
122
|
+
/// Per-write sinks cap their reported `part_files` list at this many entries
|
|
123
|
+
/// (see `parquet.rs`). The reducer applies the same cap across flushes so
|
|
124
|
+
/// the run report does not grow to N × 50 entries for high-fanout entities.
|
|
125
|
+
pub const MAX_REPORTED_PART_FILES: usize = 50;
|
|
126
|
+
|
|
127
|
+
impl AcceptedWriteOutput {
|
|
128
|
+
/// Fold a later flush's output into this one. The receiver represents the
|
|
129
|
+
/// running total across N completed flushes; `next` is the output of the
|
|
130
|
+
/// (N+1)th flush.
|
|
131
|
+
///
|
|
132
|
+
/// Field semantics across flushes:
|
|
133
|
+
/// - `parts_written` (always known, the count of successful sink writes)
|
|
134
|
+
/// sums.
|
|
135
|
+
/// - `files_written` and the `Option<u64>` metric fields
|
|
136
|
+
/// (`total_bytes_written`, `small_files_count`, perf entries) sum
|
|
137
|
+
/// when *both* sides are `Some`; if either side is `None` the merged
|
|
138
|
+
/// result is `None`. "Unknown poisons" matches the per-flush
|
|
139
|
+
/// semantics: when any single flush could not determine its file
|
|
140
|
+
/// count (for example a remote Delta commit whose post-commit log
|
|
141
|
+
/// could not be read), reporting a partial sum would silently
|
|
142
|
+
/// under-count the total. The run report instead surfaces the value
|
|
143
|
+
/// as unknown.
|
|
144
|
+
/// - `part_files` concatenates and is capped at `MAX_REPORTED_PART_FILES`
|
|
145
|
+
/// so the reducer preserves the same cap the individual sink writers
|
|
146
|
+
/// apply per-flush.
|
|
147
|
+
/// - `table_version` / `snapshot_id` take the latest (Delta commit /
|
|
148
|
+
/// Iceberg snapshot move forward with every commit; the final state
|
|
149
|
+
/// is what readers see).
|
|
150
|
+
/// - `table_root_uri`, `catalog`, `schema_evolution` take the first
|
|
151
|
+
/// non-default value seen — table location and catalog registration
|
|
152
|
+
/// are established by the first write; schema evolution only fires on
|
|
153
|
+
/// the first (Overwrite) write because subsequent flushes are Append.
|
|
154
|
+
/// - `avg_file_size_mb` is recomputed from `total_bytes_written` divided
|
|
155
|
+
/// by `files_written` when available (so it matches the per-flush
|
|
156
|
+
/// semantics: for Parquet/Iceberg `files == parts`, but for Delta one
|
|
157
|
+
/// commit can write multiple `add` files and `parts != files`).
|
|
158
|
+
/// Falls back to `parts_written` when `files_written` is unknown.
|
|
159
|
+
/// - `perf` accumulates by summing each `Option<u64>` field.
|
|
160
|
+
/// - `merge` is unreachable in the buffered path (merge modes use the
|
|
161
|
+
/// legacy accumulate-then-write code path); the running value is
|
|
162
|
+
/// preserved if anything ever does pass one.
|
|
163
|
+
pub fn merge_in(&mut self, next: AcceptedWriteOutput) {
|
|
164
|
+
let AcceptedWriteOutput {
|
|
165
|
+
files_written,
|
|
166
|
+
parts_written,
|
|
167
|
+
part_files,
|
|
168
|
+
table_version,
|
|
169
|
+
snapshot_id,
|
|
170
|
+
table_root_uri,
|
|
171
|
+
catalog,
|
|
172
|
+
metrics,
|
|
173
|
+
merge,
|
|
174
|
+
schema_evolution,
|
|
175
|
+
perf,
|
|
176
|
+
} = next;
|
|
177
|
+
|
|
178
|
+
// `parts_written == 0` on the receiver means no prior flush has been
|
|
179
|
+
// merged. In that case `Option<u64>` fields on `self` start at `None`
|
|
180
|
+
// not because a flush returned unknown but because nothing has been
|
|
181
|
+
// recorded yet — distinguishing "vacuous" from "poisoned" matters
|
|
182
|
+
// because adopting the next flush's value verbatim on the first merge
|
|
183
|
+
// is correct, while applying poison-on-unknown semantics from `None`
|
|
184
|
+
// would always poison the very first merge.
|
|
185
|
+
let first_merge = self.parts_written == 0;
|
|
186
|
+
|
|
187
|
+
self.files_written = merge_option_u64(self.files_written, files_written, first_merge);
|
|
188
|
+
self.parts_written += parts_written;
|
|
189
|
+
let remaining = MAX_REPORTED_PART_FILES.saturating_sub(self.part_files.len());
|
|
190
|
+
if remaining > 0 {
|
|
191
|
+
self.part_files
|
|
192
|
+
.extend(part_files.into_iter().take(remaining));
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
if table_version.is_some() {
|
|
196
|
+
self.table_version = table_version;
|
|
197
|
+
}
|
|
198
|
+
if snapshot_id.is_some() {
|
|
199
|
+
self.snapshot_id = snapshot_id;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
if self.table_root_uri.is_none() {
|
|
203
|
+
self.table_root_uri = table_root_uri;
|
|
204
|
+
}
|
|
205
|
+
if self.catalog.is_none() {
|
|
206
|
+
self.catalog = catalog;
|
|
207
|
+
}
|
|
208
|
+
if !self.schema_evolution.enabled
|
|
209
|
+
&& !self.schema_evolution.applied
|
|
210
|
+
&& self.schema_evolution.added_columns.is_empty()
|
|
211
|
+
&& !self.schema_evolution.incompatible_changes_detected
|
|
212
|
+
&& self.schema_evolution.mode.is_empty()
|
|
213
|
+
{
|
|
214
|
+
self.schema_evolution = schema_evolution;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
self.metrics.total_bytes_written = merge_option_u64(
|
|
218
|
+
self.metrics.total_bytes_written,
|
|
219
|
+
metrics.total_bytes_written,
|
|
220
|
+
first_merge,
|
|
221
|
+
);
|
|
222
|
+
self.metrics.small_files_count = merge_option_u64(
|
|
223
|
+
self.metrics.small_files_count,
|
|
224
|
+
metrics.small_files_count,
|
|
225
|
+
first_merge,
|
|
226
|
+
);
|
|
227
|
+
self.metrics.avg_file_size_mb = recompute_avg_file_size_mb(
|
|
228
|
+
self.metrics.total_bytes_written,
|
|
229
|
+
self.files_written,
|
|
230
|
+
self.parts_written,
|
|
231
|
+
);
|
|
232
|
+
|
|
233
|
+
if self.merge.is_none() {
|
|
234
|
+
self.merge = merge;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
match (self.perf.take(), perf) {
|
|
238
|
+
(Some(a), Some(b)) => self.perf = Some(sum_perf_breakdown(a, b)),
|
|
239
|
+
(Some(a), None) => self.perf = Some(a),
|
|
240
|
+
(None, Some(b)) => self.perf = Some(b),
|
|
241
|
+
(None, None) => self.perf = None,
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/// Sum two `Option<u64>` values with poison-on-unknown semantics: if either
|
|
247
|
+
/// side is `None`, the result is `None`. Reporting a partial sum as if it
|
|
248
|
+
/// were the total would silently under-count for any aggregation across
|
|
249
|
+
/// flushes where one flush could not determine the underlying count
|
|
250
|
+
/// (e.g. remote Delta commit-log read failures).
|
|
251
|
+
fn sum_option_u64(a: Option<u64>, b: Option<u64>) -> Option<u64> {
|
|
252
|
+
match (a, b) {
|
|
253
|
+
(Some(a), Some(b)) => Some(a + b),
|
|
254
|
+
_ => None,
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
/// Progressive `Option<u64>` merge used by `merge_in`. On the first merge
|
|
259
|
+
/// (when the accumulator has no flush recorded yet) the next flush's value is
|
|
260
|
+
/// taken verbatim; on subsequent merges `sum_option_u64`'s poison-on-unknown
|
|
261
|
+
/// semantics apply.
|
|
262
|
+
fn merge_option_u64(acc: Option<u64>, next: Option<u64>, first_merge: bool) -> Option<u64> {
|
|
263
|
+
if first_merge {
|
|
264
|
+
next
|
|
265
|
+
} else {
|
|
266
|
+
sum_option_u64(acc, next)
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
fn recompute_avg_file_size_mb(
|
|
271
|
+
total_bytes: Option<u64>,
|
|
272
|
+
files_written: Option<u64>,
|
|
273
|
+
parts_written: u64,
|
|
274
|
+
) -> Option<f64> {
|
|
275
|
+
let bytes = total_bytes?;
|
|
276
|
+
let denominator = files_written.unwrap_or(parts_written);
|
|
277
|
+
if denominator == 0 {
|
|
278
|
+
return None;
|
|
279
|
+
}
|
|
280
|
+
let mb = (bytes as f64) / (denominator as f64) / (1024.0 * 1024.0);
|
|
281
|
+
Some(mb)
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
fn sum_perf_breakdown(
|
|
285
|
+
a: AcceptedWritePerfBreakdown,
|
|
286
|
+
b: AcceptedWritePerfBreakdown,
|
|
287
|
+
) -> AcceptedWritePerfBreakdown {
|
|
288
|
+
AcceptedWritePerfBreakdown {
|
|
289
|
+
conversion_ms: sum_option_u64(a.conversion_ms, b.conversion_ms),
|
|
290
|
+
source_df_build_ms: sum_option_u64(a.source_df_build_ms, b.source_df_build_ms),
|
|
291
|
+
merge_exec_ms: sum_option_u64(a.merge_exec_ms, b.merge_exec_ms),
|
|
292
|
+
data_write_ms: sum_option_u64(a.data_write_ms, b.data_write_ms),
|
|
293
|
+
commit_ms: sum_option_u64(a.commit_ms, b.commit_ms),
|
|
294
|
+
metrics_read_ms: sum_option_u64(a.metrics_read_ms, b.metrics_read_ms),
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
122
298
|
pub trait InputAdapter: Send + Sync {
|
|
123
299
|
fn format(&self) -> &'static str;
|
|
124
300
|
|
|
@@ -45,6 +45,7 @@ pub(crate) async fn build_rest_catalog(
|
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
if let Some(credential) = rest_cfg.credential.as_deref() {
|
|
48
|
+
let credential = expand_env_refs(credential, &rest_cfg.catalog_name)?;
|
|
48
49
|
if let Some(token_value) = credential.strip_prefix("token:") {
|
|
49
50
|
// Bearer PAT (Unity Catalog / Nessie)
|
|
50
51
|
props.insert("token".to_string(), token_value.to_string());
|
|
@@ -140,6 +141,39 @@ impl RestIcebergCatalogConfig {
|
|
|
140
141
|
}
|
|
141
142
|
}
|
|
142
143
|
|
|
144
|
+
fn expand_env_refs(value: &str, catalog_name: &str) -> FloeResult<String> {
|
|
145
|
+
if !value.contains("${") {
|
|
146
|
+
return Ok(value.to_string());
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
let mut parts = Vec::new();
|
|
150
|
+
for part in value.split(':') {
|
|
151
|
+
parts.push(expand_env_ref_part(part, catalog_name)?);
|
|
152
|
+
}
|
|
153
|
+
Ok(parts.join(":"))
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
fn expand_env_ref_part(part: &str, catalog_name: &str) -> FloeResult<String> {
|
|
157
|
+
let Some(inner) = part.strip_prefix("${") else {
|
|
158
|
+
return Ok(part.to_string());
|
|
159
|
+
};
|
|
160
|
+
let Some(name) = inner.strip_suffix('}') else {
|
|
161
|
+
return Err(Box::new(RunError(format!(
|
|
162
|
+
"rest iceberg catalog {catalog_name} credential has unclosed env placeholder"
|
|
163
|
+
))));
|
|
164
|
+
};
|
|
165
|
+
if name.is_empty() || name.contains('{') || name.contains('}') {
|
|
166
|
+
return Err(Box::new(RunError(format!(
|
|
167
|
+
"rest iceberg catalog {catalog_name} credential has invalid env placeholder"
|
|
168
|
+
))));
|
|
169
|
+
}
|
|
170
|
+
std::env::var(name).map_err(|_| {
|
|
171
|
+
Box::new(RunError(format!(
|
|
172
|
+
"rest iceberg catalog {catalog_name} credential references env var {name} which is not set"
|
|
173
|
+
))) as Box<dyn std::error::Error + Send + Sync>
|
|
174
|
+
})
|
|
175
|
+
}
|
|
176
|
+
|
|
143
177
|
pub(crate) async fn write_via_rest_catalog(
|
|
144
178
|
rest_cfg: &RestIcebergCatalogConfig,
|
|
145
179
|
table_root_uri: String,
|
|
@@ -342,3 +376,77 @@ async fn create_rest_table(
|
|
|
342
376
|
.await
|
|
343
377
|
.map_err(map_iceberg_err("rest catalog create_table failed"))
|
|
344
378
|
}
|
|
379
|
+
|
|
380
|
+
#[cfg(test)]
mod tests {
    //! Unit tests for credential placeholder expansion.
    //!
    //! Environment variables are process-global and tests in one binary share
    //! a process, so every variable touched here uses a unique
    //! `FLOE_TEST_REST_` name to avoid colliding with other tests or with
    //! variables that already exist in the developer/CI environment.

    use super::expand_env_refs;

    #[test]
    fn expands_partial_env_refs_in_client_credentials() {
        std::env::set_var("FLOE_TEST_REST_CLIENT_ID", "client-id");
        std::env::set_var("FLOE_TEST_REST_CLIENT_SECRET", "client-secret");

        let expanded = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_CLIENT_ID}:${FLOE_TEST_REST_CLIENT_SECRET}",
            "polaris",
        )
        .expect("expand credential");

        assert_eq!(expanded, "client_credentials:client-id:client-secret");
        std::env::remove_var("FLOE_TEST_REST_CLIENT_ID");
        std::env::remove_var("FLOE_TEST_REST_CLIENT_SECRET");
    }

    #[test]
    fn expands_exact_env_ref_in_token_credential() {
        std::env::set_var("FLOE_TEST_REST_TOKEN", "pat-token");

        let expanded =
            expand_env_refs("token:${FLOE_TEST_REST_TOKEN}", "nessie").expect("expand token");

        assert_eq!(expanded, "token:pat-token");
        std::env::remove_var("FLOE_TEST_REST_TOKEN");
    }

    #[test]
    fn preserves_literal_credential_text_that_contains_env_ref_syntax() {
        // The segment does not *start* with `${`, so it is not a placeholder.
        let expanded =
            expand_env_refs("token:abc${def}ghi", "nessie").expect("preserve literal credential");

        assert_eq!(expanded, "token:abc${def}ghi");
    }

    #[test]
    fn errors_when_env_ref_is_missing() {
        std::env::remove_var("FLOE_TEST_REST_MISSING");

        let err = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_MISSING}:secret",
            "polaris",
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "rest iceberg catalog polaris credential references env var FLOE_TEST_REST_MISSING which is not set"
        );
    }

    #[test]
    fn errors_on_malformed_env_ref() {
        // The well-formed placeholder must resolve so that expansion reaches
        // the unclosed one further along the credential.
        std::env::set_var("FLOE_TEST_REST_ID", "client-id");

        let err = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_ID}:literal-secret:${UNCLOSED",
            "polaris",
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "rest iceberg catalog polaris credential has unclosed env placeholder"
        );
        // The error must not echo other parts of the credential.
        assert!(!err.to_string().contains("literal-secret"));
        std::env::remove_var("FLOE_TEST_REST_ID");
    }
}
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
use std::path::Path;
|
|
2
2
|
|
|
3
|
-
use polars::
|
|
3
|
+
use polars::polars_utils::plpath::PlPathRef;
|
|
4
|
+
use polars::prelude::{
|
|
5
|
+
DataFrame, IntoLazy, ParquetCompression, ParquetWriteOptions, SinkOptions as PolarsSinkOptions,
|
|
6
|
+
SinkTarget,
|
|
7
|
+
};
|
|
4
8
|
|
|
5
9
|
use crate::checks::normalize::rename_output_columns;
|
|
6
10
|
use crate::errors::{IoError, StorageError};
|
|
@@ -46,11 +50,32 @@ pub fn write_parquet_to_path(
|
|
|
46
50
|
if let Some(parent) = output_path.parent() {
|
|
47
51
|
std::fs::create_dir_all(parent)?;
|
|
48
52
|
}
|
|
49
|
-
let
|
|
50
|
-
let
|
|
53
|
+
let write_options = build_parquet_write_options(options)?;
|
|
54
|
+
let sink_options = PolarsSinkOptions {
|
|
55
|
+
mkdir: false,
|
|
56
|
+
..PolarsSinkOptions::default()
|
|
57
|
+
};
|
|
58
|
+
let target = SinkTarget::Path(PlPathRef::from_local_path(output_path).into_owned());
|
|
59
|
+
// The outer chunking loop in `ParquetSinkFormat::write` discards each
|
|
60
|
+
// chunk after the write, so taking the DataFrame here is safe. Using
|
|
61
|
+
// `std::mem::take` lets us hand ownership to `LazyFrame` without an
|
|
62
|
+
// extra clone of the Arrow buffers.
|
|
63
|
+
let frame = std::mem::take(df);
|
|
64
|
+
frame
|
|
65
|
+
.lazy()
|
|
66
|
+
.sink_parquet(target, write_options, None, sink_options)
|
|
67
|
+
.and_then(|lf| lf.with_new_streaming(true).collect())
|
|
68
|
+
.map_err(|err| Box::new(IoError(format!("parquet write failed: {err}"))))?;
|
|
69
|
+
Ok(())
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
fn build_parquet_write_options(
|
|
73
|
+
options: Option<&config::SinkOptions>,
|
|
74
|
+
) -> FloeResult<ParquetWriteOptions> {
|
|
75
|
+
let mut write_options = ParquetWriteOptions::default();
|
|
51
76
|
if let Some(options) = options {
|
|
52
77
|
if let Some(compression) = &options.compression {
|
|
53
|
-
|
|
78
|
+
write_options.compression = parse_parquet_compression(compression)?;
|
|
54
79
|
}
|
|
55
80
|
if let Some(row_group_size) = options.row_group_size {
|
|
56
81
|
let row_group_size = usize::try_from(row_group_size).map_err(|_| {
|
|
@@ -58,13 +83,10 @@ pub fn write_parquet_to_path(
|
|
|
58
83
|
"parquet row_group_size is too large: {row_group_size}"
|
|
59
84
|
)))
|
|
60
85
|
})?;
|
|
61
|
-
|
|
86
|
+
write_options.row_group_size = Some(row_group_size);
|
|
62
87
|
}
|
|
63
88
|
}
|
|
64
|
-
|
|
65
|
-
.finish(df)
|
|
66
|
-
.map_err(|err| Box::new(IoError(format!("parquet write failed: {err}"))))?;
|
|
67
|
-
Ok(())
|
|
89
|
+
Ok(write_options)
|
|
68
90
|
}
|
|
69
91
|
|
|
70
92
|
impl SinkFormat for ParquetSinkFormat {
|
|
@@ -6,9 +6,9 @@ use sha2::{Digest, Sha256};
|
|
|
6
6
|
use crate::config::{ConfigLocation, RootConfig, SourceOptions, StorageResolver};
|
|
7
7
|
use crate::manifest::model::{
|
|
8
8
|
CommonManifest, ManifestArchiveTarget, ManifestColumnDef, ManifestDomain, ManifestEntity,
|
|
9
|
-
ManifestEntitySchema, ManifestExecution, ManifestExecutionDefaults,
|
|
10
|
-
ManifestRunnerAuth, ManifestRunnerDefinition, ManifestRunnerResources,
|
|
11
|
-
ManifestRunners, ManifestSinkTarget, ManifestSinks, ManifestSource,
|
|
9
|
+
ManifestEntitySchema, ManifestExecution, ManifestExecutionDefaults, ManifestOrchestration,
|
|
10
|
+
ManifestResultContract, ManifestRunnerAuth, ManifestRunnerDefinition, ManifestRunnerResources,
|
|
11
|
+
ManifestRunnerSecret, ManifestRunners, ManifestSinkTarget, ManifestSinks, ManifestSource,
|
|
12
12
|
};
|
|
13
13
|
use crate::profile::ProfileConfig;
|
|
14
14
|
use crate::FloeResult;
|
|
@@ -415,7 +415,7 @@ fn build_common_manifest(
|
|
|
415
415
|
.unwrap_or_else(|| domain.incoming_dir.clone()),
|
|
416
416
|
})
|
|
417
417
|
.collect(),
|
|
418
|
-
execution: default_execution_contract(options),
|
|
418
|
+
execution: default_execution_contract(options, profile),
|
|
419
419
|
runners: runners_contract(profile),
|
|
420
420
|
entities: manifest_entities,
|
|
421
421
|
storages,
|
|
@@ -525,7 +525,10 @@ fn map_source_options(options: Option<&SourceOptions>) -> Option<serde_json::Val
|
|
|
525
525
|
Some(serde_json::Value::Object(map))
|
|
526
526
|
}
|
|
527
527
|
|
|
528
|
-
fn default_execution_contract(
|
|
528
|
+
fn default_execution_contract(
|
|
529
|
+
options: &ManifestOptions,
|
|
530
|
+
profile: Option<&ProfileConfig>,
|
|
531
|
+
) -> ManifestExecution {
|
|
529
532
|
let mut exit_codes = BTreeMap::new();
|
|
530
533
|
exit_codes.insert("0", "success_or_rejected");
|
|
531
534
|
exit_codes.insert("1", "technical_failure");
|
|
@@ -554,6 +557,14 @@ fn default_execution_contract(options: &ManifestOptions) -> ManifestExecution {
|
|
|
554
557
|
})
|
|
555
558
|
.collect();
|
|
556
559
|
|
|
560
|
+
let orchestration = profile
|
|
561
|
+
.and_then(|p| p.execution.as_ref())
|
|
562
|
+
.and_then(|e| e.orchestration.as_ref())
|
|
563
|
+
.map(|o| ManifestOrchestration {
|
|
564
|
+
max_concurrent_entities: o.max_concurrent_entities,
|
|
565
|
+
strategy: o.strategy.clone(),
|
|
566
|
+
});
|
|
567
|
+
|
|
557
568
|
ManifestExecution {
|
|
558
569
|
entrypoint: "floe",
|
|
559
570
|
base_args,
|
|
@@ -568,6 +579,7 @@ fn default_execution_contract(options: &ManifestOptions) -> ManifestExecution {
|
|
|
568
579
|
env: BTreeMap::new(),
|
|
569
580
|
workdir: None,
|
|
570
581
|
},
|
|
582
|
+
orchestration,
|
|
571
583
|
}
|
|
572
584
|
}
|
|
573
585
|
|
|
@@ -49,6 +49,16 @@ pub struct ManifestExecution {
|
|
|
49
49
|
pub log_format: &'static str,
|
|
50
50
|
pub result_contract: ManifestResultContract,
|
|
51
51
|
pub defaults: ManifestExecutionDefaults,
|
|
52
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
53
|
+
pub orchestration: Option<ManifestOrchestration>,
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
#[derive(Debug, Serialize)]
|
|
57
|
+
pub struct ManifestOrchestration {
|
|
58
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
59
|
+
pub max_concurrent_entities: Option<u64>,
|
|
60
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
61
|
+
pub strategy: Option<String>,
|
|
52
62
|
}
|
|
53
63
|
|
|
54
64
|
#[derive(Debug, Serialize)]
|
|
@@ -8,9 +8,9 @@ use crate::config::yaml_decode::{
|
|
|
8
8
|
hash_get, load_yaml, validate_known_keys, yaml_array, yaml_hash, yaml_string,
|
|
9
9
|
};
|
|
10
10
|
use crate::profile::types::{
|
|
11
|
-
ProfileConfig, ProfileExecution, ProfileMetadata,
|
|
12
|
-
ProfileRunnerResources, ProfileRunnerSecret, ProfileValidation,
|
|
13
|
-
PROFILE_KIND,
|
|
11
|
+
ProfileConfig, ProfileExecution, ProfileMetadata, ProfileOrchestration, ProfileRunner,
|
|
12
|
+
ProfileRunnerAuth, ProfileRunnerResources, ProfileRunnerSecret, ProfileValidation,
|
|
13
|
+
PROFILE_API_VERSION, PROFILE_KIND,
|
|
14
14
|
};
|
|
15
15
|
use crate::{ConfigError, FloeResult};
|
|
16
16
|
|
|
@@ -162,7 +162,7 @@ fn parse_metadata(value: &Yaml) -> FloeResult<ProfileMetadata> {
|
|
|
162
162
|
|
|
163
163
|
fn parse_execution(value: &Yaml) -> FloeResult<ProfileExecution> {
|
|
164
164
|
let hash = yaml_hash(value, "profile.execution")?;
|
|
165
|
-
validate_known_keys(hash, "profile.execution", &["runner"])?;
|
|
165
|
+
validate_known_keys(hash, "profile.execution", &["runner", "orchestration"])?;
|
|
166
166
|
|
|
167
167
|
let runner_yaml = hash_get(hash, "runner").ok_or_else(|| {
|
|
168
168
|
Box::new(ConfigError(
|
|
@@ -171,7 +171,51 @@ fn parse_execution(value: &Yaml) -> FloeResult<ProfileExecution> {
|
|
|
171
171
|
})?;
|
|
172
172
|
let runner = parse_runner(runner_yaml)?;
|
|
173
173
|
|
|
174
|
-
|
|
174
|
+
let orchestration = match hash_get(hash, "orchestration") {
|
|
175
|
+
Some(value) => Some(parse_orchestration(value)?),
|
|
176
|
+
None => None,
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
Ok(ProfileExecution {
|
|
180
|
+
runner,
|
|
181
|
+
orchestration,
|
|
182
|
+
})
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
fn parse_orchestration(value: &Yaml) -> FloeResult<ProfileOrchestration> {
|
|
186
|
+
let hash = yaml_hash(value, "profile.execution.orchestration")?;
|
|
187
|
+
validate_known_keys(
|
|
188
|
+
hash,
|
|
189
|
+
"profile.execution.orchestration",
|
|
190
|
+
&["max_concurrent_entities", "strategy"],
|
|
191
|
+
)?;
|
|
192
|
+
|
|
193
|
+
let max_concurrent_entities = get_optional_u64(
|
|
194
|
+
hash,
|
|
195
|
+
"max_concurrent_entities",
|
|
196
|
+
"profile.execution.orchestration",
|
|
197
|
+
)?;
|
|
198
|
+
|
|
199
|
+
if let Some(0) = max_concurrent_entities {
|
|
200
|
+
return Err(Box::new(ConfigError(
|
|
201
|
+
"profile.execution.orchestration.max_concurrent_entities: must be >= 1".to_string(),
|
|
202
|
+
)));
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
let strategy = get_optional_string(hash, "strategy", "profile.execution.orchestration")?;
|
|
206
|
+
|
|
207
|
+
if let Some(ref s) = strategy {
|
|
208
|
+
if s != "sequential" && s != "parallel" {
|
|
209
|
+
return Err(Box::new(ConfigError(format!(
|
|
210
|
+
"profile.execution.orchestration.strategy: expected \"sequential\" or \"parallel\", got \"{s}\""
|
|
211
|
+
))));
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
Ok(ProfileOrchestration {
|
|
216
|
+
max_concurrent_entities,
|
|
217
|
+
strategy,
|
|
218
|
+
})
|
|
175
219
|
}
|
|
176
220
|
|
|
177
221
|
fn parse_runner(value: &Yaml) -> FloeResult<ProfileRunner> {
|
|
@@ -27,6 +27,14 @@ pub struct ProfileMetadata {
|
|
|
27
27
|
#[derive(Debug, Clone)]
|
|
28
28
|
pub struct ProfileExecution {
|
|
29
29
|
pub runner: ProfileRunner,
|
|
30
|
+
pub orchestration: Option<ProfileOrchestration>,
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/// Parsed form of the `profile.execution.orchestration` section.
#[derive(Debug, Clone)]
pub struct ProfileOrchestration {
    /// Value of `max_concurrent_entities`, if given.
    pub max_concurrent_entities: Option<u64>,
    /// Value of `strategy`, if given: "sequential" | "parallel".
    pub strategy: Option<String>,
}
|
|
31
39
|
|
|
32
40
|
#[derive(Debug, Clone)]
|