floe-python 0.4.2__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {floe_python-0.4.2 → floe_python-0.4.3}/Cargo.lock +3 -3
- {floe_python-0.4.2 → floe_python-0.4.3}/PKG-INFO +42 -1
- {floe_python-0.4.2/crates/floe-python → floe_python-0.4.3}/README.md +41 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/Cargo.toml +1 -1
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/location.rs +1 -1
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/mod.rs +1 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/parse.rs +2 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/types.rs +1 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/lib.rs +9 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/lineage/mod.rs +141 -44
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/context.rs +28 -1
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/mod.rs +4 -2
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/lineage_validation.rs +1 -0
- floe_python-0.4.3/crates/floe-core/tests/unit/run/lineage.rs +923 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/Cargo.toml +2 -2
- {floe_python-0.4.2 → floe_python-0.4.3/crates/floe-python}/README.md +41 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/pyproject.toml +1 -1
- floe_python-0.4.2/crates/floe-core/tests/unit/run/lineage.rs +0 -366
- {floe_python-0.4.2 → floe_python-0.4.3}/Cargo.toml +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/README.md +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/add_entity.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/checks/cast.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/checks/mismatch.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/checks/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/checks/normalize.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/checks/not_null.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/checks/unique.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/catalog.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/storage.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/template.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/validate.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/config/yaml_decode.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/errors.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/format.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/avro.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/csv.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/json.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/json_selector.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/orc.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/parquet.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/xlsx.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/xml.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/object_store.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/ops/inputs.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/providers/adls.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/providers/gcs.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/providers/local.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/providers/s3.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/storage/target.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/unique_seed/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/accepted.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/csv.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/delta/options.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/delta/unity.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/delta/unity_tests.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/delta.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/iceberg/context.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/iceberg/rest.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/iceberg.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/metrics.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/parquet.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/parts.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/sink_format.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/strategy/merge/shared.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/strategy/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/log.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/manifest/builder.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/manifest/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/manifest/model.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/manifest/reconstruct.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/profile/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/profile/parse.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/profile/types.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/profile/validate.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/report/build.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/report/entity.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/report/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/report/output.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/entity/accepted_write.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/entity/incremental.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/entity/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/entity/pii.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/entity/precheck.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/entity/process.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/entity/resolve.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/entity/validate_split.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/events.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/file.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/output.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/run/perf.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/runner/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/runner/outcome.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/runtime.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/state/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/vars/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/vars/resolve.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/src/warnings.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/archive_run.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/delta_run.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/dry_run.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/iceberg_glue_run.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/iceberg_s3_run.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/local_run.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/integration.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/common.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/add_entity.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/adls_storage.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/adls_validation.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/catalogs.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/config_validation.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/local_storage.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/parse.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/remote_base.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/config/templating.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/format.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/inputs.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/local.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/delta_write.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/object_store.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/manifest/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/profile/parse.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/profile/validate.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/report/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/report/storage.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/checks.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/entity/incremental.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/pii.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/report.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/state/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-core/tests/unit.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/.gitignore +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/src/functions.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/src/lib.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/src/observer.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/src/types/config.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/src/types/errors.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/src/types/mod.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/src/types/outcome.rs +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/tests/fixtures/config.yml +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/tests/fixtures/profile.yml +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/crates/floe-python/tests/test_floe.py +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/python/floe/__init__.py +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/python/floe/_floe.pyi +0 -0
- {floe_python-0.4.2 → floe_python-0.4.3}/python/floe/py.typed +0 -0
|
@@ -3399,7 +3399,7 @@ dependencies = [
|
|
|
3399
3399
|
|
|
3400
3400
|
[[package]]
|
|
3401
3401
|
name = "floe-cli"
|
|
3402
|
-
version = "0.4.2"
|
|
3402
|
+
version = "0.4.3"
|
|
3403
3403
|
dependencies = [
|
|
3404
3404
|
"assert_cmd",
|
|
3405
3405
|
"clap",
|
|
@@ -3412,7 +3412,7 @@ dependencies = [
|
|
|
3412
3412
|
|
|
3413
3413
|
[[package]]
|
|
3414
3414
|
name = "floe-core"
|
|
3415
|
-
version = "0.4.2"
|
|
3415
|
+
version = "0.4.3"
|
|
3416
3416
|
dependencies = [
|
|
3417
3417
|
"apache-avro 0.16.0",
|
|
3418
3418
|
"arrow",
|
|
@@ -3455,7 +3455,7 @@ dependencies = [
|
|
|
3455
3455
|
|
|
3456
3456
|
[[package]]
|
|
3457
3457
|
name = "floe-python"
|
|
3458
|
-
version = "0.4.2"
|
|
3458
|
+
version = "0.4.3"
|
|
3459
3459
|
dependencies = [
|
|
3460
3460
|
"floe-core",
|
|
3461
3461
|
"pyo3",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: floe-python
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Intended Audience :: Developers
|
|
6
6
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -98,6 +98,43 @@ except floe.FloeError as e:
|
|
|
98
98
|
| `set_observer(callback)` | Register a live-event callback |
|
|
99
99
|
| `clear_observer()` | Remove the current callback |
|
|
100
100
|
|
|
101
|
+
## Jupyter
|
|
102
|
+
|
|
103
|
+
`RunOutcome` renders as a color-coded HTML table automatically in Jupyter — no extra code needed. Just end a cell with the variable:
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
outcome = floe.run("orders.yml")
|
|
107
|
+
outcome # renders inline HTML table with per-entity status, accepted/rejected counts
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Use `outcome.to_dict()` to turn results into a plain dict for pandas:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
import pandas as pd
|
|
114
|
+
df = pd.DataFrame(outcome.entity_reports)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Observing progress
|
|
118
|
+
|
|
119
|
+
Register a callback to receive live events as the run proceeds:
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
floe.set_observer(lambda e: print(f"[{e['event']}]", e.get("name", e.get("entity", ""))))
|
|
123
|
+
outcome = floe.run("orders.yml")
|
|
124
|
+
floe.clear_observer()
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Event types: `run_started`, `entity_started`, `file_started`, `file_finished`, `schema_evolution_applied`, `entity_finished`, `run_finished`, `log`. See the [full guide](../../docs/python-bindings.md#observing-runs-in-real-time) for all event fields.
|
|
128
|
+
|
|
129
|
+
## Profile overrides
|
|
130
|
+
|
|
131
|
+
Override config variables or cloud credentials without editing the YAML:
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
floe.run("orders.yml", profile_vars={"incoming_root": "s3://my-bucket/incoming"})
|
|
135
|
+
floe.run("orders.yml", profile_path="prod.yml")
|
|
136
|
+
```
|
|
137
|
+
|
|
101
138
|
## Building from source
|
|
102
139
|
|
|
103
140
|
```bash
|
|
@@ -111,3 +148,7 @@ maturin develop
|
|
|
111
148
|
|
|
112
149
|
Apache 2.0
|
|
113
150
|
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
→ Full API reference and examples: [docs/python-bindings.md](../../docs/python-bindings.md)
|
|
154
|
+
|
|
@@ -70,6 +70,43 @@ except floe.FloeError as e:
|
|
|
70
70
|
| `set_observer(callback)` | Register a live-event callback |
|
|
71
71
|
| `clear_observer()` | Remove the current callback |
|
|
72
72
|
|
|
73
|
+
## Jupyter
|
|
74
|
+
|
|
75
|
+
`RunOutcome` renders as a color-coded HTML table automatically in Jupyter — no extra code needed. Just end a cell with the variable:
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
outcome = floe.run("orders.yml")
|
|
79
|
+
outcome # renders inline HTML table with per-entity status, accepted/rejected counts
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Use `outcome.to_dict()` to turn results into a plain dict for pandas:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import pandas as pd
|
|
86
|
+
df = pd.DataFrame(outcome.entity_reports)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Observing progress
|
|
90
|
+
|
|
91
|
+
Register a callback to receive live events as the run proceeds:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
floe.set_observer(lambda e: print(f"[{e['event']}]", e.get("name", e.get("entity", ""))))
|
|
95
|
+
outcome = floe.run("orders.yml")
|
|
96
|
+
floe.clear_observer()
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Event types: `run_started`, `entity_started`, `file_started`, `file_finished`, `schema_evolution_applied`, `entity_finished`, `run_finished`, `log`. See the [full guide](../../docs/python-bindings.md#observing-runs-in-real-time) for all event fields.
|
|
100
|
+
|
|
101
|
+
## Profile overrides
|
|
102
|
+
|
|
103
|
+
Override config variables or cloud credentials without editing the YAML:
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
floe.run("orders.yml", profile_vars={"incoming_root": "s3://my-bucket/incoming"})
|
|
107
|
+
floe.run("orders.yml", profile_path="prod.yml")
|
|
108
|
+
```
|
|
109
|
+
|
|
73
110
|
## Building from source
|
|
74
111
|
|
|
75
112
|
```bash
|
|
@@ -82,3 +119,7 @@ maturin develop
|
|
|
82
119
|
## License
|
|
83
120
|
|
|
84
121
|
Apache 2.0
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
→ Full API reference and examples: [docs/python-bindings.md](../../docs/python-bindings.md)
|
|
@@ -70,6 +70,6 @@ fn download_remote_config(uri: &str, temp_dir: &Path) -> FloeResult<PathBuf> {
|
|
|
70
70
|
Err(format!("unsupported config uri: {}", uri).into())
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
fn is_remote_uri(value: &str) -> bool {
|
|
73
|
+
pub(crate) fn is_remote_uri(value: &str) -> bool {
|
|
74
74
|
value.starts_with("s3://") || value.starts_with("gs://") || value.starts_with("abfs://")
|
|
75
75
|
}
|
|
@@ -8,6 +8,7 @@ mod validate;
|
|
|
8
8
|
pub(crate) mod yaml_decode;
|
|
9
9
|
|
|
10
10
|
pub use catalog::{CatalogResolver, ResolvedDeltaCatalogTarget, ResolvedIcebergCatalogTarget};
|
|
11
|
+
pub(crate) use location::is_remote_uri;
|
|
11
12
|
pub use location::{resolve_config_location, ConfigLocation};
|
|
12
13
|
pub use storage::{resolve_local_path, ConfigBase, ResolvedPath, StorageResolver};
|
|
13
14
|
pub use types::*;
|
|
@@ -1141,6 +1141,7 @@ pub(crate) fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
|
|
|
1141
1141
|
"namespace",
|
|
1142
1142
|
"producer",
|
|
1143
1143
|
"max_failures",
|
|
1144
|
+
"job_name",
|
|
1144
1145
|
],
|
|
1145
1146
|
)?;
|
|
1146
1147
|
Ok(LineageConfig {
|
|
@@ -1150,5 +1151,6 @@ pub(crate) fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
|
|
|
1150
1151
|
namespace: get_string(hash, "namespace", "lineage")?,
|
|
1151
1152
|
producer: opt_string(hash, "producer", "lineage")?,
|
|
1152
1153
|
max_failures: opt_u32(hash, "max_failures", "lineage")?,
|
|
1154
|
+
job_name: opt_string(hash, "job_name", "lineage")?,
|
|
1153
1155
|
})
|
|
1154
1156
|
}
|
|
@@ -84,6 +84,15 @@ pub fn load_config(config_path: &Path) -> FloeResult<config::RootConfig> {
|
|
|
84
84
|
config::parse_config(config_path)
|
|
85
85
|
}
|
|
86
86
|
|
|
87
|
+
/// Read manifest JSON from any supported URI (local path, `s3://`, `gs://`, `abfs://`).
|
|
88
|
+
/// For remote URIs the file is downloaded to a temp directory that is cleaned up before
|
|
89
|
+
/// this function returns; the caller receives the raw JSON text as a `String`.
|
|
90
|
+
pub fn read_manifest_text(uri: &str) -> FloeResult<String> {
|
|
91
|
+
let location = config::resolve_config_location(uri)?;
|
|
92
|
+
let text = std::fs::read_to_string(&location.path)?;
|
|
93
|
+
Ok(text)
|
|
94
|
+
}
|
|
95
|
+
|
|
87
96
|
pub fn load_config_with_profile_vars(
|
|
88
97
|
config_path: &Path,
|
|
89
98
|
profile_vars: &std::collections::HashMap<String, String>,
|
|
@@ -8,6 +8,18 @@ use serde_json::{json, Value};
|
|
|
8
8
|
use crate::config::{EntityConfig, LineageConfig};
|
|
9
9
|
use crate::run::events::{RunEvent, RunObserver};
|
|
10
10
|
|
|
11
|
+
const DEFAULT_PRODUCER: &str = concat!(
|
|
12
|
+
"https://github.com/malon64/floe/releases/tag/v",
|
|
13
|
+
env!("CARGO_PKG_VERSION")
|
|
14
|
+
);
|
|
15
|
+
|
|
16
|
+
#[derive(Clone)]
|
|
17
|
+
struct ColumnMapping {
|
|
18
|
+
output_name: String,
|
|
19
|
+
column_type: String,
|
|
20
|
+
source_field: Option<String>,
|
|
21
|
+
}
|
|
22
|
+
|
|
11
23
|
struct EntityUris {
|
|
12
24
|
source: String,
|
|
13
25
|
accepted: String,
|
|
@@ -20,14 +32,19 @@ pub struct OpenLineageObserver {
|
|
|
20
32
|
entity_start_ms: Mutex<HashMap<String, u128>>,
|
|
21
33
|
entity_run_ids: Mutex<HashMap<String, String>>,
|
|
22
34
|
run_start_ms: Mutex<Option<u128>>,
|
|
23
|
-
entity_schemas: HashMap<String, Vec<
|
|
35
|
+
entity_schemas: HashMap<String, Vec<ColumnMapping>>,
|
|
24
36
|
entity_uris: HashMap<String, EntityUris>,
|
|
37
|
+
run_job_name: String,
|
|
25
38
|
consecutive_failures: AtomicUsize,
|
|
26
39
|
circuit_open: AtomicBool,
|
|
27
40
|
}
|
|
28
41
|
|
|
29
42
|
impl OpenLineageObserver {
|
|
30
|
-
pub fn new(
|
|
43
|
+
pub fn new(
|
|
44
|
+
config: &LineageConfig,
|
|
45
|
+
entities: &[EntityConfig],
|
|
46
|
+
config_path: &str,
|
|
47
|
+
) -> crate::FloeResult<Self> {
|
|
31
48
|
let timeout = Duration::from_secs(config.timeout_secs.unwrap_or(5));
|
|
32
49
|
let client = reqwest::blocking::Client::builder()
|
|
33
50
|
.timeout(timeout)
|
|
@@ -38,14 +55,30 @@ impl OpenLineageObserver {
|
|
|
38
55
|
))) as Box<dyn std::error::Error + Send + Sync>
|
|
39
56
|
})?;
|
|
40
57
|
|
|
58
|
+
let run_job_name = config
|
|
59
|
+
.job_name
|
|
60
|
+
.clone()
|
|
61
|
+
.filter(|s| !s.is_empty())
|
|
62
|
+
.unwrap_or_else(|| {
|
|
63
|
+
std::path::Path::new(config_path)
|
|
64
|
+
.file_stem()
|
|
65
|
+
.and_then(|s| s.to_str())
|
|
66
|
+
.unwrap_or("floe-run")
|
|
67
|
+
.to_string()
|
|
68
|
+
});
|
|
69
|
+
|
|
41
70
|
let entity_schemas = entities
|
|
42
71
|
.iter()
|
|
43
72
|
.map(|e| {
|
|
44
|
-
let fields: Vec<
|
|
73
|
+
let fields: Vec<ColumnMapping> = e
|
|
45
74
|
.schema
|
|
46
75
|
.columns
|
|
47
76
|
.iter()
|
|
48
|
-
.map(|c|
|
|
77
|
+
.map(|c| ColumnMapping {
|
|
78
|
+
output_name: c.name.clone(),
|
|
79
|
+
column_type: c.column_type.clone(),
|
|
80
|
+
source_field: c.source.clone(),
|
|
81
|
+
})
|
|
49
82
|
.collect();
|
|
50
83
|
(e.name.clone(), fields)
|
|
51
84
|
})
|
|
@@ -73,6 +106,7 @@ impl OpenLineageObserver {
|
|
|
73
106
|
run_start_ms: Mutex::new(None),
|
|
74
107
|
entity_schemas,
|
|
75
108
|
entity_uris,
|
|
109
|
+
run_job_name,
|
|
76
110
|
consecutive_failures: AtomicUsize::new(0),
|
|
77
111
|
circuit_open: AtomicBool::new(false),
|
|
78
112
|
})
|
|
@@ -155,10 +189,7 @@ impl OpenLineageObserver {
|
|
|
155
189
|
}
|
|
156
190
|
|
|
157
191
|
fn producer(&self) -> &str {
|
|
158
|
-
self.config
|
|
159
|
-
.producer
|
|
160
|
-
.as_deref()
|
|
161
|
-
.unwrap_or("https://github.com/malon64/floe")
|
|
192
|
+
self.config.producer.as_deref().unwrap_or(DEFAULT_PRODUCER)
|
|
162
193
|
}
|
|
163
194
|
|
|
164
195
|
fn parent_run_facet(&self) -> Option<Value> {
|
|
@@ -212,71 +243,112 @@ impl OpenLineageObserver {
|
|
|
212
243
|
run_facets["parent"] = parent;
|
|
213
244
|
}
|
|
214
245
|
|
|
215
|
-
// Build inputs
|
|
216
|
-
|
|
246
|
+
// Build inputs/outputs based on whether stats and uris are present (COMPLETE/FAIL)
|
|
247
|
+
// or absent (START — keep both empty).
|
|
248
|
+
let (inputs, outputs) = match (stats.as_ref(), uris) {
|
|
217
249
|
(Some(s), Some(u)) => {
|
|
218
250
|
let rejection_rate = if s.rows > 0 {
|
|
219
251
|
s.rejected as f64 / s.rows as f64
|
|
220
252
|
} else {
|
|
221
253
|
0.0
|
|
222
254
|
};
|
|
255
|
+
|
|
256
|
+
// Input: source dataset — sub-namespace avoids collision with real entity names.
|
|
257
|
+
let (src_ns, src_path) = split_storage_uri(&u.source);
|
|
258
|
+
let inputs = json!([{
|
|
259
|
+
"namespace": format!("{}.source", self.config.namespace),
|
|
260
|
+
"name": name,
|
|
261
|
+
"facets": {
|
|
262
|
+
"symlinks": symlinks_facet(self.producer(), &src_ns, &src_path, "DIRECTORY")
|
|
263
|
+
}
|
|
264
|
+
}]);
|
|
265
|
+
|
|
266
|
+
// Accepted output: entity name as logical identifier, TABLE type.
|
|
267
|
+
let (acc_ns, acc_path) = split_storage_uri(&u.accepted);
|
|
223
268
|
let schema_facet = json!({
|
|
224
|
-
"fields": s.schema_fields.iter().map(|
|
|
225
|
-
json!({ "name":
|
|
269
|
+
"fields": s.schema_fields.iter().map(|col| {
|
|
270
|
+
json!({ "name": col.output_name, "type": col.column_type })
|
|
226
271
|
}).collect::<Vec<_>>(),
|
|
227
272
|
"_producer": self.producer(),
|
|
228
273
|
"_schemaURL": "https://openlineage.io/spec/facets/1-1-1/SchemaDatasetFacet.json"
|
|
229
274
|
});
|
|
230
|
-
let
|
|
231
|
-
"rowCount": s.
|
|
275
|
+
let accepted_dq_facet = json!({
|
|
276
|
+
"rowCount": s.accepted,
|
|
232
277
|
"validCount": s.accepted,
|
|
233
|
-
"invalidCount":
|
|
278
|
+
"invalidCount": 0u64,
|
|
234
279
|
"_producer": self.producer(),
|
|
235
|
-
"_schemaURL": "https://openlineage.io/spec/facets/1-0-2/
|
|
280
|
+
"_schemaURL": "https://openlineage.io/spec/facets/1-0-2/DataQualityMetricsOutputDatasetFacet.json"
|
|
236
281
|
});
|
|
237
282
|
let floe_facet = json!({
|
|
238
283
|
"entity": name,
|
|
239
284
|
"rejectionRate": rejection_rate,
|
|
240
285
|
"files": s.files,
|
|
241
286
|
"rows": s.rows,
|
|
242
|
-
"accepted": s.accepted,
|
|
243
|
-
"rejected": s.rejected,
|
|
244
287
|
"warnings": s.warnings,
|
|
245
288
|
"errors": s.errors,
|
|
246
289
|
"_producer": self.producer(),
|
|
247
290
|
"_schemaURL": "https://github.com/malon64/floe/schemas/FloeQualityRunFacet.json"
|
|
248
291
|
});
|
|
249
|
-
json!([{
|
|
250
|
-
"namespace": self.config.namespace,
|
|
251
|
-
"name": u.source,
|
|
252
|
-
"facets": {
|
|
253
|
-
"schema": schema_facet,
|
|
254
|
-
"dataQualityMetrics": dq_facet,
|
|
255
|
-
"floeQualityRun": floe_facet
|
|
256
|
-
}
|
|
257
|
-
}])
|
|
258
|
-
}
|
|
259
|
-
_ => json!([]),
|
|
260
|
-
};
|
|
261
292
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
293
|
+
let mut accepted_facets = json!({
|
|
294
|
+
"symlinks": symlinks_facet(self.producer(), &acc_ns, &acc_path, "TABLE"),
|
|
295
|
+
"schema": schema_facet,
|
|
296
|
+
"dataQualityMetrics": accepted_dq_facet,
|
|
297
|
+
"floeQualityRun": floe_facet
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
if !s.schema_fields.is_empty() {
|
|
301
|
+
let fields_map: serde_json::Map<String, Value> = s
|
|
302
|
+
.schema_fields
|
|
303
|
+
.iter()
|
|
304
|
+
.map(|col| {
|
|
305
|
+
let src = col.source_field.as_deref().unwrap_or(&col.output_name);
|
|
306
|
+
let entry = json!({
|
|
307
|
+
"inputFields": [{
|
|
308
|
+
"namespace": format!("{}.source", self.config.namespace),
|
|
309
|
+
"name": name,
|
|
310
|
+
"field": src
|
|
311
|
+
}]
|
|
312
|
+
});
|
|
313
|
+
(col.output_name.clone(), entry)
|
|
314
|
+
})
|
|
315
|
+
.collect();
|
|
316
|
+
accepted_facets["columnLineage"] = json!({
|
|
317
|
+
"fields": fields_map,
|
|
318
|
+
"_producer": self.producer(),
|
|
319
|
+
"_schemaURL": "https://openlineage.io/spec/facets/1-1-1/ColumnLineageDatasetFacet.json"
|
|
320
|
+
});
|
|
321
|
+
}
|
|
322
|
+
|
|
265
323
|
let mut out = vec![json!({
|
|
266
324
|
"namespace": self.config.namespace,
|
|
267
|
-
"name":
|
|
268
|
-
"facets":
|
|
325
|
+
"name": name,
|
|
326
|
+
"facets": accepted_facets
|
|
269
327
|
})];
|
|
328
|
+
|
|
329
|
+
// Rejected output (when configured): DIRECTORY type, rejected-row quality metrics.
|
|
270
330
|
if let Some(ref rej) = u.rejected {
|
|
331
|
+
let (rej_ns, rej_path) = split_storage_uri(rej);
|
|
332
|
+
let rejected_dq_facet = json!({
|
|
333
|
+
"rowCount": s.rejected,
|
|
334
|
+
"validCount": 0u64,
|
|
335
|
+
"invalidCount": s.rejected,
|
|
336
|
+
"_producer": self.producer(),
|
|
337
|
+
"_schemaURL": "https://openlineage.io/spec/facets/1-0-2/DataQualityMetricsOutputDatasetFacet.json"
|
|
338
|
+
});
|
|
271
339
|
out.push(json!({
|
|
272
|
-
"namespace": self.config.namespace,
|
|
273
|
-
"name":
|
|
274
|
-
"facets": {
|
|
340
|
+
"namespace": format!("{}.rejected", self.config.namespace),
|
|
341
|
+
"name": name,
|
|
342
|
+
"facets": {
|
|
343
|
+
"symlinks": symlinks_facet(self.producer(), &rej_ns, &rej_path, "DIRECTORY"),
|
|
344
|
+
"dataQualityMetrics": rejected_dq_facet
|
|
345
|
+
}
|
|
275
346
|
}));
|
|
276
347
|
}
|
|
277
|
-
|
|
348
|
+
|
|
349
|
+
(inputs, json!(out))
|
|
278
350
|
}
|
|
279
|
-
|
|
351
|
+
_ => (json!([]), json!([])),
|
|
280
352
|
};
|
|
281
353
|
|
|
282
354
|
let body = json!({
|
|
@@ -307,7 +379,7 @@ struct EntityStats {
|
|
|
307
379
|
rejected: u64,
|
|
308
380
|
warnings: u64,
|
|
309
381
|
errors: u64,
|
|
310
|
-
schema_fields: Vec<
|
|
382
|
+
schema_fields: Vec<ColumnMapping>,
|
|
311
383
|
}
|
|
312
384
|
|
|
313
385
|
fn ms_to_iso8601(ms: u128) -> String {
|
|
@@ -324,6 +396,30 @@ fn ms_to_iso8601(ms: u128) -> String {
|
|
|
324
396
|
}
|
|
325
397
|
}
|
|
326
398
|
|
|
399
|
+
/// Splits a storage URI into a `(namespace, name)` pair for dataset identifiers.
///
/// For the recognised cloud schemes (`s3://`, `gs://`, `gcs://`, `az://`,
/// `abfss://`, `abfs://`):
/// * the namespace is the scheme plus authority, e.g. `s3://bucket`;
/// * the name is everything from the first `/` after the authority,
///   e.g. `/data/in.csv`;
/// * when nothing follows the authority, the whole URI is the namespace and
///   the name is `/`.
///
/// Any other input is treated as a local path: the namespace is the literal
/// `file` and the name is the input, unchanged.
fn split_storage_uri(uri: &str) -> (String, String) {
    // None of these is a prefix of another (`abfs://` has `:` where `abfss://`
    // has its second `s`), so at most one entry can match a given URI and the
    // order of the list does not affect the result.
    const CLOUD_PREFIXES: [&str; 6] = ["s3://", "gs://", "gcs://", "az://", "abfss://", "abfs://"];

    for prefix in CLOUD_PREFIXES {
        if let Some(after_scheme) = uri.strip_prefix(prefix) {
            return match after_scheme.find('/') {
                Some(slash) => {
                    // `prefix` is ASCII and `slash` is the byte offset of an
                    // ASCII `/`, so the split point is always a char boundary.
                    let (authority, path) = uri.split_at(prefix.len() + slash);
                    (authority.to_string(), path.to_string())
                }
                // Bare `scheme://authority` with no path component.
                None => (uri.to_string(), "/".to_string()),
            };
        }
    }

    ("file".to_string(), uri.to_string())
}
|
|
414
|
+
|
|
415
|
+
fn symlinks_facet(producer: &str, namespace: &str, name: &str, ds_type: &str) -> Value {
|
|
416
|
+
json!({
|
|
417
|
+
"identifiers": [{ "namespace": namespace, "name": name, "type": ds_type }],
|
|
418
|
+
"_producer": producer,
|
|
419
|
+
"_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SymlinksDatasetFacet.json"
|
|
420
|
+
})
|
|
421
|
+
}
|
|
422
|
+
|
|
327
423
|
impl RunObserver for OpenLineageObserver {
|
|
328
424
|
fn on_event(&self, event: RunEvent) {
|
|
329
425
|
match event {
|
|
@@ -349,7 +445,7 @@ impl RunObserver for OpenLineageObserver {
|
|
|
349
445
|
},
|
|
350
446
|
"job": {
|
|
351
447
|
"namespace": self.config.namespace,
|
|
352
|
-
"name":
|
|
448
|
+
"name": self.run_job_name,
|
|
353
449
|
"facets": {}
|
|
354
450
|
},
|
|
355
451
|
"inputs": [],
|
|
@@ -444,7 +540,7 @@ impl RunObserver for OpenLineageObserver {
|
|
|
444
540
|
},
|
|
445
541
|
"job": {
|
|
446
542
|
"namespace": self.config.namespace,
|
|
447
|
-
"name":
|
|
543
|
+
"name": self.run_job_name,
|
|
448
544
|
"facets": {}
|
|
449
545
|
},
|
|
450
546
|
"inputs": [],
|
|
@@ -462,8 +558,9 @@ impl RunObserver for OpenLineageObserver {
|
|
|
462
558
|
pub fn build_observer(
|
|
463
559
|
config: &LineageConfig,
|
|
464
560
|
entities: &[EntityConfig],
|
|
561
|
+
config_path: &str,
|
|
465
562
|
) -> crate::FloeResult<Arc<dyn RunObserver>> {
|
|
466
|
-
let obs = OpenLineageObserver::new(config, entities)?;
|
|
563
|
+
let obs = OpenLineageObserver::new(config, entities, config_path)?;
|
|
467
564
|
Ok(Arc::new(obs))
|
|
468
565
|
}
|
|
469
566
|
|
|
@@ -98,7 +98,13 @@ impl RunContext {
|
|
|
98
98
|
let catalog_resolver = config::CatalogResolver::new(&config)?;
|
|
99
99
|
let config_dir =
|
|
100
100
|
crate::io::storage::paths::normalize_local_path(storage_resolver.config_dir());
|
|
101
|
-
let
|
|
101
|
+
let manifest_str = manifest_path.to_string_lossy();
|
|
102
|
+
let config_path = if config::is_remote_uri(&manifest_str) {
|
|
103
|
+
// Preserve the URI string as-is; normalize_local_path would collapse s3:// → s3:/
|
|
104
|
+
std::path::PathBuf::from(manifest_str.as_ref())
|
|
105
|
+
} else {
|
|
106
|
+
crate::io::storage::paths::normalize_local_path(manifest_path)
|
|
107
|
+
};
|
|
102
108
|
|
|
103
109
|
// The manifest embeds report_base_uri; resolve it to a target if it looks local.
|
|
104
110
|
let (report_target, report_base_path) =
|
|
@@ -148,3 +154,24 @@ impl RunContext {
|
|
|
148
154
|
})
|
|
149
155
|
}
|
|
150
156
|
}
|
|
157
|
+
|
|
158
|
+
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    /// Pins down why a remote manifest URI is wrapped with `PathBuf::from`
    /// instead of being passed through `normalize_local_path`.
    #[test]
    fn remote_uri_preserved_via_pathbuf_from() {
        const URI: &str = "s3://bucket/manifests/prod.json";

        // The local-path normaliser must not hand the URI back unchanged; the
        // expectation recorded here is that the `//` after the scheme does not
        // survive normalisation (`s3://...` becoming `s3:/...`).
        let mangled = crate::io::storage::paths::normalize_local_path(Path::new(URI))
            .display()
            .to_string();
        assert_ne!(
            mangled,
            URI,
            "normalize_local_path should mangle s3:// (confirming the bug we guard against)"
        );

        // Wrapping the string directly leaves it untouched, so rendering it
        // with `display()` yields the original URI.
        let kept = PathBuf::from(URI).display().to_string();
        assert_eq!(kept, URI);
    }
}
|
|
@@ -104,9 +104,11 @@ pub(crate) fn run_with_manifest_runtime(
|
|
|
104
104
|
runtime: &mut dyn Runtime,
|
|
105
105
|
) -> FloeResult<RunOutcome> {
|
|
106
106
|
init_thread_pool();
|
|
107
|
-
let
|
|
107
|
+
let manifest_str = manifest_path.to_string_lossy();
|
|
108
|
+
let location = config::resolve_config_location(&manifest_str)?;
|
|
109
|
+
let json = std::fs::read_to_string(&location.path)?;
|
|
108
110
|
let (config, report_base_uri) = crate::manifest::config_from_manifest_json(&json)?;
|
|
109
|
-
let config_base =
|
|
111
|
+
let config_base = location.base.clone();
|
|
110
112
|
if !options.entities.is_empty() {
|
|
111
113
|
validate_entities(&config, &options.entities)?;
|
|
112
114
|
}
|