floe-python 0.4.4__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. {floe_python-0.4.4 → floe_python-0.4.6}/Cargo.lock +3 -3
  2. {floe_python-0.4.4 → floe_python-0.4.6}/PKG-INFO +1 -1
  3. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/Cargo.toml +2 -2
  4. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/location.rs +41 -1
  5. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/mod.rs +3 -1
  6. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/parse.rs +4 -0
  7. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/storage.rs +47 -2
  8. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/types.rs +2 -0
  9. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/format.rs +176 -0
  10. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/mod.rs +6 -1
  11. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/object_store.rs +9 -2
  12. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/s3.rs +18 -6
  13. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/context.rs +2 -1
  14. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/rest.rs +108 -0
  15. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/parquet.rs +31 -9
  16. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/lib.rs +6 -2
  17. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/manifest/builder.rs +204 -40
  18. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/manifest/mod.rs +1 -1
  19. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/manifest/model.rs +28 -9
  20. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/profile/parse.rs +49 -5
  21. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/profile/types.rs +8 -0
  22. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/context.rs +84 -5
  23. floe_python-0.4.6/crates/floe-core/src/run/entity/accepted_buffer.rs +251 -0
  24. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/accepted_write.rs +7 -48
  25. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/mod.rs +27 -17
  26. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/validate_split.rs +72 -8
  27. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/events.rs +5 -0
  28. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/mod.rs +21 -8
  29. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/archive_run.rs +93 -0
  30. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_glue_run.rs +1 -1
  31. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_s3_run.rs +1 -1
  32. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/local_run.rs +97 -0
  33. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/adls_storage.rs +4 -0
  34. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/adls_validation.rs +4 -0
  35. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/catalogs.rs +8 -0
  36. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/local_storage.rs +2 -0
  37. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/mod.rs +1 -0
  38. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/remote_base.rs +8 -0
  39. floe_python-0.4.6/crates/floe-core/tests/unit/config/storage_resolver_uri.rs +136 -0
  40. floe_python-0.4.6/crates/floe-core/tests/unit/io/format.rs +357 -0
  41. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +2 -0
  42. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/object_store.rs +10 -0
  43. floe_python-0.4.6/crates/floe-core/tests/unit/manifest/mod.rs +793 -0
  44. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/parse.rs +140 -0
  45. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/storage.rs +8 -0
  46. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +27 -13
  47. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/Cargo.toml +2 -2
  48. {floe_python-0.4.4 → floe_python-0.4.6}/pyproject.toml +1 -1
  49. floe_python-0.4.4/crates/floe-core/tests/unit/io/format.rs +0 -19
  50. floe_python-0.4.4/crates/floe-core/tests/unit/manifest/mod.rs +0 -306
  51. {floe_python-0.4.4 → floe_python-0.4.6}/Cargo.toml +0 -0
  52. {floe_python-0.4.4 → floe_python-0.4.6}/README.md +0 -0
  53. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/README.md +0 -0
  54. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/add_entity.rs +0 -0
  55. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/cast.rs +0 -0
  56. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/mismatch.rs +0 -0
  57. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/mod.rs +0 -0
  58. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/normalize.rs +0 -0
  59. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/not_null.rs +0 -0
  60. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/checks/unique.rs +0 -0
  61. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/catalog.rs +0 -0
  62. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/template.rs +0 -0
  63. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/validate.rs +0 -0
  64. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/config/yaml_decode.rs +0 -0
  65. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/errors.rs +0 -0
  66. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/mod.rs +0 -0
  67. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/avro.rs +0 -0
  68. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/csv.rs +0 -0
  69. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
  70. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/json.rs +0 -0
  71. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/json_selector.rs +0 -0
  72. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/mod.rs +0 -0
  73. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/orc.rs +0 -0
  74. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/parquet.rs +0 -0
  75. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/xlsx.rs +0 -0
  76. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/xml.rs +0 -0
  77. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
  78. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
  79. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
  80. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
  81. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
  82. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
  83. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
  84. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
  85. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
  86. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/inputs.rs +0 -0
  87. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
  88. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
  89. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/adls.rs +0 -0
  90. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/gcs.rs +0 -0
  91. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/local.rs +0 -0
  92. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
  93. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/storage/target.rs +0 -0
  94. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/unique_seed/mod.rs +0 -0
  95. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/accepted.rs +0 -0
  96. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
  97. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/csv.rs +0 -0
  98. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
  99. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/options.rs +0 -0
  100. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
  101. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/unity.rs +0 -0
  102. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/unity_tests.rs +0 -0
  103. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta.rs +0 -0
  104. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
  105. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
  106. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
  107. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
  108. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg.rs +0 -0
  109. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/metrics.rs +0 -0
  110. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/mod.rs +0 -0
  111. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/parts.rs +0 -0
  112. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/sink_format.rs +0 -0
  113. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
  114. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
  115. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +0 -0
  116. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +0 -0
  117. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/shared.rs +0 -0
  118. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/mod.rs +0 -0
  119. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
  120. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/lineage/mod.rs +0 -0
  121. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/log.rs +0 -0
  122. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/manifest/reconstruct.rs +0 -0
  123. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/profile/mod.rs +0 -0
  124. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/profile/validate.rs +0 -0
  125. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/report/build.rs +0 -0
  126. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/report/entity.rs +0 -0
  127. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/report/mod.rs +0 -0
  128. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/report/output.rs +0 -0
  129. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/incremental.rs +0 -0
  130. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/pii.rs +0 -0
  131. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/precheck.rs +0 -0
  132. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/process.rs +0 -0
  133. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/entity/resolve.rs +0 -0
  134. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/file.rs +0 -0
  135. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/output.rs +0 -0
  136. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/run/perf.rs +0 -0
  137. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/runner/mod.rs +0 -0
  138. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/runner/outcome.rs +0 -0
  139. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/runtime.rs +0 -0
  140. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/state/mod.rs +0 -0
  141. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/vars/mod.rs +0 -0
  142. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/vars/resolve.rs +0 -0
  143. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/src/warnings.rs +0 -0
  144. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
  145. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/delta_run.rs +0 -0
  146. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/dry_run.rs +0 -0
  147. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
  148. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
  149. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
  150. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
  151. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/mod.rs +0 -0
  152. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
  153. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
  154. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/integration.rs +0 -0
  155. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/common.rs +0 -0
  156. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/add_entity.rs +0 -0
  157. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/config_validation.rs +0 -0
  158. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
  159. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
  160. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/lineage_validation.rs +0 -0
  161. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/parse.rs +0 -0
  162. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
  163. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/templating.rs +0 -0
  164. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/mod.rs +0 -0
  165. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
  166. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
  167. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
  168. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
  169. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
  170. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
  171. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
  172. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
  173. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
  174. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
  175. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
  176. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
  177. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
  178. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
  179. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/inputs.rs +0 -0
  180. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/local.rs +0 -0
  181. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
  182. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
  183. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
  184. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
  185. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
  186. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
  187. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/delta_write.rs +0 -0
  188. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +0 -0
  189. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
  190. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
  191. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
  192. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
  193. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +0 -0
  194. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/mod.rs +0 -0
  195. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
  196. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/validate.rs +0 -0
  197. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
  198. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/mod.rs +0 -0
  199. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
  200. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/checks.rs +0 -0
  201. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/incremental.rs +0 -0
  202. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
  203. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/lineage.rs +0 -0
  204. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/mod.rs +0 -0
  205. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
  206. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/pii.rs +0 -0
  207. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/report.rs +0 -0
  208. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
  209. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
  210. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
  211. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/state/mod.rs +0 -0
  212. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
  213. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
  214. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-core/tests/unit.rs +0 -0
  215. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/.gitignore +0 -0
  216. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/README.md +0 -0
  217. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/functions.rs +0 -0
  218. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/lib.rs +0 -0
  219. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/observer.rs +0 -0
  220. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/types/config.rs +0 -0
  221. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/types/errors.rs +0 -0
  222. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/types/mod.rs +0 -0
  223. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/src/types/outcome.rs +0 -0
  224. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/config.yml +0 -0
  225. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
  226. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
  227. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/profile.yml +0 -0
  228. {floe_python-0.4.4 → floe_python-0.4.6}/crates/floe-python/tests/test_floe.py +0 -0
  229. {floe_python-0.4.4 → floe_python-0.4.6}/python/floe/__init__.py +0 -0
  230. {floe_python-0.4.4 → floe_python-0.4.6}/python/floe/_floe.pyi +0 -0
  231. {floe_python-0.4.4 → floe_python-0.4.6}/python/floe/py.typed +0 -0
@@ -3399,7 +3399,7 @@ dependencies = [
3399
3399
 
3400
3400
  [[package]]
3401
3401
  name = "floe-cli"
3402
- version = "0.4.4"
3402
+ version = "0.4.6"
3403
3403
  dependencies = [
3404
3404
  "assert_cmd",
3405
3405
  "clap",
@@ -3412,7 +3412,7 @@ dependencies = [
3412
3412
 
3413
3413
  [[package]]
3414
3414
  name = "floe-core"
3415
- version = "0.4.4"
3415
+ version = "0.4.6"
3416
3416
  dependencies = [
3417
3417
  "apache-avro 0.16.0",
3418
3418
  "arrow",
@@ -3455,7 +3455,7 @@ dependencies = [
3455
3455
 
3456
3456
  [[package]]
3457
3457
  name = "floe-python"
3458
- version = "0.4.4"
3458
+ version = "0.4.6"
3459
3459
  dependencies = [
3460
3460
  "floe-core",
3461
3461
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: floe-python
3
- Version: 0.4.4
3
+ Version: 0.4.6
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Intended Audience :: Science/Research
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "floe-core"
3
- version = "0.4.4"
3
+ version = "0.4.6"
4
4
  edition = "2021"
5
5
  description = "Core library for Floe, a YAML-driven technical ingestion tool."
6
6
  license = "MIT"
@@ -14,7 +14,7 @@ path = "src/lib.rs"
14
14
 
15
15
  [dependencies]
16
16
  yaml-rust2 = "0.11"
17
- polars = { version = "0.52.0", features = ["csv", "parquet", "lazy", "timezones", "dtype-date", "dtype-datetime", "dtype-time", "polars-ops", "is_unique", "is_first_distinct"] }
17
+ polars = { version = "0.52.0", features = ["csv", "parquet", "lazy", "new_streaming", "timezones", "dtype-date", "dtype-datetime", "dtype-time", "polars-ops", "is_unique", "is_first_distinct"] }
18
18
  calamine = "0.24"
19
19
  rayon = "1"
20
20
  deltalake = { version = "0.30.1", features = ["datafusion", "s3", "azure", "gcs"] }
@@ -45,7 +45,7 @@ pub fn resolve_config_location(input: &str) -> FloeResult<ConfigLocation> {
45
45
  fn download_remote_config(uri: &str, temp_dir: &Path) -> FloeResult<PathBuf> {
46
46
  if uri.starts_with("s3://") {
47
47
  let location = storage::s3::parse_s3_uri(uri)?;
48
- let client = storage::s3::S3Client::new(location.bucket, None)?;
48
+ let client = storage::s3::S3Client::new(location.bucket, None, None, None)?;
49
49
  return client.download_to_temp(uri, temp_dir);
50
50
  }
51
51
  if uri.starts_with("gs://") {
@@ -63,6 +63,8 @@ fn download_remote_config(uri: &str, temp_dir: &Path) -> FloeResult<PathBuf> {
63
63
  account: Some(location.account),
64
64
  container: Some(location.container),
65
65
  prefix: None,
66
+ endpoint: None,
67
+ path_style_access: None,
66
68
  };
67
69
  let client = storage::adls::AdlsClient::new(&definition)?;
68
70
  return client.download_to_temp(uri, temp_dir);
@@ -70,6 +72,44 @@ fn download_remote_config(uri: &str, temp_dir: &Path) -> FloeResult<PathBuf> {
70
72
  Err(format!("unsupported config uri: {}", uri).into())
71
73
  }
72
74
 
75
+ /// Write `bytes` to a remote URI by staging them in a temp file then uploading.
76
+ pub fn write_bytes_to_remote_uri(bytes: &[u8], uri: &str) -> FloeResult<()> {
77
+ let temp_dir = TempDir::new()?;
78
+ let local_path = temp_dir.path().join("upload");
79
+ std::fs::write(&local_path, bytes)?;
80
+ upload_to_remote_uri(&local_path, uri)
81
+ }
82
+
83
+ pub fn upload_to_remote_uri(local_path: &Path, uri: &str) -> FloeResult<()> {
84
+ if uri.starts_with("s3://") {
85
+ let location = storage::s3::parse_s3_uri(uri)?;
86
+ let client = storage::s3::S3Client::new(location.bucket, None, None, None)?;
87
+ return client.upload_from_path(local_path, uri);
88
+ }
89
+ if uri.starts_with("gs://") {
90
+ let location = storage::gcs::parse_gcs_uri(uri)?;
91
+ let client = storage::gcs::GcsClient::new(location.bucket)?;
92
+ return client.upload_from_path(local_path, uri);
93
+ }
94
+ if uri.starts_with("abfs://") {
95
+ let location = storage::adls::parse_adls_uri(uri)?;
96
+ let definition = StorageDefinition {
97
+ name: "manifest".to_string(),
98
+ fs_type: "adls".to_string(),
99
+ bucket: None,
100
+ region: None,
101
+ account: Some(location.account),
102
+ container: Some(location.container),
103
+ prefix: None,
104
+ endpoint: None,
105
+ path_style_access: None,
106
+ };
107
+ let client = storage::adls::AdlsClient::new(&definition)?;
108
+ return client.upload_from_path(local_path, uri);
109
+ }
110
+ Err(format!("unsupported manifest output uri: {uri}").into())
111
+ }
112
+
73
113
  pub(crate) fn is_remote_uri(value: &str) -> bool {
74
114
  value.starts_with("s3://") || value.starts_with("gs://") || value.starts_with("abfs://")
75
115
  }
@@ -9,7 +9,9 @@ pub(crate) mod yaml_decode;
9
9
 
10
10
  pub use catalog::{CatalogResolver, ResolvedDeltaCatalogTarget, ResolvedIcebergCatalogTarget};
11
11
  pub(crate) use location::is_remote_uri;
12
- pub use location::{resolve_config_location, ConfigLocation};
12
+ pub use location::{
13
+ resolve_config_location, upload_to_remote_uri, write_bytes_to_remote_uri, ConfigLocation,
14
+ };
13
15
  pub use storage::{resolve_local_path, ConfigBase, ResolvedPath, StorageResolver};
14
16
  pub use types::*;
15
17
 
@@ -695,6 +695,8 @@ fn parse_storage_definition(value: &Yaml) -> FloeResult<StorageDefinition> {
695
695
  "account",
696
696
  "container",
697
697
  "prefix",
698
+ "endpoint",
699
+ "path_style_access",
698
700
  ],
699
701
  )?;
700
702
  Ok(StorageDefinition {
@@ -705,6 +707,8 @@ fn parse_storage_definition(value: &Yaml) -> FloeResult<StorageDefinition> {
705
707
  account: opt_string(hash, "account", "storages.definitions")?,
706
708
  container: opt_string(hash, "container", "storages.definitions")?,
707
709
  prefix: opt_string(hash, "prefix", "storages.definitions")?,
710
+ endpoint: opt_string(hash, "endpoint", "storages.definitions")?,
711
+ path_style_access: opt_bool(hash, "path_style_access", "storages.definitions")?,
708
712
  })
709
713
  }
710
714
 
@@ -215,7 +215,7 @@ impl StorageResolver {
215
215
  raw_path: &str,
216
216
  ) -> FloeResult<ResolvedPath> {
217
217
  let name = storage_name.unwrap_or(self.default_name.as_str());
218
- if !self.has_config && name != "local" {
218
+ if !self.has_config && name != "local" && !self.definitions.contains_key(name) {
219
219
  return Err(Box::new(ConfigError(format!(
220
220
  "entity.name={} {field} references unknown storage {} (no storages block)",
221
221
  entity_name, name
@@ -238,6 +238,8 @@ impl StorageResolver {
238
238
  account: None,
239
239
  container: None,
240
240
  prefix: None,
241
+ endpoint: None,
242
+ path_style_access: None,
241
243
  }
242
244
  };
243
245
 
@@ -304,7 +306,7 @@ impl StorageResolver {
304
306
  raw_path: &str,
305
307
  ) -> FloeResult<ResolvedPath> {
306
308
  let name = storage_name.unwrap_or(self.default_name.as_str());
307
- if !self.has_config && name != "local" {
309
+ if !self.has_config && name != "local" && !self.definitions.contains_key(name) {
308
310
  return Err(Box::new(ConfigError(format!(
309
311
  "report.storage references unknown storage {} (no storages block)",
310
312
  name
@@ -327,6 +329,8 @@ impl StorageResolver {
327
329
  account: None,
328
330
  container: None,
329
331
  prefix: None,
332
+ endpoint: None,
333
+ path_style_access: None,
330
334
  }
331
335
  };
332
336
 
@@ -386,9 +390,48 @@ impl StorageResolver {
386
390
  }
387
391
  }
388
392
 
393
+ /// Scan definitions for the first one whose scheme and bucket/account match `uri`.
394
+ /// Used in manifest mode to resolve a bare report URI back to a named definition.
395
+ pub fn find_definition_name_for_uri(&self, uri: &str) -> Option<String> {
396
+ for (name, def) in &self.definitions {
397
+ if uri.starts_with("s3://") && def.fs_type == "s3" {
398
+ if let Some(b) = &def.bucket {
399
+ if uri.starts_with(&format!("s3://{b}/")) || uri == format!("s3://{b}") {
400
+ return Some(name.clone());
401
+ }
402
+ }
403
+ }
404
+ if uri.starts_with("gs://") && def.fs_type == "gcs" {
405
+ if let Some(b) = &def.bucket {
406
+ if uri.starts_with(&format!("gs://{b}/")) || uri == format!("gs://{b}") {
407
+ return Some(name.clone());
408
+ }
409
+ }
410
+ }
411
+ if uri.starts_with("abfs://") && def.fs_type == "adls" {
412
+ if let (Some(c), Some(a)) = (&def.container, &def.account) {
413
+ if uri.starts_with(&format!("abfs://{c}@{a}.dfs.core.windows.net")) {
414
+ return Some(name.clone());
415
+ }
416
+ }
417
+ }
418
+ }
419
+ None
420
+ }
421
+
422
+ /// Register a synthetic `StorageDefinition` into this resolver.
423
+ /// Used in manifest mode when the report URI has no matching definition in the config.
424
+ /// Does NOT flip `has_config`; entity resolution keeps its implicit-local fallback.
425
+ pub fn register_definition(&mut self, definition: StorageDefinition) {
426
+ self.definitions.insert(definition.name.clone(), definition);
427
+ }
428
+
389
429
  pub fn definition(&self, name: &str) -> Option<StorageDefinition> {
390
430
  if self.has_config {
391
431
  self.definitions.get(name).cloned()
432
+ } else if let Some(def) = self.definitions.get(name) {
433
+ // Synthetic definition registered by register_definition (e.g. report target).
434
+ Some(def.clone())
392
435
  } else if name == "local" {
393
436
  Some(StorageDefinition {
394
437
  name: "local".to_string(),
@@ -398,6 +441,8 @@ impl StorageResolver {
398
441
  account: None,
399
442
  container: None,
400
443
  prefix: None,
444
+ endpoint: None,
445
+ path_style_access: None,
401
446
  })
402
447
  } else {
403
448
  None
@@ -366,6 +366,8 @@ pub struct StorageDefinition {
366
366
  pub account: Option<String>,
367
367
  pub container: Option<String>,
368
368
  pub prefix: Option<String>,
369
+ pub endpoint: Option<String>,
370
+ pub path_style_access: Option<bool>,
369
371
  }
370
372
 
371
373
  #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -119,6 +119,182 @@ pub struct AcceptedWriteOutput {
119
119
  pub perf: Option<AcceptedWritePerfBreakdown>,
120
120
  }
121
121
 
122
/// Upper bound on the number of entries kept in a reported `part_files` list.
///
/// Per-write sinks cap their reported `part_files` list at this many entries
/// (see `parquet.rs`). The reducer applies the same cap across flushes so
/// the run report does not grow to N × 50 entries for high-fanout entities.
pub const MAX_REPORTED_PART_FILES: usize = 50;
126
+
127
+ impl AcceptedWriteOutput {
128
+ /// Fold a later flush's output into this one. The receiver represents the
129
+ /// running total across N completed flushes; `next` is the output of the
130
+ /// (N+1)th flush.
131
+ ///
132
+ /// Field semantics across flushes:
133
+ /// - `parts_written` (always known, the count of successful sink writes)
134
+ /// sums.
135
+ /// - `files_written` and the `Option<u64>` metric fields
136
+ /// (`total_bytes_written`, `small_files_count`, perf entries) sum
137
+ /// when *both* sides are `Some`; if either side is `None` the merged
138
+ /// result is `None`. "Unknown poisons" matches the per-flush
139
+ /// semantics: when any single flush could not determine its file
140
+ /// count (for example a remote Delta commit whose post-commit log
141
+ /// could not be read), reporting a partial sum would silently
142
+ /// under-count the total. The run report instead surfaces the value
143
+ /// as unknown.
144
+ /// - `part_files` concatenates and is capped at `MAX_REPORTED_PART_FILES`
145
+ /// so the reducer preserves the same cap the individual sink writers
146
+ /// apply per-flush.
147
+ /// - `table_version` / `snapshot_id` take the latest (Delta commit /
148
+ /// Iceberg snapshot move forward with every commit; the final state
149
+ /// is what readers see).
150
+ /// - `table_root_uri`, `catalog`, `schema_evolution` take the first
151
+ /// non-default value seen — table location and catalog registration
152
+ /// are established by the first write; schema evolution only fires on
153
+ /// the first (Overwrite) write because subsequent flushes are Append.
154
+ /// - `avg_file_size_mb` is recomputed from `total_bytes_written` divided
155
+ /// by `files_written` when available (so it matches the per-flush
156
+ /// semantics: for Parquet/Iceberg `files == parts`, but for Delta one
157
+ /// commit can write multiple `add` files and `parts != files`).
158
+ /// Falls back to `parts_written` when `files_written` is unknown.
159
+ /// - `perf` accumulates by summing each `Option<u64>` field.
160
+ /// - `merge` is unreachable in the buffered path (merge modes use the
161
+ /// legacy accumulate-then-write code path); the running value is
162
+ /// preserved if anything ever does pass one.
163
+ pub fn merge_in(&mut self, next: AcceptedWriteOutput) {
164
+ let AcceptedWriteOutput {
165
+ files_written,
166
+ parts_written,
167
+ part_files,
168
+ table_version,
169
+ snapshot_id,
170
+ table_root_uri,
171
+ catalog,
172
+ metrics,
173
+ merge,
174
+ schema_evolution,
175
+ perf,
176
+ } = next;
177
+
178
+ // `parts_written == 0` on the receiver means no prior flush has been
179
+ // merged. In that case `Option<u64>` fields on `self` start at `None`
180
+ // not because a flush returned unknown but because nothing has been
181
+ // recorded yet — distinguishing "vacuous" from "poisoned" matters
182
+ // because adopting the next flush's value verbatim on the first merge
183
+ // is correct, while applying poison-on-unknown semantics from `None`
184
+ // would always poison the very first merge.
185
+ let first_merge = self.parts_written == 0;
186
+
187
+ self.files_written = merge_option_u64(self.files_written, files_written, first_merge);
188
+ self.parts_written += parts_written;
189
+ let remaining = MAX_REPORTED_PART_FILES.saturating_sub(self.part_files.len());
190
+ if remaining > 0 {
191
+ self.part_files
192
+ .extend(part_files.into_iter().take(remaining));
193
+ }
194
+
195
+ if table_version.is_some() {
196
+ self.table_version = table_version;
197
+ }
198
+ if snapshot_id.is_some() {
199
+ self.snapshot_id = snapshot_id;
200
+ }
201
+
202
+ if self.table_root_uri.is_none() {
203
+ self.table_root_uri = table_root_uri;
204
+ }
205
+ if self.catalog.is_none() {
206
+ self.catalog = catalog;
207
+ }
208
+ if !self.schema_evolution.enabled
209
+ && !self.schema_evolution.applied
210
+ && self.schema_evolution.added_columns.is_empty()
211
+ && !self.schema_evolution.incompatible_changes_detected
212
+ && self.schema_evolution.mode.is_empty()
213
+ {
214
+ self.schema_evolution = schema_evolution;
215
+ }
216
+
217
+ self.metrics.total_bytes_written = merge_option_u64(
218
+ self.metrics.total_bytes_written,
219
+ metrics.total_bytes_written,
220
+ first_merge,
221
+ );
222
+ self.metrics.small_files_count = merge_option_u64(
223
+ self.metrics.small_files_count,
224
+ metrics.small_files_count,
225
+ first_merge,
226
+ );
227
+ self.metrics.avg_file_size_mb = recompute_avg_file_size_mb(
228
+ self.metrics.total_bytes_written,
229
+ self.files_written,
230
+ self.parts_written,
231
+ );
232
+
233
+ if self.merge.is_none() {
234
+ self.merge = merge;
235
+ }
236
+
237
+ match (self.perf.take(), perf) {
238
+ (Some(a), Some(b)) => self.perf = Some(sum_perf_breakdown(a, b)),
239
+ (Some(a), None) => self.perf = Some(a),
240
+ (None, Some(b)) => self.perf = Some(b),
241
+ (None, None) => self.perf = None,
242
+ }
243
+ }
244
+ }
245
+
246
/// Adds two optional counters with poison-on-unknown semantics.
///
/// Returns the sum only when both operands are known. If either side is
/// `None` the result is `None`: reporting a partial sum as if it were the
/// total would silently under-count for any aggregation across flushes where
/// one flush could not determine the underlying count (e.g. remote Delta
/// commit-log read failures).
///
/// A sum that does not fit in `u64` is also reported as `None` (unknown)
/// rather than panicking in debug builds or wrapping around in release
/// builds.
fn sum_option_u64(a: Option<u64>, b: Option<u64>) -> Option<u64> {
    match (a, b) {
        // `checked_add` yields `None` on overflow, which maps onto "unknown".
        (Some(a), Some(b)) => a.checked_add(b),
        _ => None,
    }
}
257
+
258
+ /// Progressive `Option<u64>` merge used by `merge_in`. On the first merge
259
+ /// (when the accumulator has no flush recorded yet) the next flush's value is
260
+ /// taken verbatim; on subsequent merges `sum_option_u64`'s poison-on-unknown
261
+ /// semantics apply.
262
+ fn merge_option_u64(acc: Option<u64>, next: Option<u64>, first_merge: bool) -> Option<u64> {
263
+ if first_merge {
264
+ next
265
+ } else {
266
+ sum_option_u64(acc, next)
267
+ }
268
+ }
269
+
270
/// Derives the average file size in MiB from merged totals.
///
/// The divisor is `files_written` when it is known and `parts_written`
/// otherwise. Returns `None` when the byte total is unknown or the divisor
/// is zero.
fn recompute_avg_file_size_mb(
    total_bytes: Option<u64>,
    files_written: Option<u64>,
    parts_written: u64,
) -> Option<f64> {
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

    let file_count = match files_written {
        Some(known) => known,
        None => parts_written,
    };
    match (total_bytes, file_count) {
        (None, _) | (_, 0) => None,
        (Some(bytes), count) => Some((bytes as f64) / (count as f64) / BYTES_PER_MB),
    }
}
283
+
284
+ fn sum_perf_breakdown(
285
+ a: AcceptedWritePerfBreakdown,
286
+ b: AcceptedWritePerfBreakdown,
287
+ ) -> AcceptedWritePerfBreakdown {
288
+ AcceptedWritePerfBreakdown {
289
+ conversion_ms: sum_option_u64(a.conversion_ms, b.conversion_ms),
290
+ source_df_build_ms: sum_option_u64(a.source_df_build_ms, b.source_df_build_ms),
291
+ merge_exec_ms: sum_option_u64(a.merge_exec_ms, b.merge_exec_ms),
292
+ data_write_ms: sum_option_u64(a.data_write_ms, b.data_write_ms),
293
+ commit_ms: sum_option_u64(a.commit_ms, b.commit_ms),
294
+ metrics_read_ms: sum_option_u64(a.metrics_read_ms, b.metrics_read_ms),
295
+ }
296
+ }
297
+
122
298
  pub trait InputAdapter: Send + Sync {
123
299
  fn format(&self) -> &'static str;
124
300
 
@@ -129,7 +129,12 @@ fn build_client(definition: &config::StorageDefinition) -> FloeResult<Box<dyn St
129
129
  "s3" => {
130
130
  let bucket =
131
131
  validation::require_field(definition, definition.bucket.as_ref(), "bucket", "s3")?;
132
- Box::new(s3::S3Client::new(bucket, definition.region.as_deref())?)
132
+ Box::new(s3::S3Client::new(
133
+ bucket,
134
+ definition.region.as_deref(),
135
+ definition.endpoint.as_deref(),
136
+ definition.path_style_access,
137
+ )?)
133
138
  }
134
139
  "adls" => Box::new(adls::AdlsClient::new(definition)?),
135
140
  "gcs" => {
@@ -111,9 +111,16 @@ pub fn iceberg_store_config(
111
111
  Target::S3 { storage, uri, .. } => {
112
112
  let mut file_io_props = HashMap::new();
113
113
  if let Some(definition) = resolver.definition(storage) {
114
- if let Some(region) = definition.region {
114
+ if let Some(region) = &definition.region {
115
115
  file_io_props.insert(S3_REGION.to_string(), region.clone());
116
- file_io_props.insert(CLIENT_REGION.to_string(), region);
116
+ file_io_props.insert(CLIENT_REGION.to_string(), region.clone());
117
+ }
118
+ if let Some(endpoint) = &definition.endpoint {
119
+ file_io_props.insert("s3.endpoint".to_string(), endpoint.clone());
120
+ }
121
+ if let Some(path_style) = definition.path_style_access {
122
+ file_io_props
123
+ .insert("s3.path-style-access".to_string(), path_style.to_string());
117
124
  }
118
125
  }
119
126
  Ok(IcebergStoreConfig {
@@ -19,23 +19,35 @@ pub struct S3Client {
19
19
  }
20
20
 
21
21
  impl S3Client {
22
- pub fn new(bucket: String, region: Option<&str>) -> FloeResult<Self> {
22
+ pub fn new(
23
+ bucket: String,
24
+ region: Option<&str>,
25
+ endpoint: Option<&str>,
26
+ path_style_access: Option<bool>,
27
+ ) -> FloeResult<Self> {
23
28
  let runtime = tokio::runtime::Builder::new_current_thread()
24
29
  .enable_all()
25
30
  .build()
26
31
  .map_err(|err| Box::new(StorageError(format!("failed to build aws runtime: {err}"))))?;
32
+ let endpoint = endpoint.map(ToOwned::to_owned);
27
33
  let config = runtime.block_on(async {
28
34
  let region_provider = match region {
29
35
  Some(region) => RegionProviderChain::first_try(Region::new(region.to_string()))
30
36
  .or_default_provider(),
31
37
  None => RegionProviderChain::default_provider(),
32
38
  };
33
- aws_config::defaults(aws_config::BehaviorVersion::latest())
34
- .region(region_provider)
35
- .load()
36
- .await
39
+ let mut builder =
40
+ aws_config::defaults(aws_config::BehaviorVersion::latest()).region(region_provider);
41
+ if let Some(ep) = endpoint {
42
+ builder = builder.endpoint_url(ep);
43
+ }
44
+ builder.load().await
37
45
  });
38
- let client = Client::new(&config);
46
+ let mut s3_builder = aws_sdk_s3::config::Builder::from(&config);
47
+ if path_style_access.unwrap_or(false) {
48
+ s3_builder = s3_builder.force_path_style(true);
49
+ }
50
+ let client = Client::from_conf(s3_builder.build());
39
51
  Ok(Self {
40
52
  bucket,
41
53
  client,
@@ -76,7 +76,8 @@ pub(super) fn build_iceberg_write_context(
76
76
  latest_s3_metadata_location(client, base_key)?
77
77
  }
78
78
  None => {
79
- let mut client = io::storage::s3::S3Client::new(bucket.clone(), None)?;
79
+ let mut client =
80
+ io::storage::s3::S3Client::new(bucket.clone(), None, None, None)?;
80
81
  latest_s3_metadata_location(&mut client, base_key)?
81
82
  }
82
83
  }
@@ -45,6 +45,7 @@ pub(crate) async fn build_rest_catalog(
45
45
  }
46
46
 
47
47
  if let Some(credential) = rest_cfg.credential.as_deref() {
48
+ let credential = expand_env_refs(credential, &rest_cfg.catalog_name)?;
48
49
  if let Some(token_value) = credential.strip_prefix("token:") {
49
50
  // Bearer PAT (Unity Catalog / Nessie)
50
51
  props.insert("token".to_string(), token_value.to_string());
@@ -140,6 +141,39 @@ impl RestIcebergCatalogConfig {
140
141
  }
141
142
  }
142
143
 
144
+ fn expand_env_refs(value: &str, catalog_name: &str) -> FloeResult<String> {
145
+ if !value.contains("${") {
146
+ return Ok(value.to_string());
147
+ }
148
+
149
+ let mut parts = Vec::new();
150
+ for part in value.split(':') {
151
+ parts.push(expand_env_ref_part(part, catalog_name)?);
152
+ }
153
+ Ok(parts.join(":"))
154
+ }
155
+
156
+ fn expand_env_ref_part(part: &str, catalog_name: &str) -> FloeResult<String> {
157
+ let Some(inner) = part.strip_prefix("${") else {
158
+ return Ok(part.to_string());
159
+ };
160
+ let Some(name) = inner.strip_suffix('}') else {
161
+ return Err(Box::new(RunError(format!(
162
+ "rest iceberg catalog {catalog_name} credential has unclosed env placeholder"
163
+ ))));
164
+ };
165
+ if name.is_empty() || name.contains('{') || name.contains('}') {
166
+ return Err(Box::new(RunError(format!(
167
+ "rest iceberg catalog {catalog_name} credential has invalid env placeholder"
168
+ ))));
169
+ }
170
+ std::env::var(name).map_err(|_| {
171
+ Box::new(RunError(format!(
172
+ "rest iceberg catalog {catalog_name} credential references env var {name} which is not set"
173
+ ))) as Box<dyn std::error::Error + Send + Sync>
174
+ })
175
+ }
176
+
143
177
  pub(crate) async fn write_via_rest_catalog(
144
178
  rest_cfg: &RestIcebergCatalogConfig,
145
179
  table_root_uri: String,
@@ -342,3 +376,77 @@ async fn create_rest_table(
342
376
  .await
343
377
  .map_err(map_iceberg_err("rest catalog create_table failed"))
344
378
  }
379
+
380
#[cfg(test)]
mod tests {
    //! Unit tests for credential placeholder expansion.
    //!
    //! The process environment is shared by every test thread, so each test
    //! uses its own `FLOE_TEST_REST_*` variable and never touches a name that
    //! could exist in a real environment.

    use super::expand_env_refs;

    /// Two placeholders in an OAuth-style credential are both substituted.
    #[test]
    fn expands_partial_env_refs_in_client_credentials() {
        std::env::set_var("FLOE_TEST_REST_CLIENT_ID", "client-id");
        std::env::set_var("FLOE_TEST_REST_CLIENT_SECRET", "client-secret");

        let expanded = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_CLIENT_ID}:${FLOE_TEST_REST_CLIENT_SECRET}",
            "polaris",
        )
        .expect("expand credential");

        assert_eq!(expanded, "client_credentials:client-id:client-secret");
        std::env::remove_var("FLOE_TEST_REST_CLIENT_ID");
        std::env::remove_var("FLOE_TEST_REST_CLIENT_SECRET");
    }

    /// A bearer-token credential whose value is exactly one placeholder.
    #[test]
    fn expands_exact_env_ref_in_token_credential() {
        std::env::set_var("FLOE_TEST_REST_TOKEN", "pat-token");

        let expanded =
            expand_env_refs("token:${FLOE_TEST_REST_TOKEN}", "nessie").expect("expand token");

        assert_eq!(expanded, "token:pat-token");
        std::env::remove_var("FLOE_TEST_REST_TOKEN");
    }

    /// `${...}` in the middle of a segment is literal text, not a placeholder.
    #[test]
    fn preserves_literal_credential_text_that_contains_env_ref_syntax() {
        let expanded =
            expand_env_refs("token:abc${def}ghi", "nessie").expect("preserve literal credential");

        assert_eq!(expanded, "token:abc${def}ghi");
    }

    /// A placeholder naming an unset variable is reported by name.
    #[test]
    fn errors_when_env_ref_is_missing() {
        std::env::remove_var("FLOE_TEST_REST_MISSING");

        let err = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_MISSING}:secret",
            "polaris",
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "rest iceberg catalog polaris credential references env var FLOE_TEST_REST_MISSING which is not set"
        );
    }

    /// An unclosed placeholder fails without echoing other credential parts.
    #[test]
    fn errors_on_malformed_env_ref() {
        // Namespaced on purpose: a bare `ID` could shadow, and on cleanup
        // delete, a real variable of the surrounding process.
        std::env::set_var("FLOE_TEST_REST_ID", "client-id");

        let err = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_ID}:literal-secret:${UNCLOSED",
            "polaris",
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "rest iceberg catalog polaris credential has unclosed env placeholder"
        );
        assert!(!err.to_string().contains("literal-secret"));
        std::env::remove_var("FLOE_TEST_REST_ID");
    }
}