floe-python 0.4.5__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. {floe_python-0.4.5 → floe_python-0.4.6}/Cargo.lock +3 -3
  2. {floe_python-0.4.5 → floe_python-0.4.6}/PKG-INFO +1 -1
  3. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/Cargo.toml +2 -2
  4. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/format.rs +176 -0
  5. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/rest.rs +108 -0
  6. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/parquet.rs +31 -9
  7. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/manifest/builder.rs +17 -5
  8. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/manifest/model.rs +10 -0
  9. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/profile/parse.rs +49 -5
  10. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/profile/types.rs +8 -0
  11. floe_python-0.4.6/crates/floe-core/src/run/entity/accepted_buffer.rs +251 -0
  12. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/accepted_write.rs +7 -48
  13. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/mod.rs +27 -17
  14. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/validate_split.rs +72 -8
  15. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/archive_run.rs +93 -0
  16. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/local_run.rs +97 -0
  17. floe_python-0.4.6/crates/floe-core/tests/unit/io/format.rs +357 -0
  18. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/manifest/mod.rs +162 -0
  19. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/parse.rs +140 -0
  20. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +27 -13
  21. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/Cargo.toml +2 -2
  22. {floe_python-0.4.5 → floe_python-0.4.6}/pyproject.toml +1 -1
  23. floe_python-0.4.5/crates/floe-core/tests/unit/io/format.rs +0 -19
  24. {floe_python-0.4.5 → floe_python-0.4.6}/Cargo.toml +0 -0
  25. {floe_python-0.4.5 → floe_python-0.4.6}/README.md +0 -0
  26. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/README.md +0 -0
  27. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/add_entity.rs +0 -0
  28. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/cast.rs +0 -0
  29. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/mismatch.rs +0 -0
  30. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/mod.rs +0 -0
  31. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/normalize.rs +0 -0
  32. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/not_null.rs +0 -0
  33. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/checks/unique.rs +0 -0
  34. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/catalog.rs +0 -0
  35. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/location.rs +0 -0
  36. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/mod.rs +0 -0
  37. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/parse.rs +0 -0
  38. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/storage.rs +0 -0
  39. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/template.rs +0 -0
  40. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/types.rs +0 -0
  41. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/validate.rs +0 -0
  42. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/config/yaml_decode.rs +0 -0
  43. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/errors.rs +0 -0
  44. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/mod.rs +0 -0
  45. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/avro.rs +0 -0
  46. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/csv.rs +0 -0
  47. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
  48. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/json.rs +0 -0
  49. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/json_selector.rs +0 -0
  50. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/mod.rs +0 -0
  51. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/orc.rs +0 -0
  52. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/parquet.rs +0 -0
  53. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/xlsx.rs +0 -0
  54. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/xml.rs +0 -0
  55. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
  56. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
  57. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
  58. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
  59. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
  60. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
  61. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
  62. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
  63. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/mod.rs +0 -0
  64. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/object_store.rs +0 -0
  65. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
  66. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/inputs.rs +0 -0
  67. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
  68. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
  69. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/adls.rs +0 -0
  70. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/gcs.rs +0 -0
  71. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/local.rs +0 -0
  72. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
  73. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/providers/s3.rs +0 -0
  74. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/storage/target.rs +0 -0
  75. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/unique_seed/mod.rs +0 -0
  76. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/accepted.rs +0 -0
  77. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
  78. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/csv.rs +0 -0
  79. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
  80. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/options.rs +0 -0
  81. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
  82. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/unity.rs +0 -0
  83. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta/unity_tests.rs +0 -0
  84. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/delta.rs +0 -0
  85. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/context.rs +0 -0
  86. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
  87. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
  88. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
  89. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
  90. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/iceberg.rs +0 -0
  91. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/metrics.rs +0 -0
  92. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/mod.rs +0 -0
  93. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/parts.rs +0 -0
  94. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/sink_format.rs +0 -0
  95. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
  96. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
  97. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +0 -0
  98. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +0 -0
  99. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/merge/shared.rs +0 -0
  100. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/mod.rs +0 -0
  101. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
  102. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/lib.rs +0 -0
  103. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/lineage/mod.rs +0 -0
  104. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/log.rs +0 -0
  105. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/manifest/mod.rs +0 -0
  106. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/manifest/reconstruct.rs +0 -0
  107. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/profile/mod.rs +0 -0
  108. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/profile/validate.rs +0 -0
  109. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/report/build.rs +0 -0
  110. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/report/entity.rs +0 -0
  111. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/report/mod.rs +0 -0
  112. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/report/output.rs +0 -0
  113. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/context.rs +0 -0
  114. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/incremental.rs +0 -0
  115. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/pii.rs +0 -0
  116. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/precheck.rs +0 -0
  117. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/process.rs +0 -0
  118. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/entity/resolve.rs +0 -0
  119. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/events.rs +0 -0
  120. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/file.rs +0 -0
  121. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/mod.rs +0 -0
  122. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/output.rs +0 -0
  123. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/run/perf.rs +0 -0
  124. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/runner/mod.rs +0 -0
  125. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/runner/outcome.rs +0 -0
  126. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/runtime.rs +0 -0
  127. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/state/mod.rs +0 -0
  128. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/vars/mod.rs +0 -0
  129. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/vars/resolve.rs +0 -0
  130. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/src/warnings.rs +0 -0
  131. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
  132. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/delta_run.rs +0 -0
  133. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/dry_run.rs +0 -0
  134. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
  135. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
  136. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_glue_run.rs +0 -0
  137. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
  138. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/iceberg_s3_run.rs +0 -0
  139. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
  140. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/mod.rs +0 -0
  141. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
  142. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
  143. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/integration.rs +0 -0
  144. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/common.rs +0 -0
  145. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/add_entity.rs +0 -0
  146. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/adls_storage.rs +0 -0
  147. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/adls_validation.rs +0 -0
  148. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/catalogs.rs +0 -0
  149. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/config_validation.rs +0 -0
  150. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
  151. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
  152. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/lineage_validation.rs +0 -0
  153. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/local_storage.rs +0 -0
  154. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/mod.rs +0 -0
  155. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/parse.rs +0 -0
  156. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
  157. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/remote_base.rs +0 -0
  158. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/storage_resolver_uri.rs +0 -0
  159. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/config/templating.rs +0 -0
  160. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/mod.rs +0 -0
  161. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
  162. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
  163. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
  164. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
  165. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
  166. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
  167. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
  168. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
  169. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
  170. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
  171. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
  172. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
  173. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
  174. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +0 -0
  175. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
  176. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/inputs.rs +0 -0
  177. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/local.rs +0 -0
  178. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
  179. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
  180. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
  181. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
  182. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
  183. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
  184. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/delta_write.rs +0 -0
  185. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +0 -0
  186. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
  187. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
  188. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/object_store.rs +0 -0
  189. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
  190. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
  191. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +0 -0
  192. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/mod.rs +0 -0
  193. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
  194. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/profile/validate.rs +0 -0
  195. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
  196. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/mod.rs +0 -0
  197. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/report/storage.rs +0 -0
  198. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
  199. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/checks.rs +0 -0
  200. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/incremental.rs +0 -0
  201. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
  202. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/lineage.rs +0 -0
  203. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/mod.rs +0 -0
  204. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
  205. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/pii.rs +0 -0
  206. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/report.rs +0 -0
  207. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
  208. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
  209. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
  210. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/state/mod.rs +0 -0
  211. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
  212. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
  213. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-core/tests/unit.rs +0 -0
  214. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/.gitignore +0 -0
  215. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/README.md +0 -0
  216. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/functions.rs +0 -0
  217. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/lib.rs +0 -0
  218. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/observer.rs +0 -0
  219. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/types/config.rs +0 -0
  220. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/types/errors.rs +0 -0
  221. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/types/mod.rs +0 -0
  222. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/src/types/outcome.rs +0 -0
  223. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/config.yml +0 -0
  224. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
  225. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
  226. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/fixtures/profile.yml +0 -0
  227. {floe_python-0.4.5 → floe_python-0.4.6}/crates/floe-python/tests/test_floe.py +0 -0
  228. {floe_python-0.4.5 → floe_python-0.4.6}/python/floe/__init__.py +0 -0
  229. {floe_python-0.4.5 → floe_python-0.4.6}/python/floe/_floe.pyi +0 -0
  230. {floe_python-0.4.5 → floe_python-0.4.6}/python/floe/py.typed +0 -0
@@ -3399,7 +3399,7 @@ dependencies = [
3399
3399
 
3400
3400
  [[package]]
3401
3401
  name = "floe-cli"
3402
- version = "0.4.5"
3402
+ version = "0.4.6"
3403
3403
  dependencies = [
3404
3404
  "assert_cmd",
3405
3405
  "clap",
@@ -3412,7 +3412,7 @@ dependencies = [
3412
3412
 
3413
3413
  [[package]]
3414
3414
  name = "floe-core"
3415
- version = "0.4.5"
3415
+ version = "0.4.6"
3416
3416
  dependencies = [
3417
3417
  "apache-avro 0.16.0",
3418
3418
  "arrow",
@@ -3455,7 +3455,7 @@ dependencies = [
3455
3455
 
3456
3456
  [[package]]
3457
3457
  name = "floe-python"
3458
- version = "0.4.5"
3458
+ version = "0.4.6"
3459
3459
  dependencies = [
3460
3460
  "floe-core",
3461
3461
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: floe-python
3
- Version: 0.4.5
3
+ Version: 0.4.6
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Intended Audience :: Science/Research
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "floe-core"
3
- version = "0.4.5"
3
+ version = "0.4.6"
4
4
  edition = "2021"
5
5
  description = "Core library for Floe, a YAML-driven technical ingestion tool."
6
6
  license = "MIT"
@@ -14,7 +14,7 @@ path = "src/lib.rs"
14
14
 
15
15
  [dependencies]
16
16
  yaml-rust2 = "0.11"
17
- polars = { version = "0.52.0", features = ["csv", "parquet", "lazy", "timezones", "dtype-date", "dtype-datetime", "dtype-time", "polars-ops", "is_unique", "is_first_distinct"] }
17
+ polars = { version = "0.52.0", features = ["csv", "parquet", "lazy", "new_streaming", "timezones", "dtype-date", "dtype-datetime", "dtype-time", "polars-ops", "is_unique", "is_first_distinct"] }
18
18
  calamine = "0.24"
19
19
  rayon = "1"
20
20
  deltalake = { version = "0.30.1", features = ["datafusion", "s3", "azure", "gcs"] }
@@ -119,6 +119,182 @@ pub struct AcceptedWriteOutput {
119
119
  pub perf: Option<AcceptedWritePerfBreakdown>,
120
120
  }
121
121
 
122
+ /// Per-write sinks cap their reported `part_files` list at this many entries
123
+ /// (see `parquet.rs`). The reducer applies the same cap across flushes so
124
+ /// the run report does not grow to N × 50 entries for high-fanout entities.
125
+ pub const MAX_REPORTED_PART_FILES: usize = 50;
126
+
127
+ impl AcceptedWriteOutput {
128
+ /// Fold a later flush's output into this one. The receiver represents the
129
+ /// running total across N completed flushes; `next` is the output of the
130
+ /// (N+1)th flush.
131
+ ///
132
+ /// Field semantics across flushes:
133
+ /// - `parts_written` (always known, the count of successful sink writes)
134
+ /// sums.
135
+ /// - `files_written` and the `Option<u64>` metric fields
136
+ /// (`total_bytes_written`, `small_files_count`, perf entries) sum
137
+ /// when *both* sides are `Some`; if either side is `None` the merged
138
+ /// result is `None`. "Unknown poisons" matches the per-flush
139
+ /// semantics: when any single flush could not determine its file
140
+ /// count (for example a remote Delta commit whose post-commit log
141
+ /// could not be read), reporting a partial sum would silently
142
+ /// under-count the total. The run report instead surfaces the value
143
+ /// as unknown.
144
+ /// - `part_files` concatenates and is capped at `MAX_REPORTED_PART_FILES`
145
+ /// so the reducer preserves the same cap the individual sink writers
146
+ /// apply per-flush.
147
+ /// - `table_version` / `snapshot_id` take the latest (Delta commit /
148
+ /// Iceberg snapshot move forward with every commit; the final state
149
+ /// is what readers see).
150
+ /// - `table_root_uri`, `catalog`, `schema_evolution` take the first
151
+ /// non-default value seen — table location and catalog registration
152
+ /// are established by the first write; schema evolution only fires on
153
+ /// the first (Overwrite) write because subsequent flushes are Append.
154
+ /// - `avg_file_size_mb` is recomputed from `total_bytes_written` divided
155
+ /// by `files_written` when available (so it matches the per-flush
156
+ /// semantics: for Parquet/Iceberg `files == parts`, but for Delta one
157
+ /// commit can write multiple `add` files and `parts != files`).
158
+ /// Falls back to `parts_written` when `files_written` is unknown.
159
+ /// - `perf` accumulates by summing each `Option<u64>` field.
160
+ /// - `merge` is unreachable in the buffered path (merge modes use the
161
+ /// legacy accumulate-then-write code path); the running value is
162
+ /// preserved if anything ever does pass one.
163
+ pub fn merge_in(&mut self, next: AcceptedWriteOutput) {
164
+ let AcceptedWriteOutput {
165
+ files_written,
166
+ parts_written,
167
+ part_files,
168
+ table_version,
169
+ snapshot_id,
170
+ table_root_uri,
171
+ catalog,
172
+ metrics,
173
+ merge,
174
+ schema_evolution,
175
+ perf,
176
+ } = next;
177
+
178
+ // `parts_written == 0` on the receiver means no prior flush has been
179
+ // merged. In that case `Option<u64>` fields on `self` start at `None`
180
+ // not because a flush returned unknown but because nothing has been
181
+ // recorded yet — distinguishing "vacuous" from "poisoned" matters
182
+ // because adopting the next flush's value verbatim on the first merge
183
+ // is correct, while applying poison-on-unknown semantics from `None`
184
+ // would always poison the very first merge.
185
+ let first_merge = self.parts_written == 0;
186
+
187
+ self.files_written = merge_option_u64(self.files_written, files_written, first_merge);
188
+ self.parts_written += parts_written;
189
+ let remaining = MAX_REPORTED_PART_FILES.saturating_sub(self.part_files.len());
190
+ if remaining > 0 {
191
+ self.part_files
192
+ .extend(part_files.into_iter().take(remaining));
193
+ }
194
+
195
+ if table_version.is_some() {
196
+ self.table_version = table_version;
197
+ }
198
+ if snapshot_id.is_some() {
199
+ self.snapshot_id = snapshot_id;
200
+ }
201
+
202
+ if self.table_root_uri.is_none() {
203
+ self.table_root_uri = table_root_uri;
204
+ }
205
+ if self.catalog.is_none() {
206
+ self.catalog = catalog;
207
+ }
208
+ if !self.schema_evolution.enabled
209
+ && !self.schema_evolution.applied
210
+ && self.schema_evolution.added_columns.is_empty()
211
+ && !self.schema_evolution.incompatible_changes_detected
212
+ && self.schema_evolution.mode.is_empty()
213
+ {
214
+ self.schema_evolution = schema_evolution;
215
+ }
216
+
217
+ self.metrics.total_bytes_written = merge_option_u64(
218
+ self.metrics.total_bytes_written,
219
+ metrics.total_bytes_written,
220
+ first_merge,
221
+ );
222
+ self.metrics.small_files_count = merge_option_u64(
223
+ self.metrics.small_files_count,
224
+ metrics.small_files_count,
225
+ first_merge,
226
+ );
227
+ self.metrics.avg_file_size_mb = recompute_avg_file_size_mb(
228
+ self.metrics.total_bytes_written,
229
+ self.files_written,
230
+ self.parts_written,
231
+ );
232
+
233
+ if self.merge.is_none() {
234
+ self.merge = merge;
235
+ }
236
+
237
+ match (self.perf.take(), perf) {
238
+ (Some(a), Some(b)) => self.perf = Some(sum_perf_breakdown(a, b)),
239
+ (Some(a), None) => self.perf = Some(a),
240
+ (None, Some(b)) => self.perf = Some(b),
241
+ (None, None) => self.perf = None,
242
+ }
243
+ }
244
+ }
245
+
246
+ /// Sum two `Option<u64>` values with poison-on-unknown semantics: if either
247
+ /// side is `None`, the result is `None`. Reporting a partial sum as if it
248
+ /// were the total would silently under-count for any aggregation across
249
+ /// flushes where one flush could not determine the underlying count
250
+ /// (e.g. remote Delta commit-log read failures).
251
+ fn sum_option_u64(a: Option<u64>, b: Option<u64>) -> Option<u64> {
252
+ match (a, b) {
253
+ (Some(a), Some(b)) => Some(a + b),
254
+ _ => None,
255
+ }
256
+ }
257
+
258
+ /// Progressive `Option<u64>` merge used by `merge_in`. On the first merge
259
+ /// (when the accumulator has no flush recorded yet) the next flush's value is
260
+ /// taken verbatim; on subsequent merges `sum_option_u64`'s poison-on-unknown
261
+ /// semantics apply.
262
+ fn merge_option_u64(acc: Option<u64>, next: Option<u64>, first_merge: bool) -> Option<u64> {
263
+ if first_merge {
264
+ next
265
+ } else {
266
+ sum_option_u64(acc, next)
267
+ }
268
+ }
269
+
270
+ fn recompute_avg_file_size_mb(
271
+ total_bytes: Option<u64>,
272
+ files_written: Option<u64>,
273
+ parts_written: u64,
274
+ ) -> Option<f64> {
275
+ let bytes = total_bytes?;
276
+ let denominator = files_written.unwrap_or(parts_written);
277
+ if denominator == 0 {
278
+ return None;
279
+ }
280
+ let mb = (bytes as f64) / (denominator as f64) / (1024.0 * 1024.0);
281
+ Some(mb)
282
+ }
283
+
284
+ fn sum_perf_breakdown(
285
+ a: AcceptedWritePerfBreakdown,
286
+ b: AcceptedWritePerfBreakdown,
287
+ ) -> AcceptedWritePerfBreakdown {
288
+ AcceptedWritePerfBreakdown {
289
+ conversion_ms: sum_option_u64(a.conversion_ms, b.conversion_ms),
290
+ source_df_build_ms: sum_option_u64(a.source_df_build_ms, b.source_df_build_ms),
291
+ merge_exec_ms: sum_option_u64(a.merge_exec_ms, b.merge_exec_ms),
292
+ data_write_ms: sum_option_u64(a.data_write_ms, b.data_write_ms),
293
+ commit_ms: sum_option_u64(a.commit_ms, b.commit_ms),
294
+ metrics_read_ms: sum_option_u64(a.metrics_read_ms, b.metrics_read_ms),
295
+ }
296
+ }
297
+
122
298
  pub trait InputAdapter: Send + Sync {
123
299
  fn format(&self) -> &'static str;
124
300
 
@@ -45,6 +45,7 @@ pub(crate) async fn build_rest_catalog(
45
45
  }
46
46
 
47
47
  if let Some(credential) = rest_cfg.credential.as_deref() {
48
+ let credential = expand_env_refs(credential, &rest_cfg.catalog_name)?;
48
49
  if let Some(token_value) = credential.strip_prefix("token:") {
49
50
  // Bearer PAT (Unity Catalog / Nessie)
50
51
  props.insert("token".to_string(), token_value.to_string());
@@ -140,6 +141,39 @@ impl RestIcebergCatalogConfig {
140
141
  }
141
142
  }
142
143
 
144
+ fn expand_env_refs(value: &str, catalog_name: &str) -> FloeResult<String> {
145
+ if !value.contains("${") {
146
+ return Ok(value.to_string());
147
+ }
148
+
149
+ let mut parts = Vec::new();
150
+ for part in value.split(':') {
151
+ parts.push(expand_env_ref_part(part, catalog_name)?);
152
+ }
153
+ Ok(parts.join(":"))
154
+ }
155
+
156
+ fn expand_env_ref_part(part: &str, catalog_name: &str) -> FloeResult<String> {
157
+ let Some(inner) = part.strip_prefix("${") else {
158
+ return Ok(part.to_string());
159
+ };
160
+ let Some(name) = inner.strip_suffix('}') else {
161
+ return Err(Box::new(RunError(format!(
162
+ "rest iceberg catalog {catalog_name} credential has unclosed env placeholder"
163
+ ))));
164
+ };
165
+ if name.is_empty() || name.contains('{') || name.contains('}') {
166
+ return Err(Box::new(RunError(format!(
167
+ "rest iceberg catalog {catalog_name} credential has invalid env placeholder"
168
+ ))));
169
+ }
170
+ std::env::var(name).map_err(|_| {
171
+ Box::new(RunError(format!(
172
+ "rest iceberg catalog {catalog_name} credential references env var {name} which is not set"
173
+ ))) as Box<dyn std::error::Error + Send + Sync>
174
+ })
175
+ }
176
+
143
177
  pub(crate) async fn write_via_rest_catalog(
144
178
  rest_cfg: &RestIcebergCatalogConfig,
145
179
  table_root_uri: String,
@@ -342,3 +376,77 @@ async fn create_rest_table(
342
376
  .await
343
377
  .map_err(map_iceberg_err("rest catalog create_table failed"))
344
378
  }
379
+
380
/// Unit tests for credential env-placeholder expansion.
///
/// Every environment variable touched here uses a `FLOE_TEST_REST_` prefix so
/// the tests cannot clobber a real variable of the host process or collide
/// with each other when the harness runs them in parallel.
#[cfg(test)]
mod tests {
    use super::expand_env_refs;

    #[test]
    fn expands_partial_env_refs_in_client_credentials() {
        std::env::set_var("FLOE_TEST_REST_CLIENT_ID", "client-id");
        std::env::set_var("FLOE_TEST_REST_CLIENT_SECRET", "client-secret");

        let expanded = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_CLIENT_ID}:${FLOE_TEST_REST_CLIENT_SECRET}",
            "polaris",
        )
        .expect("expand credential");

        assert_eq!(expanded, "client_credentials:client-id:client-secret");
        std::env::remove_var("FLOE_TEST_REST_CLIENT_ID");
        std::env::remove_var("FLOE_TEST_REST_CLIENT_SECRET");
    }

    #[test]
    fn expands_exact_env_ref_in_token_credential() {
        std::env::set_var("FLOE_TEST_REST_TOKEN", "pat-token");

        let expanded =
            expand_env_refs("token:${FLOE_TEST_REST_TOKEN}", "nessie").expect("expand token");

        assert_eq!(expanded, "token:pat-token");
        std::env::remove_var("FLOE_TEST_REST_TOKEN");
    }

    #[test]
    fn preserves_literal_credential_text_that_contains_env_ref_syntax() {
        let expanded =
            expand_env_refs("token:abc${def}ghi", "nessie").expect("preserve literal credential");

        assert_eq!(expanded, "token:abc${def}ghi");
    }

    #[test]
    fn errors_when_env_ref_is_missing() {
        std::env::remove_var("FLOE_TEST_REST_MISSING");

        let err = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_MISSING}:secret",
            "polaris",
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "rest iceberg catalog polaris credential references env var FLOE_TEST_REST_MISSING which is not set"
        );
    }

    #[test]
    fn errors_on_malformed_env_ref() {
        // Namespaced on purpose: a bare `ID` could shadow (and then delete) a
        // variable the surrounding process actually relies on.
        std::env::set_var("FLOE_TEST_REST_MALFORMED_ID", "client-id");

        let err = expand_env_refs(
            "client_credentials:${FLOE_TEST_REST_MALFORMED_ID}:literal-secret:${UNCLOSED",
            "polaris",
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "rest iceberg catalog polaris credential has unclosed env placeholder"
        );
        // The error must not echo any part of the credential itself.
        assert!(!err.to_string().contains("literal-secret"));
        std::env::remove_var("FLOE_TEST_REST_MALFORMED_ID");
    }
}
@@ -1,6 +1,10 @@
1
1
  use std::path::Path;
2
2
 
3
- use polars::prelude::{DataFrame, ParquetCompression, ParquetWriter};
3
+ use polars::polars_utils::plpath::PlPathRef;
4
+ use polars::prelude::{
5
+ DataFrame, IntoLazy, ParquetCompression, ParquetWriteOptions, SinkOptions as PolarsSinkOptions,
6
+ SinkTarget,
7
+ };
4
8
 
5
9
  use crate::checks::normalize::rename_output_columns;
6
10
  use crate::errors::{IoError, StorageError};
@@ -46,11 +50,32 @@ pub fn write_parquet_to_path(
46
50
  if let Some(parent) = output_path.parent() {
47
51
  std::fs::create_dir_all(parent)?;
48
52
  }
49
- let file = std::fs::File::create(output_path)?;
50
- let mut writer = ParquetWriter::new(file);
53
+ let write_options = build_parquet_write_options(options)?;
54
+ let sink_options = PolarsSinkOptions {
55
+ mkdir: false,
56
+ ..PolarsSinkOptions::default()
57
+ };
58
+ let target = SinkTarget::Path(PlPathRef::from_local_path(output_path).into_owned());
59
+ // The outer chunking loop in `ParquetSinkFormat::write` discards each
60
+ // chunk after the write, so taking the DataFrame here is safe. Using
61
+ // `std::mem::take` lets us hand ownership to `LazyFrame` without an
62
+ // extra clone of the Arrow buffers.
63
+ let frame = std::mem::take(df);
64
+ frame
65
+ .lazy()
66
+ .sink_parquet(target, write_options, None, sink_options)
67
+ .and_then(|lf| lf.with_new_streaming(true).collect())
68
+ .map_err(|err| Box::new(IoError(format!("parquet write failed: {err}"))))?;
69
+ Ok(())
70
+ }
71
+
72
+ fn build_parquet_write_options(
73
+ options: Option<&config::SinkOptions>,
74
+ ) -> FloeResult<ParquetWriteOptions> {
75
+ let mut write_options = ParquetWriteOptions::default();
51
76
  if let Some(options) = options {
52
77
  if let Some(compression) = &options.compression {
53
- writer = writer.with_compression(parse_parquet_compression(compression)?);
78
+ write_options.compression = parse_parquet_compression(compression)?;
54
79
  }
55
80
  if let Some(row_group_size) = options.row_group_size {
56
81
  let row_group_size = usize::try_from(row_group_size).map_err(|_| {
@@ -58,13 +83,10 @@ pub fn write_parquet_to_path(
58
83
  "parquet row_group_size is too large: {row_group_size}"
59
84
  )))
60
85
  })?;
61
- writer = writer.with_row_group_size(Some(row_group_size));
86
+ write_options.row_group_size = Some(row_group_size);
62
87
  }
63
88
  }
64
- writer
65
- .finish(df)
66
- .map_err(|err| Box::new(IoError(format!("parquet write failed: {err}"))))?;
67
- Ok(())
89
+ Ok(write_options)
68
90
  }
69
91
 
70
92
  impl SinkFormat for ParquetSinkFormat {
@@ -6,9 +6,9 @@ use sha2::{Digest, Sha256};
6
6
  use crate::config::{ConfigLocation, RootConfig, SourceOptions, StorageResolver};
7
7
  use crate::manifest::model::{
8
8
  CommonManifest, ManifestArchiveTarget, ManifestColumnDef, ManifestDomain, ManifestEntity,
9
- ManifestEntitySchema, ManifestExecution, ManifestExecutionDefaults, ManifestResultContract,
10
- ManifestRunnerAuth, ManifestRunnerDefinition, ManifestRunnerResources, ManifestRunnerSecret,
11
- ManifestRunners, ManifestSinkTarget, ManifestSinks, ManifestSource,
9
+ ManifestEntitySchema, ManifestExecution, ManifestExecutionDefaults, ManifestOrchestration,
10
+ ManifestResultContract, ManifestRunnerAuth, ManifestRunnerDefinition, ManifestRunnerResources,
11
+ ManifestRunnerSecret, ManifestRunners, ManifestSinkTarget, ManifestSinks, ManifestSource,
12
12
  };
13
13
  use crate::profile::ProfileConfig;
14
14
  use crate::FloeResult;
@@ -415,7 +415,7 @@ fn build_common_manifest(
415
415
  .unwrap_or_else(|| domain.incoming_dir.clone()),
416
416
  })
417
417
  .collect(),
418
- execution: default_execution_contract(options),
418
+ execution: default_execution_contract(options, profile),
419
419
  runners: runners_contract(profile),
420
420
  entities: manifest_entities,
421
421
  storages,
@@ -525,7 +525,10 @@ fn map_source_options(options: Option<&SourceOptions>) -> Option<serde_json::Val
525
525
  Some(serde_json::Value::Object(map))
526
526
  }
527
527
 
528
- fn default_execution_contract(options: &ManifestOptions) -> ManifestExecution {
528
+ fn default_execution_contract(
529
+ options: &ManifestOptions,
530
+ profile: Option<&ProfileConfig>,
531
+ ) -> ManifestExecution {
529
532
  let mut exit_codes = BTreeMap::new();
530
533
  exit_codes.insert("0", "success_or_rejected");
531
534
  exit_codes.insert("1", "technical_failure");
@@ -554,6 +557,14 @@ fn default_execution_contract(options: &ManifestOptions) -> ManifestExecution {
554
557
  })
555
558
  .collect();
556
559
 
560
+ let orchestration = profile
561
+ .and_then(|p| p.execution.as_ref())
562
+ .and_then(|e| e.orchestration.as_ref())
563
+ .map(|o| ManifestOrchestration {
564
+ max_concurrent_entities: o.max_concurrent_entities,
565
+ strategy: o.strategy.clone(),
566
+ });
567
+
557
568
  ManifestExecution {
558
569
  entrypoint: "floe",
559
570
  base_args,
@@ -568,6 +579,7 @@ fn default_execution_contract(options: &ManifestOptions) -> ManifestExecution {
568
579
  env: BTreeMap::new(),
569
580
  workdir: None,
570
581
  },
582
+ orchestration,
571
583
  }
572
584
  }
573
585
 
@@ -49,6 +49,16 @@ pub struct ManifestExecution {
49
49
  pub log_format: &'static str,
50
50
  pub result_contract: ManifestResultContract,
51
51
  pub defaults: ManifestExecutionDefaults,
52
+ #[serde(skip_serializing_if = "Option::is_none")]
53
+ pub orchestration: Option<ManifestOrchestration>,
54
+ }
55
+
56
/// Orchestration settings emitted under the manifest's `execution` section.
///
/// Each field is omitted from the serialized manifest when it is `None`.
#[derive(Debug, Serialize)]
pub struct ManifestOrchestration {
    /// Upper bound on entities processed concurrently, when configured.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_concurrent_entities: Option<u64>,
    /// Orchestration strategy name, when configured.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub strategy: Option<String>,
}
53
63
 
54
64
  #[derive(Debug, Serialize)]
@@ -8,9 +8,9 @@ use crate::config::yaml_decode::{
8
8
  hash_get, load_yaml, validate_known_keys, yaml_array, yaml_hash, yaml_string,
9
9
  };
10
10
  use crate::profile::types::{
11
- ProfileConfig, ProfileExecution, ProfileMetadata, ProfileRunner, ProfileRunnerAuth,
12
- ProfileRunnerResources, ProfileRunnerSecret, ProfileValidation, PROFILE_API_VERSION,
13
- PROFILE_KIND,
11
+ ProfileConfig, ProfileExecution, ProfileMetadata, ProfileOrchestration, ProfileRunner,
12
+ ProfileRunnerAuth, ProfileRunnerResources, ProfileRunnerSecret, ProfileValidation,
13
+ PROFILE_API_VERSION, PROFILE_KIND,
14
14
  };
15
15
  use crate::{ConfigError, FloeResult};
16
16
 
@@ -162,7 +162,7 @@ fn parse_metadata(value: &Yaml) -> FloeResult<ProfileMetadata> {
162
162
 
163
163
  fn parse_execution(value: &Yaml) -> FloeResult<ProfileExecution> {
164
164
  let hash = yaml_hash(value, "profile.execution")?;
165
- validate_known_keys(hash, "profile.execution", &["runner"])?;
165
+ validate_known_keys(hash, "profile.execution", &["runner", "orchestration"])?;
166
166
 
167
167
  let runner_yaml = hash_get(hash, "runner").ok_or_else(|| {
168
168
  Box::new(ConfigError(
@@ -171,7 +171,51 @@ fn parse_execution(value: &Yaml) -> FloeResult<ProfileExecution> {
171
171
  })?;
172
172
  let runner = parse_runner(runner_yaml)?;
173
173
 
174
- Ok(ProfileExecution { runner })
174
+ let orchestration = match hash_get(hash, "orchestration") {
175
+ Some(value) => Some(parse_orchestration(value)?),
176
+ None => None,
177
+ };
178
+
179
+ Ok(ProfileExecution {
180
+ runner,
181
+ orchestration,
182
+ })
183
+ }
184
+
185
+ fn parse_orchestration(value: &Yaml) -> FloeResult<ProfileOrchestration> {
186
+ let hash = yaml_hash(value, "profile.execution.orchestration")?;
187
+ validate_known_keys(
188
+ hash,
189
+ "profile.execution.orchestration",
190
+ &["max_concurrent_entities", "strategy"],
191
+ )?;
192
+
193
+ let max_concurrent_entities = get_optional_u64(
194
+ hash,
195
+ "max_concurrent_entities",
196
+ "profile.execution.orchestration",
197
+ )?;
198
+
199
+ if let Some(0) = max_concurrent_entities {
200
+ return Err(Box::new(ConfigError(
201
+ "profile.execution.orchestration.max_concurrent_entities: must be >= 1".to_string(),
202
+ )));
203
+ }
204
+
205
+ let strategy = get_optional_string(hash, "strategy", "profile.execution.orchestration")?;
206
+
207
+ if let Some(ref s) = strategy {
208
+ if s != "sequential" && s != "parallel" {
209
+ return Err(Box::new(ConfigError(format!(
210
+ "profile.execution.orchestration.strategy: expected \"sequential\" or \"parallel\", got \"{s}\""
211
+ ))));
212
+ }
213
+ }
214
+
215
+ Ok(ProfileOrchestration {
216
+ max_concurrent_entities,
217
+ strategy,
218
+ })
175
219
  }
176
220
 
177
221
  fn parse_runner(value: &Yaml) -> FloeResult<ProfileRunner> {
@@ -27,6 +27,14 @@ pub struct ProfileMetadata {
27
27
/// The `profile.execution` section of a profile.
#[derive(Debug, Clone)]
pub struct ProfileExecution {
    /// Runner definition parsed from the `runner` key.
    pub runner: ProfileRunner,
    /// Settings parsed from the `orchestration` key; `None` when the key is absent.
    pub orchestration: Option<ProfileOrchestration>,
}
32
+
33
/// Settings from `profile.execution.orchestration`; every field is optional.
#[derive(Debug, Clone)]
pub struct ProfileOrchestration {
    /// Maximum number of entities to process concurrently; the parser rejects `0`.
    pub max_concurrent_entities: Option<u64>,
    /// "sequential" | "parallel"
    pub strategy: Option<String>,
}
31
39
 
32
40
  #[derive(Debug, Clone)]