floe-python 0.3.10__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. {floe_python-0.3.10 → floe_python-0.4.0}/Cargo.lock +56 -3
  2. {floe_python-0.3.10 → floe_python-0.4.0}/PKG-INFO +1 -1
  3. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/Cargo.toml +3 -1
  4. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/mismatch.rs +4 -3
  5. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/parse.rs +37 -6
  6. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/storage.rs +1 -0
  7. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/types.rs +27 -38
  8. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/validate.rs +39 -51
  9. floe_python-0.4.0/crates/floe-core/src/errors.rs +27 -0
  10. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/format.rs +33 -36
  11. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/mod.rs +42 -0
  12. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/ops/inputs.rs +5 -1
  13. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/adls.rs +121 -1
  14. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/gcs.rs +123 -1
  15. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/local.rs +110 -1
  16. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/s3.rs +113 -1
  17. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/unique_seed/mod.rs +15 -41
  18. floe_python-0.4.0/crates/floe-core/src/io/write/accepted.rs +1 -0
  19. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta/unity.rs +16 -4
  20. floe_python-0.4.0/crates/floe-core/src/io/write/delta/unity_tests.rs +209 -0
  21. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta.rs +73 -30
  22. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/rest.rs +6 -4
  23. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg.rs +280 -44
  24. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/mod.rs +1 -0
  25. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/parquet.rs +93 -30
  26. floe_python-0.4.0/crates/floe-core/src/io/write/sink_format.rs +66 -0
  27. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +2 -1
  28. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +15 -15
  29. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/merge/shared.rs +55 -112
  30. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/mod.rs +1 -1
  31. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/lineage/mod.rs +85 -20
  32. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/manifest/builder.rs +24 -7
  33. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/manifest/model.rs +8 -1
  34. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/profile/parse.rs +66 -2
  35. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/profile/types.rs +21 -1
  36. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/profile/validate.rs +24 -0
  37. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/report/entity.rs +35 -14
  38. floe_python-0.4.0/crates/floe-core/src/run/entity/accepted_write.rs +156 -0
  39. floe_python-0.4.0/crates/floe-core/src/run/entity/incremental.rs +281 -0
  40. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/mod.rs +53 -52
  41. floe_python-0.4.0/crates/floe-core/src/run/entity/pii.rs +446 -0
  42. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/precheck.rs +3 -2
  43. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/validate_split.rs +12 -16
  44. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/mod.rs +40 -37
  45. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/output.rs +13 -16
  46. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/runtime.rs +5 -4
  47. floe_python-0.4.0/crates/floe-core/src/state/mod.rs +730 -0
  48. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/delta_run.rs +2 -2
  49. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/add_entity.rs +2 -1
  50. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/adls_validation.rs +1 -1
  51. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/catalogs.rs +2 -2
  52. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/config_validation.rs +4 -4
  53. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/lineage_validation.rs +57 -0
  54. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/parse.rs +4 -4
  55. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/inputs.rs +1 -1
  56. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/local.rs +48 -1
  57. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/delta_write.rs +1 -1
  58. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +1 -1
  59. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/object_store.rs +8 -8
  60. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +1 -1
  61. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/manifest/mod.rs +22 -3
  62. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/profile/parse.rs +28 -4
  63. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/profile/validate.rs +36 -1
  64. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/entity/incremental.rs +39 -0
  65. floe_python-0.4.0/crates/floe-core/tests/unit/run/lineage.rs +210 -0
  66. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/mod.rs +1 -0
  67. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/state/mod.rs +136 -77
  68. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/Cargo.toml +2 -2
  69. {floe_python-0.3.10 → floe_python-0.4.0}/pyproject.toml +1 -1
  70. floe_python-0.3.10/crates/floe-core/src/errors.rs +0 -57
  71. floe_python-0.3.10/crates/floe-core/src/io/unique_seed/delta.rs +0 -47
  72. floe_python-0.3.10/crates/floe-core/src/io/unique_seed/iceberg.rs +0 -299
  73. floe_python-0.3.10/crates/floe-core/src/io/unique_seed/parquet.rs +0 -80
  74. floe_python-0.3.10/crates/floe-core/src/io/write/accepted.rs +0 -56
  75. floe_python-0.3.10/crates/floe-core/src/run/entity/accepted_write.rs +0 -242
  76. floe_python-0.3.10/crates/floe-core/src/run/entity/incremental.rs +0 -177
  77. floe_python-0.3.10/crates/floe-core/src/run/entity/pii.rs +0 -182
  78. floe_python-0.3.10/crates/floe-core/src/state/mod.rs +0 -356
  79. {floe_python-0.3.10 → floe_python-0.4.0}/Cargo.toml +0 -0
  80. {floe_python-0.3.10 → floe_python-0.4.0}/README.md +0 -0
  81. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/README.md +0 -0
  82. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/add_entity.rs +0 -0
  83. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/cast.rs +0 -0
  84. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/mod.rs +0 -0
  85. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/normalize.rs +0 -0
  86. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/not_null.rs +0 -0
  87. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/checks/unique.rs +0 -0
  88. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/catalog.rs +0 -0
  89. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/location.rs +0 -0
  90. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/mod.rs +0 -0
  91. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/template.rs +0 -0
  92. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/config/yaml_decode.rs +0 -0
  93. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/mod.rs +0 -0
  94. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/avro.rs +0 -0
  95. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/csv.rs +0 -0
  96. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
  97. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/json.rs +0 -0
  98. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/json_selector.rs +0 -0
  99. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/mod.rs +0 -0
  100. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/orc.rs +0 -0
  101. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/parquet.rs +0 -0
  102. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/xlsx.rs +0 -0
  103. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/xml.rs +0 -0
  104. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
  105. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
  106. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
  107. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
  108. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
  109. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
  110. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
  111. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
  112. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/object_store.rs +0 -0
  113. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
  114. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
  115. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
  116. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
  117. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/storage/target.rs +0 -0
  118. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
  119. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/csv.rs +0 -0
  120. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
  121. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta/options.rs +0 -0
  122. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
  123. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/context.rs +0 -0
  124. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
  125. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
  126. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
  127. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
  128. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/metrics.rs +0 -0
  129. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/parts.rs +0 -0
  130. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
  131. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
  132. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
  133. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/lib.rs +0 -0
  134. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/log.rs +0 -0
  135. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/manifest/mod.rs +0 -0
  136. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/profile/mod.rs +0 -0
  137. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/report/build.rs +0 -0
  138. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/report/mod.rs +0 -0
  139. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/report/output.rs +0 -0
  140. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/context.rs +0 -0
  141. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/process.rs +0 -0
  142. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/entity/resolve.rs +0 -0
  143. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/events.rs +0 -0
  144. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/file.rs +0 -0
  145. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/run/perf.rs +0 -0
  146. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/runner/mod.rs +0 -0
  147. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/runner/outcome.rs +0 -0
  148. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/vars/mod.rs +0 -0
  149. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/vars/resolve.rs +0 -0
  150. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/src/warnings.rs +0 -0
  151. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/archive_run.rs +0 -0
  152. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
  153. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/dry_run.rs +0 -0
  154. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
  155. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
  156. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/iceberg_glue_run.rs +0 -0
  157. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
  158. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/iceberg_s3_run.rs +0 -0
  159. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
  160. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/local_run.rs +0 -0
  161. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/mod.rs +0 -0
  162. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
  163. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
  164. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/integration.rs +0 -0
  165. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/common.rs +0 -0
  166. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/adls_storage.rs +0 -0
  167. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
  168. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
  169. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/local_storage.rs +0 -0
  170. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/mod.rs +0 -0
  171. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
  172. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/remote_base.rs +0 -0
  173. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/config/templating.rs +0 -0
  174. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/format.rs +0 -0
  175. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/mod.rs +0 -0
  176. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
  177. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
  178. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
  179. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
  180. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
  181. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
  182. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
  183. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
  184. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
  185. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
  186. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
  187. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
  188. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
  189. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +0 -0
  190. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
  191. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
  192. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
  193. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
  194. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
  195. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
  196. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
  197. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
  198. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
  199. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
  200. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
  201. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/mod.rs +0 -0
  202. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
  203. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
  204. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/report/mod.rs +0 -0
  205. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/report/storage.rs +0 -0
  206. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
  207. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/checks.rs +0 -0
  208. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +0 -0
  209. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
  210. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
  211. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/pii.rs +0 -0
  212. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/report.rs +0 -0
  213. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
  214. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
  215. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
  216. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
  217. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
  218. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-core/tests/unit.rs +0 -0
  219. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/.gitignore +0 -0
  220. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/README.md +0 -0
  221. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/functions.rs +0 -0
  222. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/lib.rs +0 -0
  223. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/observer.rs +0 -0
  224. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/types/config.rs +0 -0
  225. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/types/errors.rs +0 -0
  226. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/types/mod.rs +0 -0
  227. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/src/types/outcome.rs +0 -0
  228. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/fixtures/config.yml +0 -0
  229. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
  230. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
  231. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/fixtures/profile.yml +0 -0
  232. {floe_python-0.3.10 → floe_python-0.4.0}/crates/floe-python/tests/test_floe.py +0 -0
  233. {floe_python-0.3.10 → floe_python-0.4.0}/python/floe/__init__.py +0 -0
  234. {floe_python-0.3.10 → floe_python-0.4.0}/python/floe/_floe.pyi +0 -0
  235. {floe_python-0.3.10 → floe_python-0.4.0}/python/floe/py.typed +0 -0
@@ -476,6 +476,16 @@ version = "0.3.2"
476
476
  source = "registry+https://github.com/rust-lang/crates.io-index"
477
477
  checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063"
478
478
 
479
+ [[package]]
480
+ name = "assert-json-diff"
481
+ version = "2.0.2"
482
+ source = "registry+https://github.com/rust-lang/crates.io-index"
483
+ checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12"
484
+ dependencies = [
485
+ "serde",
486
+ "serde_json",
487
+ ]
488
+
479
489
  [[package]]
480
490
  name = "assert_cmd"
481
491
  version = "2.1.2"
@@ -1708,6 +1718,15 @@ version = "1.0.4"
1708
1718
  source = "registry+https://github.com/rust-lang/crates.io-index"
1709
1719
  checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
1710
1720
 
1721
+ [[package]]
1722
+ name = "colored"
1723
+ version = "3.1.1"
1724
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1725
+ checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
1726
+ dependencies = [
1727
+ "windows-sys 0.61.2",
1728
+ ]
1729
+
1711
1730
  [[package]]
1712
1731
  name = "comfy-table"
1713
1732
  version = "7.2.2"
@@ -3380,7 +3399,7 @@ dependencies = [
3380
3399
 
3381
3400
  [[package]]
3382
3401
  name = "floe-cli"
3383
- version = "0.3.10"
3402
+ version = "0.4.0"
3384
3403
  dependencies = [
3385
3404
  "assert_cmd",
3386
3405
  "clap",
@@ -3393,7 +3412,7 @@ dependencies = [
3393
3412
 
3394
3413
  [[package]]
3395
3414
  name = "floe-core"
3396
- version = "0.3.10"
3415
+ version = "0.4.0"
3397
3416
  dependencies = [
3398
3417
  "apache-avro 0.16.0",
3399
3418
  "arrow",
@@ -3414,6 +3433,7 @@ dependencies = [
3414
3433
  "iceberg",
3415
3434
  "iceberg-catalog-rest",
3416
3435
  "iceberg-storage-opendal",
3436
+ "mockito",
3417
3437
  "orc-rust",
3418
3438
  "polars",
3419
3439
  "rayon",
@@ -3425,6 +3445,7 @@ dependencies = [
3425
3445
  "serde_yaml",
3426
3446
  "sha2",
3427
3447
  "tempfile",
3448
+ "thiserror 1.0.69",
3428
3449
  "time",
3429
3450
  "tokio",
3430
3451
  "url",
@@ -3434,7 +3455,7 @@ dependencies = [
3434
3455
 
3435
3456
  [[package]]
3436
3457
  name = "floe-python"
3437
- version = "0.3.10"
3458
+ version = "0.4.0"
3438
3459
  dependencies = [
3439
3460
  "floe-core",
3440
3461
  "pyo3",
@@ -4046,6 +4067,7 @@ dependencies = [
4046
4067
  "http 1.4.0",
4047
4068
  "http-body 1.0.1",
4048
4069
  "httparse",
4070
+ "httpdate",
4049
4071
  "itoa",
4050
4072
  "pin-project-lite",
4051
4073
  "pin-utils",
@@ -4861,6 +4883,31 @@ dependencies = [
4861
4883
  "windows-sys 0.61.2",
4862
4884
  ]
4863
4885
 
4886
+ [[package]]
4887
+ name = "mockito"
4888
+ version = "1.7.2"
4889
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4890
+ checksum = "90820618712cab19cfc46b274c6c22546a82affcb3c3bdf0f29e3db8e1bb92c0"
4891
+ dependencies = [
4892
+ "assert-json-diff",
4893
+ "bytes",
4894
+ "colored",
4895
+ "futures-core",
4896
+ "http 1.4.0",
4897
+ "http-body 1.0.1",
4898
+ "http-body-util",
4899
+ "hyper 1.8.1",
4900
+ "hyper-util",
4901
+ "log",
4902
+ "pin-project-lite",
4903
+ "rand 0.9.4",
4904
+ "regex",
4905
+ "serde_json",
4906
+ "serde_urlencoded",
4907
+ "similar",
4908
+ "tokio",
4909
+ ]
4910
+
4864
4911
  [[package]]
4865
4912
  name = "moka"
4866
4913
  version = "0.12.13"
@@ -7420,6 +7467,12 @@ version = "0.1.5"
7420
7467
  source = "registry+https://github.com/rust-lang/crates.io-index"
7421
7468
  checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
7422
7469
 
7470
+ [[package]]
7471
+ name = "similar"
7472
+ version = "2.7.0"
7473
+ source = "registry+https://github.com/rust-lang/crates.io-index"
7474
+ checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
7475
+
7423
7476
  [[package]]
7424
7477
  name = "simple_asn1"
7425
7478
  version = "0.6.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: floe-python
3
- Version: 0.3.10
3
+ Version: 0.4.0
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Intended Audience :: Science/Research
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "floe-core"
3
- version = "0.3.10"
3
+ version = "0.4.0"
4
4
  edition = "2021"
5
5
  description = "Core library for Floe, a YAML-driven technical ingestion tool."
6
6
  license = "MIT"
@@ -48,9 +48,11 @@ orc-rust = "0.7.1"
48
48
  reqwest = { version = "0.12", default-features = true, features = ["native-tls-vendored", "json", "blocking"] }
49
49
  sha2 = "0.10"
50
50
  hex = "0.4"
51
+ thiserror = "1"
51
52
 
52
53
  [dev-dependencies]
53
54
  rust_xlsxwriter = "0.67"
55
+ mockito = "1"
54
56
 
55
57
  [features]
56
58
  vendored-openssl = []
@@ -2,6 +2,7 @@ use std::collections::HashMap;
2
2
 
3
3
  use polars::prelude::{DataFrame, DataType, Series};
4
4
 
5
+ use crate::config::PolicySeverity;
5
6
  use crate::errors::RunError;
6
7
  use crate::{config, report, ConfigError, FloeResult};
7
8
 
@@ -124,7 +125,7 @@ pub fn plan_schema_mismatch(
124
125
  let mut warning = None;
125
126
  let rejection_requested = (effective_missing == "reject_file" && !missing.is_empty())
126
127
  || (effective_extra == "reject_file" && !extra.is_empty());
127
- if rejection_requested && entity.policy.severity == "warn" {
128
+ if rejection_requested && entity.policy.severity == PolicySeverity::Warn {
128
129
  warning = Some(format!(
129
130
  "entity.name={} schema mismatch requested reject_file but policy.severity=warn; continuing",
130
131
  entity.name
@@ -139,10 +140,10 @@ pub fn plan_schema_mismatch(
139
140
  if (effective_missing == "reject_file" && !missing.is_empty())
140
141
  || (effective_extra == "reject_file" && !extra.is_empty())
141
142
  {
142
- if entity.policy.severity == "abort" {
143
+ if entity.policy.severity == PolicySeverity::Abort {
143
144
  aborted = true;
144
145
  action = report::MismatchAction::Aborted;
145
- } else if entity.policy.severity == "reject" {
146
+ } else if entity.policy.severity == PolicySeverity::Reject {
146
147
  rejected = true;
147
148
  action = report::MismatchAction::RejectedFile;
148
149
  }
@@ -15,8 +15,8 @@ use crate::config::{
15
15
  DomainConfig, EntityConfig, EntityMetadata, EntityStateConfig, EnvConfig,
16
16
  IcebergPartitionFieldConfig, IcebergSinkTargetConfig, IncrementalMode, LineageConfig,
17
17
  MergeOptionsConfig, MergeScd2OptionsConfig, NormalizeColumnsConfig, PiiColumnConfig, PiiConfig,
18
- PiiStrategy, PolicyConfig, ProjectMetadata, ReportConfig, RootConfig, SchemaConfig,
19
- SchemaEvolutionConfig, SchemaEvolutionIncompatibleAction, SchemaEvolutionMode,
18
+ PiiStrategy, PolicyConfig, PolicySeverity, ProjectMetadata, ReportConfig, RootConfig,
19
+ SchemaConfig, SchemaEvolutionConfig, SchemaEvolutionIncompatibleAction, SchemaEvolutionMode,
20
20
  SchemaMismatchConfig, SinkConfig, SinkOptions, SinkTarget, SourceConfig, SourceOptions,
21
21
  StorageDefinition, StoragesConfig, WriteMode,
22
22
  };
@@ -830,9 +830,18 @@ fn parse_archive_target(value: &Yaml) -> FloeResult<ArchiveTarget> {
830
830
  fn parse_policy(value: &Yaml) -> FloeResult<PolicyConfig> {
831
831
  let hash = yaml_hash(value, "policy")?;
832
832
  validate_known_keys(hash, "policy", &["severity"])?;
833
- Ok(PolicyConfig {
834
- severity: get_string(hash, "severity", "policy")?,
835
- })
833
+ let severity_str = get_string(hash, "severity", "policy")?;
834
+ let severity = match severity_str.as_str() {
835
+ "warn" => PolicySeverity::Warn,
836
+ "reject" => PolicySeverity::Reject,
837
+ "abort" => PolicySeverity::Abort,
838
+ other => {
839
+ return Err(Box::new(ConfigError(format!(
840
+ "policy.severity={other} is unsupported (allowed: warn, reject, abort)"
841
+ ))))
842
+ }
843
+ };
844
+ Ok(PolicyConfig { severity })
836
845
  }
837
846
 
838
847
  fn parse_schema(value: &Yaml) -> FloeResult<SchemaConfig> {
@@ -1061,6 +1070,20 @@ fn opt_u64(hash: &Hash, key: &str, ctx: &str) -> FloeResult<Option<u64>> {
1061
1070
  }
1062
1071
  }
1063
1072
 
1073
+ fn opt_u32(hash: &Hash, key: &str, ctx: &str) -> FloeResult<Option<u32>> {
1074
+ match opt_u64(hash, key, ctx)? {
1075
+ None => Ok(None),
1076
+ Some(v) => {
1077
+ if v > u32::MAX as u64 {
1078
+ return Err(Box::new(ConfigError(format!(
1079
+ "value at {ctx}.{key} exceeds maximum allowed value"
1080
+ ))));
1081
+ }
1082
+ Ok(Some(v as u32))
1083
+ }
1084
+ }
1085
+ }
1086
+
1064
1087
  fn parse_pii_config(value: &Yaml) -> FloeResult<PiiConfig> {
1065
1088
  let hash = yaml_hash(value, "pii")?;
1066
1089
  validate_known_keys(hash, "pii", &["columns"])?;
@@ -1111,7 +1134,14 @@ fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
1111
1134
  validate_known_keys(
1112
1135
  hash,
1113
1136
  "lineage",
1114
- &["url", "api_key", "timeout_secs", "namespace", "producer"],
1137
+ &[
1138
+ "url",
1139
+ "api_key",
1140
+ "timeout_secs",
1141
+ "namespace",
1142
+ "producer",
1143
+ "max_failures",
1144
+ ],
1115
1145
  )?;
1116
1146
  Ok(LineageConfig {
1117
1147
  url: get_string(hash, "url", "lineage")?,
@@ -1119,5 +1149,6 @@ fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
1119
1149
  timeout_secs: opt_u64(hash, "timeout_secs", "lineage")?,
1120
1150
  namespace: get_string(hash, "namespace", "lineage")?,
1121
1151
  producer: opt_string(hash, "producer", "lineage")?,
1152
+ max_failures: opt_u32(hash, "max_failures", "lineage")?,
1122
1153
  })
1123
1154
  }
@@ -126,6 +126,7 @@ pub struct ResolvedPath {
126
126
  pub local_path: Option<PathBuf>,
127
127
  }
128
128
 
129
+ #[derive(Clone)]
129
130
  pub struct StorageResolver {
130
131
  config_base: ConfigBase,
131
132
  default_name: String,
@@ -3,7 +3,7 @@ use std::path::Path;
3
3
 
4
4
  use polars::polars_utils::pl_str::PlSmallStr;
5
5
  use polars::prelude::{
6
- CsvEncoding, CsvParseOptions, CsvReadOptions, DataType, NullValues, Schema, TimeUnit,
6
+ CsvEncoding, CsvParseOptions, CsvReadOptions, DataType, NullValues, TimeUnit,
7
7
  };
8
8
 
9
9
  use crate::{ConfigError, FloeResult};
@@ -28,6 +28,7 @@ pub struct LineageConfig {
28
28
  pub timeout_secs: Option<u64>,
29
29
  pub namespace: String,
30
30
  pub producer: Option<String>,
31
+ pub max_failures: Option<u32>,
31
32
  }
32
33
 
33
34
  #[derive(Debug)]
@@ -261,12 +262,6 @@ pub struct SinkConfig {
261
262
  pub archive: Option<ArchiveTarget>,
262
263
  }
263
264
 
264
- impl SinkConfig {
265
- pub fn resolved_write_mode(&self) -> WriteMode {
266
- self.write_mode
267
- }
268
- }
269
-
270
265
  #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
271
266
  pub enum WriteMode {
272
267
  #[default]
@@ -445,9 +440,33 @@ pub struct ArchiveTarget {
445
440
  pub storage: Option<String>,
446
441
  }
447
442
 
443
+ #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
444
+ pub enum PolicySeverity {
445
+ #[default]
446
+ Warn,
447
+ Reject,
448
+ Abort,
449
+ }
450
+
451
+ impl PolicySeverity {
452
+ pub fn as_str(self) -> &'static str {
453
+ match self {
454
+ Self::Warn => "warn",
455
+ Self::Reject => "reject",
456
+ Self::Abort => "abort",
457
+ }
458
+ }
459
+ }
460
+
461
+ impl std::fmt::Display for PolicySeverity {
462
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
463
+ f.write_str(self.as_str())
464
+ }
465
+ }
466
+
448
467
  #[derive(Debug)]
449
468
  pub struct PolicyConfig {
450
- pub severity: String,
469
+ pub severity: PolicySeverity,
451
470
  }
452
471
 
453
472
  #[derive(Debug)]
@@ -464,36 +483,6 @@ impl SchemaConfig {
464
483
  pub fn resolved_schema_evolution(&self) -> SchemaEvolutionConfig {
465
484
  self.schema_evolution.unwrap_or_default()
466
485
  }
467
-
468
- pub fn to_polars_schema(&self) -> FloeResult<Schema> {
469
- let mut schema = Schema::with_capacity(self.columns.len());
470
- for column in &self.columns {
471
- let dtype = parse_data_type(&column.column_type)?;
472
- if schema.insert(column.name.as_str().into(), dtype).is_some() {
473
- return Err(Box::new(ConfigError(format!(
474
- "duplicate column name in schema: {}",
475
- column.name
476
- ))));
477
- }
478
- }
479
- Ok(schema)
480
- }
481
-
482
- pub fn to_polars_string_schema(&self) -> FloeResult<Schema> {
483
- let mut schema = Schema::with_capacity(self.columns.len());
484
- for column in &self.columns {
485
- if schema
486
- .insert(column.name.as_str().into(), DataType::String)
487
- .is_some()
488
- {
489
- return Err(Box::new(ConfigError(format!(
490
- "duplicate column name in schema: {}",
491
- column.name
492
- ))));
493
- }
494
- }
495
- Ok(schema)
496
- }
497
486
  }
498
487
 
499
488
  #[derive(Debug)]
@@ -1,18 +1,18 @@
1
1
  use std::collections::HashSet;
2
2
 
3
3
  use crate::config::{
4
- CatalogDefinition, CatalogTypeConfig, EntityConfig, IncrementalMode, RootConfig, SourceOptions,
5
- StorageDefinition,
4
+ CatalogDefinition, CatalogTypeConfig, EntityConfig, IncrementalMode, PolicySeverity,
5
+ RootConfig, SourceOptions, StorageDefinition,
6
6
  };
7
7
  use crate::io::format;
8
8
  use crate::io::read::json_selector::parse_selector;
9
9
  use crate::io::read::xml_selector;
10
+ use crate::io::write::sink_format::sink_format;
10
11
  use crate::{warnings, ConfigError, FloeResult};
11
12
 
12
13
  const ALLOWED_COLUMN_TYPES: &[&str] = &["string", "number", "boolean", "datetime", "date", "time"];
13
14
  const ALLOWED_CAST_MODES: &[&str] = &["strict", "coerce"];
14
15
  const ALLOWED_NORMALIZE_STRATEGIES: &[&str] = &["snake_case", "lower", "camel_case", "none"];
15
- const ALLOWED_POLICY_SEVERITIES: &[&str] = &["warn", "reject", "abort"];
16
16
  const ALLOWED_MISSING_POLICIES: &[&str] = &["reject_file", "fill_nulls"];
17
17
  const ALLOWED_EXTRA_POLICIES: &[&str] = &["reject_file", "ignore"];
18
18
  const ALLOWED_STORAGE_TYPES: &[&str] = &["local", "s3", "adls", "gcs"];
@@ -103,6 +103,11 @@ fn validate_lineage(lineage: &crate::config::LineageConfig) -> FloeResult<()> {
103
103
  "lineage.namespace must not be empty".to_string(),
104
104
  )));
105
105
  }
106
+ if lineage.max_failures == Some(0) {
107
+ return Err(Box::new(ConfigError(
108
+ "lineage.max_failures must be at least 1".to_string(),
109
+ )));
110
+ }
106
111
  Ok(())
107
112
  }
108
113
 
@@ -134,7 +139,6 @@ fn validate_entity(
134
139
  ) -> FloeResult<()> {
135
140
  validate_source(entity, storages)?;
136
141
  validate_state(entity)?;
137
- validate_policy(entity)?;
138
142
  validate_sink(entity, storages, catalogs)?;
139
143
  validate_schema(entity, config_version)?;
140
144
  if let Some(pii) = &entity.pii {
@@ -147,7 +151,7 @@ fn validate_pii(entity: &EntityConfig, pii: &crate::config::PiiConfig) -> FloeRe
147
151
  use crate::config::PiiStrategy;
148
152
  // Abort severity writes the raw input file to the rejected sink without
149
153
  // loading a DataFrame, bypassing masking entirely.
150
- if entity.policy.severity == "abort" {
154
+ if entity.policy.severity == PolicySeverity::Abort {
151
155
  return Err(Box::new(ConfigError(format!(
152
156
  "entity.name={} pii: masking is not applied when policy.severity=abort \
153
157
  because the raw file is written to sink.rejected without DataFrame processing",
@@ -197,7 +201,7 @@ fn validate_pii(entity: &EntityConfig, pii: &crate::config::PiiConfig) -> FloeRe
197
201
  .flatten()
198
202
  .map(|s| s.as_str())
199
203
  .collect();
200
- let write_mode = entity.sink.resolved_write_mode();
204
+ let write_mode = entity.sink.write_mode;
201
205
  let is_merge_mode = matches!(
202
206
  write_mode,
203
207
  crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
@@ -515,7 +519,7 @@ fn validate_sink(
515
519
  entity.sink.accepted.options.as_ref(),
516
520
  )?;
517
521
 
518
- if entity.policy.severity == "reject" && entity.sink.rejected.is_none() {
522
+ if entity.policy.severity == PolicySeverity::Reject && entity.sink.rejected.is_none() {
519
523
  return Err(Box::new(ConfigError(format!(
520
524
  "entity.name={} sink.rejected is required when policy.severity=reject",
521
525
  entity.name
@@ -534,28 +538,14 @@ fn validate_sink(
534
538
  entity.sink.accepted.storage.as_deref(),
535
539
  )?;
536
540
  storages.validate_reference(entity, "sink.accepted.storage", &accepted_storage)?;
537
- if entity.sink.accepted.format == "delta" {
538
- if let Some(storage_type) = storages.definition_type(&accepted_storage) {
539
- if storage_type != "local"
540
- && storage_type != "s3"
541
- && storage_type != "adls"
542
- && storage_type != "gcs"
543
- {
544
- return Err(Box::new(ConfigError(format!(
545
- "entity.name={} sink.accepted.format=delta is only supported on local, s3, adls, or gcs storage (got {})",
546
- entity.name, storage_type
547
- ))));
548
- }
549
- }
550
- }
551
- if entity.sink.accepted.format == "iceberg" {
552
- if let Some(storage_type) = storages.definition_type(&accepted_storage) {
553
- if storage_type != "local" && storage_type != "s3" && storage_type != "gcs" {
554
- return Err(Box::new(ConfigError(format!(
555
- "entity.name={} sink.accepted.format=iceberg is only supported on local, s3, or gcs storage for now (got {})",
556
- entity.name, storage_type
557
- ))));
558
- }
541
+ if let Some(storage_type) = storages.definition_type(&accepted_storage) {
542
+ let fmt = sink_format(entity.sink.accepted.format.as_str())?;
543
+ if !fmt.supported_storages().contains(&storage_type) {
544
+ let supported = fmt.supported_storages().join(", ");
545
+ return Err(Box::new(ConfigError(format!(
546
+ "entity.name={} sink.accepted.format={} is not supported on {} storage (supported: {})",
547
+ entity.name, entity.sink.accepted.format, storage_type, supported
548
+ ))));
559
549
  }
560
550
  }
561
551
  validate_iceberg_catalog_binding(entity, storages, catalogs, &accepted_storage)?;
@@ -589,20 +579,23 @@ fn validate_sink(
589
579
  }
590
580
 
591
581
  fn validate_sink_write_mode(entity: &EntityConfig) -> FloeResult<()> {
592
- let write_mode = entity.sink.resolved_write_mode();
582
+ let write_mode = entity.sink.write_mode;
583
+ let fmt = sink_format(entity.sink.accepted.format.as_str())?;
584
+ if !fmt.supported_modes().contains(&write_mode) {
585
+ return Err(Box::new(ConfigError(format!(
586
+ "entity.name={} sink.write_mode={} is not supported by sink.accepted.format={}",
587
+ entity.name,
588
+ write_mode.as_str(),
589
+ entity.sink.accepted.format
590
+ ))));
591
+ }
592
+
593
593
  let is_merge_mode = matches!(
594
594
  write_mode,
595
595
  crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
596
596
  );
597
597
  if is_merge_mode {
598
598
  let mode_name = write_mode.as_str();
599
- if entity.sink.accepted.format != "delta" {
600
- return Err(Box::new(ConfigError(format!(
601
- "entity.name={} sink.write_mode={} requires sink.accepted.format=delta",
602
- entity.name, mode_name
603
- ))));
604
- }
605
-
606
599
  let primary_key = entity.schema.primary_key.as_ref().ok_or_else(|| {
607
600
  Box::new(ConfigError(format!(
608
601
  "entity.name={} sink.write_mode={} requires schema.primary_key",
@@ -628,9 +621,16 @@ fn validate_merge_options(
628
621
  return Ok(());
629
622
  };
630
623
 
631
- if entity.sink.accepted.format != "delta" {
624
+ let fmt = sink_format(entity.sink.accepted.format.as_str())?;
625
+ let supports_merge = fmt.supported_modes().iter().any(|m| {
626
+ matches!(
627
+ m,
628
+ crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
629
+ )
630
+ });
631
+ if !supports_merge {
632
632
  return Err(Box::new(ConfigError(format!(
633
- "entity.name={} sink.accepted.merge is only supported when sink.accepted.format=delta",
633
+ "entity.name={} sink.accepted.merge is only supported when sink.accepted.format supports merge (e.g. delta)",
634
634
  entity.name
635
635
  ))));
636
636
  }
@@ -1063,18 +1063,6 @@ fn validate_sink_partitioning(entity: &EntityConfig) -> FloeResult<()> {
1063
1063
  Ok(())
1064
1064
  }
1065
1065
 
1066
- fn validate_policy(entity: &EntityConfig) -> FloeResult<()> {
1067
- if !ALLOWED_POLICY_SEVERITIES.contains(&entity.policy.severity.as_str()) {
1068
- return Err(Box::new(ConfigError(format!(
1069
- "entity.name={} policy.severity={} is unsupported (allowed: {})",
1070
- entity.name,
1071
- entity.policy.severity,
1072
- ALLOWED_POLICY_SEVERITIES.join(", ")
1073
- ))));
1074
- }
1075
- Ok(())
1076
- }
1077
-
1078
1066
  fn validate_schema(entity: &EntityConfig, config_version: ConfigVersion) -> FloeResult<()> {
1079
1067
  if entity.source.format == "json" && entity.schema.columns.len() > MAX_JSON_COLUMNS {
1080
1068
  return Err(Box::new(ConfigError(format!(
@@ -0,0 +1,27 @@
1
+ use crate::log::emit_log;
2
+
3
+ #[derive(Debug, thiserror::Error)]
4
+ #[error("{0}")]
5
+ pub struct ConfigError(pub String);
6
+
7
+ #[derive(Debug, thiserror::Error)]
8
+ #[error("{0}")]
9
+ pub struct RunError(pub String);
10
+
11
+ #[derive(Debug, thiserror::Error)]
12
+ #[error("{0}")]
13
+ pub struct StorageError(pub String);
14
+
15
+ #[derive(Debug, thiserror::Error)]
16
+ #[error("{0}")]
17
+ pub struct IoError(pub String);
18
+
19
+ pub fn emit(
20
+ run_id: &str,
21
+ entity: Option<&str>,
22
+ input: Option<&str>,
23
+ code: Option<&str>,
24
+ message: &str,
25
+ ) {
26
+ emit_log("error", run_id, entity, input, code, message);
27
+ }
@@ -46,7 +46,7 @@ pub enum ReadInput {
46
46
  },
47
47
  }
48
48
 
49
- #[derive(Debug, Clone)]
49
+ #[derive(Debug, Clone, Default)]
50
50
  pub struct AcceptedWriteMetrics {
51
51
  pub total_bytes_written: Option<u64>,
52
52
  pub avg_file_size_mb: Option<f64>,
@@ -85,6 +85,26 @@ pub struct AcceptedMergeMetrics {
85
85
  }
86
86
 
87
87
  #[derive(Debug, Clone)]
88
+ pub enum CatalogRegistration {
89
+ UnityDelta {
90
+ catalog_name: String,
91
+ schema: String,
92
+ table: String,
93
+ },
94
+ IcebergGlue {
95
+ catalog_name: String,
96
+ database: Option<String>,
97
+ namespace: String,
98
+ table: String,
99
+ },
100
+ IcebergRest {
101
+ catalog_name: String,
102
+ namespace: String,
103
+ table: String,
104
+ },
105
+ }
106
+
107
+ #[derive(Debug, Clone, Default)]
88
108
  pub struct AcceptedWriteOutput {
89
109
  pub files_written: Option<u64>,
90
110
  pub parts_written: u64,
@@ -92,13 +112,7 @@ pub struct AcceptedWriteOutput {
92
112
  pub table_version: Option<i64>,
93
113
  pub snapshot_id: Option<i64>,
94
114
  pub table_root_uri: Option<String>,
95
- pub iceberg_catalog_name: Option<String>,
96
- pub iceberg_database: Option<String>,
97
- pub iceberg_namespace: Option<String>,
98
- pub iceberg_table: Option<String>,
99
- pub delta_catalog_name: Option<String>,
100
- pub delta_catalog_schema: Option<String>,
101
- pub delta_catalog_table: Option<String>,
115
+ pub catalog: Option<CatalogRegistration>,
102
116
  pub metrics: AcceptedWriteMetrics,
103
117
  pub merge: Option<AcceptedMergeMetrics>,
104
118
  pub schema_evolution: AcceptedSchemaEvolution,
@@ -163,20 +177,16 @@ pub trait InputAdapter: Send + Sync {
163
177
  }
164
178
  }
165
179
 
166
- pub trait AcceptedSinkAdapter: Send + Sync {
167
- #[allow(clippy::too_many_arguments)]
168
- fn write_accepted(
169
- &self,
170
- target: &Target,
171
- df: &mut DataFrame,
172
- mode: config::WriteMode,
173
- output_stem: &str,
174
- temp_dir: Option<&Path>,
175
- cloud: &mut io::storage::CloudClient,
176
- resolver: &config::StorageResolver,
177
- catalogs: &config::CatalogResolver,
178
- entity: &config::EntityConfig,
179
- ) -> FloeResult<AcceptedWriteOutput>;
180
+ pub struct AcceptedWriteRequest<'a> {
181
+ pub target: &'a Target,
182
+ pub df: &'a mut DataFrame,
183
+ pub mode: config::WriteMode,
184
+ pub output_stem: &'a str,
185
+ pub temp_dir: Option<&'a Path>,
186
+ pub cloud: &'a mut io::storage::CloudClient,
187
+ pub resolver: &'a config::StorageResolver,
188
+ pub catalogs: &'a config::CatalogResolver,
189
+ pub entity: &'a config::EntityConfig,
180
190
  }
181
191
 
182
192
  pub struct RejectedWriteRequest<'a> {
@@ -247,7 +257,7 @@ pub fn ensure_input_format(entity_name: &str, format: &str) -> FloeResult<()> {
247
257
  }
248
258
 
249
259
  pub fn ensure_accepted_sink_format(entity_name: &str, format: &str) -> FloeResult<()> {
250
- if accepted_sink_adapter(format).is_err() {
260
+ if crate::io::write::sink_format::sink_format(format).is_err() {
251
261
  return Err(Box::new(unsupported_format_error(
252
262
  FormatKind::SinkAccepted,
253
263
  format,
@@ -371,19 +381,6 @@ pub fn input_adapter(format: &str) -> FloeResult<&'static dyn InputAdapter> {
371
381
  }
372
382
  }
373
383
 
374
- pub fn accepted_sink_adapter(format: &str) -> FloeResult<&'static dyn AcceptedSinkAdapter> {
375
- match format {
376
- "parquet" => Ok(io::write::parquet::parquet_accepted_adapter()),
377
- "delta" => Ok(io::write::delta::delta_accepted_adapter()),
378
- "iceberg" => Ok(io::write::iceberg::iceberg_accepted_adapter()),
379
- _ => Err(Box::new(unsupported_format_error(
380
- FormatKind::SinkAccepted,
381
- format,
382
- None,
383
- ))),
384
- }
385
- }
386
-
387
384
  pub fn rejected_sink_adapter(format: &str) -> FloeResult<&'static dyn RejectedSinkAdapter> {
388
385
  match format {
389
386
  "csv" => Ok(io::write::csv::csv_rejected_adapter()),