floe-python 0.3.10__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. {floe_python-0.3.10 → floe_python-0.4.1}/Cargo.lock +56 -3
  2. {floe_python-0.3.10 → floe_python-0.4.1}/PKG-INFO +1 -1
  3. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/Cargo.toml +4 -2
  4. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/mismatch.rs +4 -3
  5. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/mod.rs +4 -1
  6. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/parse.rs +39 -8
  7. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/storage.rs +2 -1
  8. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/types.rs +27 -38
  9. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/validate.rs +46 -51
  10. floe_python-0.4.1/crates/floe-core/src/errors.rs +27 -0
  11. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/format.rs +33 -36
  12. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/mod.rs +42 -0
  13. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/ops/inputs.rs +5 -1
  14. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/adls.rs +121 -1
  15. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/gcs.rs +123 -1
  16. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/local.rs +110 -1
  17. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/s3.rs +113 -1
  18. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/unique_seed/mod.rs +15 -41
  19. floe_python-0.4.1/crates/floe-core/src/io/write/accepted.rs +1 -0
  20. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta/unity.rs +16 -4
  21. floe_python-0.4.1/crates/floe-core/src/io/write/delta/unity_tests.rs +209 -0
  22. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta.rs +73 -30
  23. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/rest.rs +6 -4
  24. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg.rs +314 -44
  25. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/mod.rs +1 -0
  26. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/parquet.rs +93 -30
  27. floe_python-0.4.1/crates/floe-core/src/io/write/sink_format.rs +66 -0
  28. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +2 -1
  29. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +15 -15
  30. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/merge/shared.rs +55 -112
  31. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/mod.rs +1 -1
  32. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/lib.rs +26 -0
  33. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/lineage/mod.rs +85 -20
  34. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/manifest/builder.rs +24 -7
  35. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/manifest/model.rs +8 -1
  36. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/profile/parse.rs +80 -2
  37. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/profile/types.rs +24 -2
  38. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/profile/validate.rs +126 -0
  39. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/report/entity.rs +35 -14
  40. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/context.rs +14 -0
  41. floe_python-0.4.1/crates/floe-core/src/run/entity/accepted_write.rs +156 -0
  42. floe_python-0.4.1/crates/floe-core/src/run/entity/incremental.rs +281 -0
  43. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/mod.rs +53 -52
  44. floe_python-0.4.1/crates/floe-core/src/run/entity/pii.rs +446 -0
  45. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/precheck.rs +3 -2
  46. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/validate_split.rs +12 -16
  47. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/mod.rs +48 -37
  48. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/output.rs +13 -16
  49. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/runtime.rs +5 -4
  50. floe_python-0.4.1/crates/floe-core/src/state/mod.rs +730 -0
  51. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/delta_run.rs +2 -2
  52. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/add_entity.rs +2 -1
  53. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/adls_validation.rs +1 -1
  54. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/catalogs.rs +2 -2
  55. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/config_validation.rs +36 -4
  56. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/lineage_validation.rs +57 -0
  57. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/parse.rs +4 -4
  58. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/inputs.rs +1 -1
  59. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/local.rs +48 -1
  60. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/delta_write.rs +1 -1
  61. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +1 -1
  62. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/object_store.rs +8 -8
  63. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +1 -1
  64. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/manifest/mod.rs +22 -3
  65. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/profile/parse.rs +28 -4
  66. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/profile/validate.rs +36 -1
  67. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/entity/incremental.rs +39 -0
  68. floe_python-0.4.1/crates/floe-core/tests/unit/run/lineage.rs +210 -0
  69. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/mod.rs +1 -0
  70. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/state/mod.rs +136 -77
  71. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/Cargo.toml +2 -2
  72. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/functions.rs +5 -1
  73. {floe_python-0.3.10 → floe_python-0.4.1}/pyproject.toml +1 -1
  74. floe_python-0.3.10/crates/floe-core/src/errors.rs +0 -57
  75. floe_python-0.3.10/crates/floe-core/src/io/unique_seed/delta.rs +0 -47
  76. floe_python-0.3.10/crates/floe-core/src/io/unique_seed/iceberg.rs +0 -299
  77. floe_python-0.3.10/crates/floe-core/src/io/unique_seed/parquet.rs +0 -80
  78. floe_python-0.3.10/crates/floe-core/src/io/write/accepted.rs +0 -56
  79. floe_python-0.3.10/crates/floe-core/src/run/entity/accepted_write.rs +0 -242
  80. floe_python-0.3.10/crates/floe-core/src/run/entity/incremental.rs +0 -177
  81. floe_python-0.3.10/crates/floe-core/src/run/entity/pii.rs +0 -182
  82. floe_python-0.3.10/crates/floe-core/src/state/mod.rs +0 -356
  83. {floe_python-0.3.10 → floe_python-0.4.1}/Cargo.toml +0 -0
  84. {floe_python-0.3.10 → floe_python-0.4.1}/README.md +0 -0
  85. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/README.md +0 -0
  86. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/add_entity.rs +0 -0
  87. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/cast.rs +0 -0
  88. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/mod.rs +0 -0
  89. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/normalize.rs +0 -0
  90. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/not_null.rs +0 -0
  91. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/checks/unique.rs +0 -0
  92. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/catalog.rs +0 -0
  93. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/location.rs +0 -0
  94. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/template.rs +0 -0
  95. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/config/yaml_decode.rs +0 -0
  96. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/mod.rs +0 -0
  97. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/avro.rs +0 -0
  98. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/csv.rs +0 -0
  99. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
  100. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/json.rs +0 -0
  101. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/json_selector.rs +0 -0
  102. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/mod.rs +0 -0
  103. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/orc.rs +0 -0
  104. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/parquet.rs +0 -0
  105. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/xlsx.rs +0 -0
  106. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/xml.rs +0 -0
  107. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
  108. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
  109. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
  110. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
  111. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
  112. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
  113. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
  114. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
  115. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/object_store.rs +0 -0
  116. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
  117. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
  118. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
  119. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
  120. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/storage/target.rs +0 -0
  121. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
  122. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/csv.rs +0 -0
  123. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
  124. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta/options.rs +0 -0
  125. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
  126. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/context.rs +0 -0
  127. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
  128. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
  129. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
  130. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
  131. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/metrics.rs +0 -0
  132. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/parts.rs +0 -0
  133. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
  134. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
  135. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
  136. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/log.rs +0 -0
  137. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/manifest/mod.rs +0 -0
  138. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/profile/mod.rs +0 -0
  139. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/report/build.rs +0 -0
  140. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/report/mod.rs +0 -0
  141. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/report/output.rs +0 -0
  142. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/process.rs +0 -0
  143. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/entity/resolve.rs +0 -0
  144. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/events.rs +0 -0
  145. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/file.rs +0 -0
  146. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/run/perf.rs +0 -0
  147. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/runner/mod.rs +0 -0
  148. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/runner/outcome.rs +0 -0
  149. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/vars/mod.rs +0 -0
  150. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/vars/resolve.rs +0 -0
  151. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/src/warnings.rs +0 -0
  152. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/archive_run.rs +0 -0
  153. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/composite_unique.rs +0 -0
  154. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/dry_run.rs +0 -0
  155. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/fixed_width.rs +0 -0
  156. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +0 -0
  157. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/iceberg_glue_run.rs +0 -0
  158. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/iceberg_run.rs +0 -0
  159. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/iceberg_s3_run.rs +0 -0
  160. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/json_selectors.rs +0 -0
  161. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/local_run.rs +0 -0
  162. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/mod.rs +0 -0
  163. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/path_normalization.rs +0 -0
  164. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration/run_entities_filter.rs +0 -0
  165. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/integration.rs +0 -0
  166. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/common.rs +0 -0
  167. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/adls_storage.rs +0 -0
  168. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
  169. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
  170. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/local_storage.rs +0 -0
  171. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/mod.rs +0 -0
  172. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
  173. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/remote_base.rs +0 -0
  174. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/config/templating.rs +0 -0
  175. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/format.rs +0 -0
  176. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/mod.rs +0 -0
  177. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/avro_input.rs +0 -0
  178. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
  179. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/json_array.rs +0 -0
  180. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +0 -0
  181. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
  182. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
  183. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/orc_input.rs +0 -0
  184. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/parquet_input.rs +0 -0
  185. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
  186. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +0 -0
  187. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/xml.rs +0 -0
  188. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
  189. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
  190. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +0 -0
  191. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
  192. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
  193. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
  194. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
  195. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
  196. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
  197. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/delta_merge.rs +0 -0
  198. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
  199. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
  200. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
  201. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
  202. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/mod.rs +0 -0
  203. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
  204. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
  205. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/report/mod.rs +0 -0
  206. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/report/storage.rs +0 -0
  207. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/check_order.rs +0 -0
  208. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/checks.rs +0 -0
  209. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +0 -0
  210. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
  211. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
  212. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/pii.rs +0 -0
  213. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/report.rs +0 -0
  214. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/run/schema_mismatch.rs +0 -0
  215. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
  216. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
  217. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
  218. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
  219. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-core/tests/unit.rs +0 -0
  220. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/.gitignore +0 -0
  221. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/README.md +0 -0
  222. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/lib.rs +0 -0
  223. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/observer.rs +0 -0
  224. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/types/config.rs +0 -0
  225. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/types/errors.rs +0 -0
  226. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/types/mod.rs +0 -0
  227. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/src/types/outcome.rs +0 -0
  228. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/fixtures/config.yml +0 -0
  229. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
  230. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
  231. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/fixtures/profile.yml +0 -0
  232. {floe_python-0.3.10 → floe_python-0.4.1}/crates/floe-python/tests/test_floe.py +0 -0
  233. {floe_python-0.3.10 → floe_python-0.4.1}/python/floe/__init__.py +0 -0
  234. {floe_python-0.3.10 → floe_python-0.4.1}/python/floe/_floe.pyi +0 -0
  235. {floe_python-0.3.10 → floe_python-0.4.1}/python/floe/py.typed +0 -0
@@ -476,6 +476,16 @@ version = "0.3.2"
476
476
  source = "registry+https://github.com/rust-lang/crates.io-index"
477
477
  checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063"
478
478
 
479
+ [[package]]
480
+ name = "assert-json-diff"
481
+ version = "2.0.2"
482
+ source = "registry+https://github.com/rust-lang/crates.io-index"
483
+ checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12"
484
+ dependencies = [
485
+ "serde",
486
+ "serde_json",
487
+ ]
488
+
479
489
  [[package]]
480
490
  name = "assert_cmd"
481
491
  version = "2.1.2"
@@ -1708,6 +1718,15 @@ version = "1.0.4"
1708
1718
  source = "registry+https://github.com/rust-lang/crates.io-index"
1709
1719
  checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
1710
1720
 
1721
+ [[package]]
1722
+ name = "colored"
1723
+ version = "3.1.1"
1724
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1725
+ checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
1726
+ dependencies = [
1727
+ "windows-sys 0.61.2",
1728
+ ]
1729
+
1711
1730
  [[package]]
1712
1731
  name = "comfy-table"
1713
1732
  version = "7.2.2"
@@ -3380,7 +3399,7 @@ dependencies = [
3380
3399
 
3381
3400
  [[package]]
3382
3401
  name = "floe-cli"
3383
- version = "0.3.10"
3402
+ version = "0.4.1"
3384
3403
  dependencies = [
3385
3404
  "assert_cmd",
3386
3405
  "clap",
@@ -3393,7 +3412,7 @@ dependencies = [
3393
3412
 
3394
3413
  [[package]]
3395
3414
  name = "floe-core"
3396
- version = "0.3.10"
3415
+ version = "0.4.1"
3397
3416
  dependencies = [
3398
3417
  "apache-avro 0.16.0",
3399
3418
  "arrow",
@@ -3414,6 +3433,7 @@ dependencies = [
3414
3433
  "iceberg",
3415
3434
  "iceberg-catalog-rest",
3416
3435
  "iceberg-storage-opendal",
3436
+ "mockito",
3417
3437
  "orc-rust",
3418
3438
  "polars",
3419
3439
  "rayon",
@@ -3425,6 +3445,7 @@ dependencies = [
3425
3445
  "serde_yaml",
3426
3446
  "sha2",
3427
3447
  "tempfile",
3448
+ "thiserror 1.0.69",
3428
3449
  "time",
3429
3450
  "tokio",
3430
3451
  "url",
@@ -3434,7 +3455,7 @@ dependencies = [
3434
3455
 
3435
3456
  [[package]]
3436
3457
  name = "floe-python"
3437
- version = "0.3.10"
3458
+ version = "0.4.1"
3438
3459
  dependencies = [
3439
3460
  "floe-core",
3440
3461
  "pyo3",
@@ -4046,6 +4067,7 @@ dependencies = [
4046
4067
  "http 1.4.0",
4047
4068
  "http-body 1.0.1",
4048
4069
  "httparse",
4070
+ "httpdate",
4049
4071
  "itoa",
4050
4072
  "pin-project-lite",
4051
4073
  "pin-utils",
@@ -4861,6 +4883,31 @@ dependencies = [
4861
4883
  "windows-sys 0.61.2",
4862
4884
  ]
4863
4885
 
4886
+ [[package]]
4887
+ name = "mockito"
4888
+ version = "1.7.2"
4889
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4890
+ checksum = "90820618712cab19cfc46b274c6c22546a82affcb3c3bdf0f29e3db8e1bb92c0"
4891
+ dependencies = [
4892
+ "assert-json-diff",
4893
+ "bytes",
4894
+ "colored",
4895
+ "futures-core",
4896
+ "http 1.4.0",
4897
+ "http-body 1.0.1",
4898
+ "http-body-util",
4899
+ "hyper 1.8.1",
4900
+ "hyper-util",
4901
+ "log",
4902
+ "pin-project-lite",
4903
+ "rand 0.9.4",
4904
+ "regex",
4905
+ "serde_json",
4906
+ "serde_urlencoded",
4907
+ "similar",
4908
+ "tokio",
4909
+ ]
4910
+
4864
4911
  [[package]]
4865
4912
  name = "moka"
4866
4913
  version = "0.12.13"
@@ -7420,6 +7467,12 @@ version = "0.1.5"
7420
7467
  source = "registry+https://github.com/rust-lang/crates.io-index"
7421
7468
  checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
7422
7469
 
7470
+ [[package]]
7471
+ name = "similar"
7472
+ version = "2.7.0"
7473
+ source = "registry+https://github.com/rust-lang/crates.io-index"
7474
+ checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
7475
+
7423
7476
  [[package]]
7424
7477
  name = "simple_asn1"
7425
7478
  version = "0.6.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: floe-python
3
- Version: 0.3.10
3
+ Version: 0.4.1
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Intended Audience :: Science/Research
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "floe-core"
3
- version = "0.3.10"
3
+ version = "0.4.1"
4
4
  edition = "2021"
5
5
  description = "Core library for Floe, a YAML-driven technical ingestion tool."
6
6
  license = "MIT"
@@ -41,16 +41,18 @@ uuid = "1"
41
41
  arrow = "57"
42
42
  iceberg = "0.9.0"
43
43
  iceberg-catalog-rest = "0.9.0"
44
- iceberg-storage-opendal = { version = "0.9.1", features = ["opendal-gcs"] }
44
+ iceberg-storage-opendal = { version = "0.9.1", features = ["opendal-s3", "opendal-gcs"] }
45
45
  df-interchange = { version = "0.3.2", features = ["arrow_57", "polars_0_52"] }
46
46
  orc-rust = "0.7.1"
47
47
 
48
48
  reqwest = { version = "0.12", default-features = true, features = ["native-tls-vendored", "json", "blocking"] }
49
49
  sha2 = "0.10"
50
50
  hex = "0.4"
51
+ thiserror = "1"
51
52
 
52
53
  [dev-dependencies]
53
54
  rust_xlsxwriter = "0.67"
55
+ mockito = "1"
54
56
 
55
57
  [features]
56
58
  vendored-openssl = []
@@ -2,6 +2,7 @@ use std::collections::HashMap;
2
2
 
3
3
  use polars::prelude::{DataFrame, DataType, Series};
4
4
 
5
+ use crate::config::PolicySeverity;
5
6
  use crate::errors::RunError;
6
7
  use crate::{config, report, ConfigError, FloeResult};
7
8
 
@@ -124,7 +125,7 @@ pub fn plan_schema_mismatch(
124
125
  let mut warning = None;
125
126
  let rejection_requested = (effective_missing == "reject_file" && !missing.is_empty())
126
127
  || (effective_extra == "reject_file" && !extra.is_empty());
127
- if rejection_requested && entity.policy.severity == "warn" {
128
+ if rejection_requested && entity.policy.severity == PolicySeverity::Warn {
128
129
  warning = Some(format!(
129
130
  "entity.name={} schema mismatch requested reject_file but policy.severity=warn; continuing",
130
131
  entity.name
@@ -139,10 +140,10 @@ pub fn plan_schema_mismatch(
139
140
  if (effective_missing == "reject_file" && !missing.is_empty())
140
141
  || (effective_extra == "reject_file" && !extra.is_empty())
141
142
  {
142
- if entity.policy.severity == "abort" {
143
+ if entity.policy.severity == PolicySeverity::Abort {
143
144
  aborted = true;
144
145
  action = report::MismatchAction::Aborted;
145
- } else if entity.policy.severity == "reject" {
146
+ } else if entity.policy.severity == PolicySeverity::Reject {
146
147
  rejected = true;
147
148
  action = report::MismatchAction::RejectedFile;
148
149
  }
@@ -13,6 +13,9 @@ pub use storage::{resolve_local_path, ConfigBase, ResolvedPath, StorageResolver}
13
13
  pub use types::*;
14
14
 
15
15
  pub use parse::extract_raw_env_vars;
16
- pub(crate) use parse::{parse_catalogs_with_context, parse_config, parse_config_with_vars};
16
+ pub(crate) use parse::{
17
+ parse_catalogs_with_context, parse_config, parse_config_with_vars, parse_lineage_config,
18
+ parse_storages,
19
+ };
17
20
  pub(crate) use template::apply_templates_with_vars;
18
21
  pub(crate) use validate::{extract_first_n, extract_last_n, validate_config};
@@ -15,8 +15,8 @@ use crate::config::{
15
15
  DomainConfig, EntityConfig, EntityMetadata, EntityStateConfig, EnvConfig,
16
16
  IcebergPartitionFieldConfig, IcebergSinkTargetConfig, IncrementalMode, LineageConfig,
17
17
  MergeOptionsConfig, MergeScd2OptionsConfig, NormalizeColumnsConfig, PiiColumnConfig, PiiConfig,
18
- PiiStrategy, PolicyConfig, ProjectMetadata, ReportConfig, RootConfig, SchemaConfig,
19
- SchemaEvolutionConfig, SchemaEvolutionIncompatibleAction, SchemaEvolutionMode,
18
+ PiiStrategy, PolicyConfig, PolicySeverity, ProjectMetadata, ReportConfig, RootConfig,
19
+ SchemaConfig, SchemaEvolutionConfig, SchemaEvolutionIncompatibleAction, SchemaEvolutionMode,
20
20
  SchemaMismatchConfig, SinkConfig, SinkOptions, SinkTarget, SourceConfig, SourceOptions,
21
21
  StorageDefinition, StoragesConfig, WriteMode,
22
22
  };
@@ -658,7 +658,7 @@ fn parse_sink_delta_options(value: &Yaml, ctx: &str) -> FloeResult<DeltaSinkTarg
658
658
  })
659
659
  }
660
660
 
661
- fn parse_storages(value: &Yaml) -> FloeResult<StoragesConfig> {
661
+ pub(crate) fn parse_storages(value: &Yaml) -> FloeResult<StoragesConfig> {
662
662
  let hash = yaml_hash(value, "storages")?;
663
663
  validate_known_keys(hash, "storages", &["default", "definitions"])?;
664
664
  let definitions_yaml = match hash_get(hash, "definitions") {
@@ -830,9 +830,18 @@ fn parse_archive_target(value: &Yaml) -> FloeResult<ArchiveTarget> {
830
830
  fn parse_policy(value: &Yaml) -> FloeResult<PolicyConfig> {
831
831
  let hash = yaml_hash(value, "policy")?;
832
832
  validate_known_keys(hash, "policy", &["severity"])?;
833
- Ok(PolicyConfig {
834
- severity: get_string(hash, "severity", "policy")?,
835
- })
833
+ let severity_str = get_string(hash, "severity", "policy")?;
834
+ let severity = match severity_str.as_str() {
835
+ "warn" => PolicySeverity::Warn,
836
+ "reject" => PolicySeverity::Reject,
837
+ "abort" => PolicySeverity::Abort,
838
+ other => {
839
+ return Err(Box::new(ConfigError(format!(
840
+ "policy.severity={other} is unsupported (allowed: warn, reject, abort)"
841
+ ))))
842
+ }
843
+ };
844
+ Ok(PolicyConfig { severity })
836
845
  }
837
846
 
838
847
  fn parse_schema(value: &Yaml) -> FloeResult<SchemaConfig> {
@@ -1061,6 +1070,20 @@ fn opt_u64(hash: &Hash, key: &str, ctx: &str) -> FloeResult<Option<u64>> {
1061
1070
  }
1062
1071
  }
1063
1072
 
1073
+ fn opt_u32(hash: &Hash, key: &str, ctx: &str) -> FloeResult<Option<u32>> {
1074
+ match opt_u64(hash, key, ctx)? {
1075
+ None => Ok(None),
1076
+ Some(v) => {
1077
+ if v > u32::MAX as u64 {
1078
+ return Err(Box::new(ConfigError(format!(
1079
+ "value at {ctx}.{key} exceeds maximum allowed value"
1080
+ ))));
1081
+ }
1082
+ Ok(Some(v as u32))
1083
+ }
1084
+ }
1085
+ }
1086
+
1064
1087
  fn parse_pii_config(value: &Yaml) -> FloeResult<PiiConfig> {
1065
1088
  let hash = yaml_hash(value, "pii")?;
1066
1089
  validate_known_keys(hash, "pii", &["columns"])?;
@@ -1106,12 +1129,19 @@ fn parse_pii_column(value: &Yaml) -> FloeResult<PiiColumnConfig> {
1106
1129
  })
1107
1130
  }
1108
1131
 
1109
- fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
1132
+ pub(crate) fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
1110
1133
  let hash = yaml_hash(value, "lineage")?;
1111
1134
  validate_known_keys(
1112
1135
  hash,
1113
1136
  "lineage",
1114
- &["url", "api_key", "timeout_secs", "namespace", "producer"],
1137
+ &[
1138
+ "url",
1139
+ "api_key",
1140
+ "timeout_secs",
1141
+ "namespace",
1142
+ "producer",
1143
+ "max_failures",
1144
+ ],
1115
1145
  )?;
1116
1146
  Ok(LineageConfig {
1117
1147
  url: get_string(hash, "url", "lineage")?,
@@ -1119,5 +1149,6 @@ fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
1119
1149
  timeout_secs: opt_u64(hash, "timeout_secs", "lineage")?,
1120
1150
  namespace: get_string(hash, "namespace", "lineage")?,
1121
1151
  producer: opt_string(hash, "producer", "lineage")?,
1152
+ max_failures: opt_u32(hash, "max_failures", "lineage")?,
1122
1153
  })
1123
1154
  }
@@ -126,6 +126,7 @@ pub struct ResolvedPath {
126
126
  pub local_path: Option<PathBuf>,
127
127
  }
128
128
 
129
+ #[derive(Clone)]
129
130
  pub struct StorageResolver {
130
131
  config_base: ConfigBase,
131
132
  default_name: String,
@@ -614,7 +615,7 @@ fn parent_prefix(key: &str) -> String {
614
615
  }
615
616
  }
616
617
 
617
- fn is_remote_uri(value: &str) -> bool {
618
+ pub(crate) fn is_remote_uri(value: &str) -> bool {
618
619
  value.starts_with("s3://") || value.starts_with("gs://") || value.starts_with("abfs://")
619
620
  }
620
621
 
@@ -3,7 +3,7 @@ use std::path::Path;
3
3
 
4
4
  use polars::polars_utils::pl_str::PlSmallStr;
5
5
  use polars::prelude::{
6
- CsvEncoding, CsvParseOptions, CsvReadOptions, DataType, NullValues, Schema, TimeUnit,
6
+ CsvEncoding, CsvParseOptions, CsvReadOptions, DataType, NullValues, TimeUnit,
7
7
  };
8
8
 
9
9
  use crate::{ConfigError, FloeResult};
@@ -28,6 +28,7 @@ pub struct LineageConfig {
28
28
  pub timeout_secs: Option<u64>,
29
29
  pub namespace: String,
30
30
  pub producer: Option<String>,
31
+ pub max_failures: Option<u32>,
31
32
  }
32
33
 
33
34
  #[derive(Debug)]
@@ -261,12 +262,6 @@ pub struct SinkConfig {
261
262
  pub archive: Option<ArchiveTarget>,
262
263
  }
263
264
 
264
- impl SinkConfig {
265
- pub fn resolved_write_mode(&self) -> WriteMode {
266
- self.write_mode
267
- }
268
- }
269
-
270
265
  #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
271
266
  pub enum WriteMode {
272
267
  #[default]
@@ -445,9 +440,33 @@ pub struct ArchiveTarget {
445
440
  pub storage: Option<String>,
446
441
  }
447
442
 
443
+ #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
444
+ pub enum PolicySeverity {
445
+ #[default]
446
+ Warn,
447
+ Reject,
448
+ Abort,
449
+ }
450
+
451
+ impl PolicySeverity {
452
+ pub fn as_str(self) -> &'static str {
453
+ match self {
454
+ Self::Warn => "warn",
455
+ Self::Reject => "reject",
456
+ Self::Abort => "abort",
457
+ }
458
+ }
459
+ }
460
+
461
+ impl std::fmt::Display for PolicySeverity {
462
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
463
+ f.write_str(self.as_str())
464
+ }
465
+ }
466
+
448
467
  #[derive(Debug)]
449
468
  pub struct PolicyConfig {
450
- pub severity: String,
469
+ pub severity: PolicySeverity,
451
470
  }
452
471
 
453
472
  #[derive(Debug)]
@@ -464,36 +483,6 @@ impl SchemaConfig {
464
483
  pub fn resolved_schema_evolution(&self) -> SchemaEvolutionConfig {
465
484
  self.schema_evolution.unwrap_or_default()
466
485
  }
467
-
468
- pub fn to_polars_schema(&self) -> FloeResult<Schema> {
469
- let mut schema = Schema::with_capacity(self.columns.len());
470
- for column in &self.columns {
471
- let dtype = parse_data_type(&column.column_type)?;
472
- if schema.insert(column.name.as_str().into(), dtype).is_some() {
473
- return Err(Box::new(ConfigError(format!(
474
- "duplicate column name in schema: {}",
475
- column.name
476
- ))));
477
- }
478
- }
479
- Ok(schema)
480
- }
481
-
482
- pub fn to_polars_string_schema(&self) -> FloeResult<Schema> {
483
- let mut schema = Schema::with_capacity(self.columns.len());
484
- for column in &self.columns {
485
- if schema
486
- .insert(column.name.as_str().into(), DataType::String)
487
- .is_some()
488
- {
489
- return Err(Box::new(ConfigError(format!(
490
- "duplicate column name in schema: {}",
491
- column.name
492
- ))));
493
- }
494
- }
495
- Ok(schema)
496
- }
497
486
  }
498
487
 
499
488
  #[derive(Debug)]
@@ -1,18 +1,19 @@
1
1
  use std::collections::HashSet;
2
2
 
3
+ use crate::config::storage::is_remote_uri;
3
4
  use crate::config::{
4
- CatalogDefinition, CatalogTypeConfig, EntityConfig, IncrementalMode, RootConfig, SourceOptions,
5
- StorageDefinition,
5
+ CatalogDefinition, CatalogTypeConfig, EntityConfig, IncrementalMode, PolicySeverity,
6
+ RootConfig, SourceOptions, StorageDefinition,
6
7
  };
7
8
  use crate::io::format;
8
9
  use crate::io::read::json_selector::parse_selector;
9
10
  use crate::io::read::xml_selector;
11
+ use crate::io::write::sink_format::sink_format;
10
12
  use crate::{warnings, ConfigError, FloeResult};
11
13
 
12
14
  const ALLOWED_COLUMN_TYPES: &[&str] = &["string", "number", "boolean", "datetime", "date", "time"];
13
15
  const ALLOWED_CAST_MODES: &[&str] = &["strict", "coerce"];
14
16
  const ALLOWED_NORMALIZE_STRATEGIES: &[&str] = &["snake_case", "lower", "camel_case", "none"];
15
- const ALLOWED_POLICY_SEVERITIES: &[&str] = &["warn", "reject", "abort"];
16
17
  const ALLOWED_MISSING_POLICIES: &[&str] = &["reject_file", "fill_nulls"];
17
18
  const ALLOWED_EXTRA_POLICIES: &[&str] = &["reject_file", "ignore"];
18
19
  const ALLOWED_STORAGE_TYPES: &[&str] = &["local", "s3", "adls", "gcs"];
@@ -103,6 +104,11 @@ fn validate_lineage(lineage: &crate::config::LineageConfig) -> FloeResult<()> {
103
104
  "lineage.namespace must not be empty".to_string(),
104
105
  )));
105
106
  }
107
+ if lineage.max_failures == Some(0) {
108
+ return Err(Box::new(ConfigError(
109
+ "lineage.max_failures must be at least 1".to_string(),
110
+ )));
111
+ }
106
112
  Ok(())
107
113
  }
108
114
 
@@ -123,6 +129,12 @@ fn validate_report(
123
129
  ) -> FloeResult<()> {
124
130
  let storage_name = storages.resolve_report_name(report.storage.as_deref())?;
125
131
  storages.validate_report_reference("report.storage", &storage_name)?;
132
+ if storages.definition_type(&storage_name) == Some("local") && is_remote_uri(&report.path) {
133
+ return Err(Box::new(ConfigError(format!(
134
+ "report.path must be a local path (got {})",
135
+ report.path
136
+ ))));
137
+ }
126
138
  Ok(())
127
139
  }
128
140
 
@@ -134,7 +146,6 @@ fn validate_entity(
134
146
  ) -> FloeResult<()> {
135
147
  validate_source(entity, storages)?;
136
148
  validate_state(entity)?;
137
- validate_policy(entity)?;
138
149
  validate_sink(entity, storages, catalogs)?;
139
150
  validate_schema(entity, config_version)?;
140
151
  if let Some(pii) = &entity.pii {
@@ -147,7 +158,7 @@ fn validate_pii(entity: &EntityConfig, pii: &crate::config::PiiConfig) -> FloeRe
147
158
  use crate::config::PiiStrategy;
148
159
  // Abort severity writes the raw input file to the rejected sink without
149
160
  // loading a DataFrame, bypassing masking entirely.
150
- if entity.policy.severity == "abort" {
161
+ if entity.policy.severity == PolicySeverity::Abort {
151
162
  return Err(Box::new(ConfigError(format!(
152
163
  "entity.name={} pii: masking is not applied when policy.severity=abort \
153
164
  because the raw file is written to sink.rejected without DataFrame processing",
@@ -197,7 +208,7 @@ fn validate_pii(entity: &EntityConfig, pii: &crate::config::PiiConfig) -> FloeRe
197
208
  .flatten()
198
209
  .map(|s| s.as_str())
199
210
  .collect();
200
- let write_mode = entity.sink.resolved_write_mode();
211
+ let write_mode = entity.sink.write_mode;
201
212
  let is_merge_mode = matches!(
202
213
  write_mode,
203
214
  crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
@@ -515,7 +526,7 @@ fn validate_sink(
515
526
  entity.sink.accepted.options.as_ref(),
516
527
  )?;
517
528
 
518
- if entity.policy.severity == "reject" && entity.sink.rejected.is_none() {
529
+ if entity.policy.severity == PolicySeverity::Reject && entity.sink.rejected.is_none() {
519
530
  return Err(Box::new(ConfigError(format!(
520
531
  "entity.name={} sink.rejected is required when policy.severity=reject",
521
532
  entity.name
@@ -534,28 +545,14 @@ fn validate_sink(
534
545
  entity.sink.accepted.storage.as_deref(),
535
546
  )?;
536
547
  storages.validate_reference(entity, "sink.accepted.storage", &accepted_storage)?;
537
- if entity.sink.accepted.format == "delta" {
538
- if let Some(storage_type) = storages.definition_type(&accepted_storage) {
539
- if storage_type != "local"
540
- && storage_type != "s3"
541
- && storage_type != "adls"
542
- && storage_type != "gcs"
543
- {
544
- return Err(Box::new(ConfigError(format!(
545
- "entity.name={} sink.accepted.format=delta is only supported on local, s3, adls, or gcs storage (got {})",
546
- entity.name, storage_type
547
- ))));
548
- }
549
- }
550
- }
551
- if entity.sink.accepted.format == "iceberg" {
552
- if let Some(storage_type) = storages.definition_type(&accepted_storage) {
553
- if storage_type != "local" && storage_type != "s3" && storage_type != "gcs" {
554
- return Err(Box::new(ConfigError(format!(
555
- "entity.name={} sink.accepted.format=iceberg is only supported on local, s3, or gcs storage for now (got {})",
556
- entity.name, storage_type
557
- ))));
558
- }
548
+ if let Some(storage_type) = storages.definition_type(&accepted_storage) {
549
+ let fmt = sink_format(entity.sink.accepted.format.as_str())?;
550
+ if !fmt.supported_storages().contains(&storage_type) {
551
+ let supported = fmt.supported_storages().join(", ");
552
+ return Err(Box::new(ConfigError(format!(
553
+ "entity.name={} sink.accepted.format={} is not supported on {} storage (supported: {})",
554
+ entity.name, entity.sink.accepted.format, storage_type, supported
555
+ ))));
559
556
  }
560
557
  }
561
558
  validate_iceberg_catalog_binding(entity, storages, catalogs, &accepted_storage)?;
@@ -589,20 +586,23 @@ fn validate_sink(
589
586
  }
590
587
 
591
588
  fn validate_sink_write_mode(entity: &EntityConfig) -> FloeResult<()> {
592
- let write_mode = entity.sink.resolved_write_mode();
589
+ let write_mode = entity.sink.write_mode;
590
+ let fmt = sink_format(entity.sink.accepted.format.as_str())?;
591
+ if !fmt.supported_modes().contains(&write_mode) {
592
+ return Err(Box::new(ConfigError(format!(
593
+ "entity.name={} sink.write_mode={} is not supported by sink.accepted.format={}",
594
+ entity.name,
595
+ write_mode.as_str(),
596
+ entity.sink.accepted.format
597
+ ))));
598
+ }
599
+
593
600
  let is_merge_mode = matches!(
594
601
  write_mode,
595
602
  crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
596
603
  );
597
604
  if is_merge_mode {
598
605
  let mode_name = write_mode.as_str();
599
- if entity.sink.accepted.format != "delta" {
600
- return Err(Box::new(ConfigError(format!(
601
- "entity.name={} sink.write_mode={} requires sink.accepted.format=delta",
602
- entity.name, mode_name
603
- ))));
604
- }
605
-
606
606
  let primary_key = entity.schema.primary_key.as_ref().ok_or_else(|| {
607
607
  Box::new(ConfigError(format!(
608
608
  "entity.name={} sink.write_mode={} requires schema.primary_key",
@@ -628,9 +628,16 @@ fn validate_merge_options(
628
628
  return Ok(());
629
629
  };
630
630
 
631
- if entity.sink.accepted.format != "delta" {
631
+ let fmt = sink_format(entity.sink.accepted.format.as_str())?;
632
+ let supports_merge = fmt.supported_modes().iter().any(|m| {
633
+ matches!(
634
+ m,
635
+ crate::config::WriteMode::MergeScd1 | crate::config::WriteMode::MergeScd2
636
+ )
637
+ });
638
+ if !supports_merge {
632
639
  return Err(Box::new(ConfigError(format!(
633
- "entity.name={} sink.accepted.merge is only supported when sink.accepted.format=delta",
640
+ "entity.name={} sink.accepted.merge is only supported when sink.accepted.format supports merge (e.g. delta)",
634
641
  entity.name
635
642
  ))));
636
643
  }
@@ -1063,18 +1070,6 @@ fn validate_sink_partitioning(entity: &EntityConfig) -> FloeResult<()> {
1063
1070
  Ok(())
1064
1071
  }
1065
1072
 
1066
- fn validate_policy(entity: &EntityConfig) -> FloeResult<()> {
1067
- if !ALLOWED_POLICY_SEVERITIES.contains(&entity.policy.severity.as_str()) {
1068
- return Err(Box::new(ConfigError(format!(
1069
- "entity.name={} policy.severity={} is unsupported (allowed: {})",
1070
- entity.name,
1071
- entity.policy.severity,
1072
- ALLOWED_POLICY_SEVERITIES.join(", ")
1073
- ))));
1074
- }
1075
- Ok(())
1076
- }
1077
-
1078
1073
  fn validate_schema(entity: &EntityConfig, config_version: ConfigVersion) -> FloeResult<()> {
1079
1074
  if entity.source.format == "json" && entity.schema.columns.len() > MAX_JSON_COLUMNS {
1080
1075
  return Err(Box::new(ConfigError(format!(
@@ -0,0 +1,27 @@
1
+ use crate::log::emit_log;
2
+
3
+ #[derive(Debug, thiserror::Error)]
4
+ #[error("{0}")]
5
+ pub struct ConfigError(pub String);
6
+
7
+ #[derive(Debug, thiserror::Error)]
8
+ #[error("{0}")]
9
+ pub struct RunError(pub String);
10
+
11
+ #[derive(Debug, thiserror::Error)]
12
+ #[error("{0}")]
13
+ pub struct StorageError(pub String);
14
+
15
+ #[derive(Debug, thiserror::Error)]
16
+ #[error("{0}")]
17
+ pub struct IoError(pub String);
18
+
19
+ pub fn emit(
20
+ run_id: &str,
21
+ entity: Option<&str>,
22
+ input: Option<&str>,
23
+ code: Option<&str>,
24
+ message: &str,
25
+ ) {
26
+ emit_log("error", run_id, entity, input, code, message);
27
+ }