floe-python 0.4.2__tar.gz → 0.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. {floe_python-0.4.2 → floe_python-0.4.4}/Cargo.lock +3 -3
  2. {floe_python-0.4.2 → floe_python-0.4.4}/PKG-INFO +42 -1
  3. {floe_python-0.4.2/crates/floe-python → floe_python-0.4.4}/README.md +41 -0
  4. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/Cargo.toml +1 -1
  5. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/location.rs +1 -1
  6. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/mod.rs +1 -0
  7. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/parse.rs +2 -0
  8. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/types.rs +1 -0
  9. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/iceberg/rest.rs +26 -2
  10. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/iceberg.rs +10 -4
  11. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/lib.rs +10 -0
  12. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/lineage/mod.rs +141 -44
  13. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/context.rs +31 -1
  14. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/entity/accepted_write.rs +3 -1
  15. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/entity/incremental.rs +51 -8
  16. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/entity/mod.rs +7 -1
  17. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/mod.rs +18 -2
  18. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/state/mod.rs +94 -0
  19. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/archive_run.rs +4 -0
  20. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/composite_unique.rs +1 -0
  21. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/delta_run.rs +41 -0
  22. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/dry_run.rs +2 -0
  23. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/fixed_width.rs +3 -0
  24. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/iceberg_gcs_run.rs +4 -0
  25. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/iceberg_glue_run.rs +4 -0
  26. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/iceberg_run.rs +2 -0
  27. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/iceberg_s3_run.rs +4 -0
  28. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/json_selectors.rs +3 -0
  29. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/local_run.rs +2 -0
  30. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/path_normalization.rs +1 -0
  31. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/run_entities_filter.rs +1 -0
  32. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/lineage_validation.rs +1 -0
  33. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/avro_input.rs +1 -0
  34. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/json_array.rs +1 -0
  35. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/json_ndjson.rs +1 -0
  36. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/orc_input.rs +1 -0
  37. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/parquet_input.rs +1 -0
  38. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/xlsx_input.rs +1 -0
  39. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/xml.rs +1 -0
  40. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/delta_merge.rs +5 -0
  41. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/check_order.rs +1 -0
  42. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/entity/accepted_output.rs +1 -0
  43. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/entity/incremental.rs +326 -0
  44. floe_python-0.4.4/crates/floe-core/tests/unit/run/lineage.rs +923 -0
  45. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/schema_mismatch.rs +1 -0
  46. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/Cargo.toml +2 -2
  47. {floe_python-0.4.2 → floe_python-0.4.4/crates/floe-python}/README.md +41 -0
  48. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/src/functions.rs +3 -1
  49. {floe_python-0.4.2 → floe_python-0.4.4}/pyproject.toml +1 -1
  50. {floe_python-0.4.2 → floe_python-0.4.4}/python/floe/_floe.pyi +1 -0
  51. floe_python-0.4.2/crates/floe-core/tests/unit/run/lineage.rs +0 -366
  52. {floe_python-0.4.2 → floe_python-0.4.4}/Cargo.toml +0 -0
  53. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/README.md +0 -0
  54. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/add_entity.rs +0 -0
  55. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/checks/cast.rs +0 -0
  56. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/checks/mismatch.rs +0 -0
  57. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/checks/mod.rs +0 -0
  58. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/checks/normalize.rs +0 -0
  59. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/checks/not_null.rs +0 -0
  60. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/checks/unique.rs +0 -0
  61. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/catalog.rs +0 -0
  62. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/storage.rs +0 -0
  63. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/template.rs +0 -0
  64. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/validate.rs +0 -0
  65. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/config/yaml_decode.rs +0 -0
  66. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/errors.rs +0 -0
  67. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/format.rs +0 -0
  68. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/mod.rs +0 -0
  69. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/avro.rs +0 -0
  70. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/csv.rs +0 -0
  71. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/fixed_width.rs +0 -0
  72. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/json.rs +0 -0
  73. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/json_selector.rs +0 -0
  74. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/mod.rs +0 -0
  75. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/orc.rs +0 -0
  76. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/parquet.rs +0 -0
  77. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/xlsx.rs +0 -0
  78. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/xml.rs +0 -0
  79. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/read/xml_selector.rs +0 -0
  80. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/core/extensions.rs +0 -0
  81. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/core/mod.rs +0 -0
  82. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/core/paths.rs +0 -0
  83. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/core/placement.rs +0 -0
  84. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/core/planner.rs +0 -0
  85. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/core/uri.rs +0 -0
  86. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/core/validation.rs +0 -0
  87. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/mod.rs +0 -0
  88. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/object_store.rs +0 -0
  89. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/ops/archive.rs +0 -0
  90. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/ops/inputs.rs +0 -0
  91. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/ops/mod.rs +0 -0
  92. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/ops/output.rs +0 -0
  93. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/providers/adls.rs +0 -0
  94. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/providers/gcs.rs +0 -0
  95. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/providers/local.rs +0 -0
  96. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/providers/mod.rs +0 -0
  97. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/providers/s3.rs +0 -0
  98. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/storage/target.rs +0 -0
  99. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/unique_seed/mod.rs +0 -0
  100. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/accepted.rs +0 -0
  101. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/arrow_convert.rs +0 -0
  102. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/csv.rs +0 -0
  103. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/delta/commit_metrics.rs +0 -0
  104. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/delta/options.rs +0 -0
  105. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/delta/record_batch.rs +0 -0
  106. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/delta/unity.rs +0 -0
  107. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/delta/unity_tests.rs +0 -0
  108. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/delta.rs +0 -0
  109. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/iceberg/context.rs +0 -0
  110. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/iceberg/data_files.rs +0 -0
  111. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/iceberg/glue.rs +0 -0
  112. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/iceberg/metadata.rs +0 -0
  113. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/iceberg/schema.rs +0 -0
  114. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/metrics.rs +0 -0
  115. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/mod.rs +0 -0
  116. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/parquet.rs +0 -0
  117. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/parts.rs +0 -0
  118. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/sink_format.rs +0 -0
  119. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/strategy/append.rs +0 -0
  120. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/strategy/merge/mod.rs +0 -0
  121. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/strategy/merge/scd1.rs +0 -0
  122. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/strategy/merge/scd2.rs +0 -0
  123. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/strategy/merge/shared.rs +0 -0
  124. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/strategy/mod.rs +0 -0
  125. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/io/write/strategy/overwrite.rs +0 -0
  126. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/log.rs +0 -0
  127. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/manifest/builder.rs +0 -0
  128. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/manifest/mod.rs +0 -0
  129. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/manifest/model.rs +0 -0
  130. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/manifest/reconstruct.rs +0 -0
  131. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/profile/mod.rs +0 -0
  132. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/profile/parse.rs +0 -0
  133. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/profile/types.rs +0 -0
  134. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/profile/validate.rs +0 -0
  135. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/report/build.rs +0 -0
  136. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/report/entity.rs +0 -0
  137. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/report/mod.rs +0 -0
  138. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/report/output.rs +0 -0
  139. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/entity/pii.rs +0 -0
  140. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/entity/precheck.rs +0 -0
  141. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/entity/process.rs +0 -0
  142. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/entity/resolve.rs +0 -0
  143. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/entity/validate_split.rs +0 -0
  144. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/events.rs +0 -0
  145. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/file.rs +0 -0
  146. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/output.rs +0 -0
  147. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/run/perf.rs +0 -0
  148. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/runner/mod.rs +0 -0
  149. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/runner/outcome.rs +0 -0
  150. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/runtime.rs +0 -0
  151. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/vars/mod.rs +0 -0
  152. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/vars/resolve.rs +0 -0
  153. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/src/warnings.rs +0 -0
  154. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration/mod.rs +0 -0
  155. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/integration.rs +0 -0
  156. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/common.rs +0 -0
  157. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/add_entity.rs +0 -0
  158. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/adls_storage.rs +0 -0
  159. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/adls_validation.rs +0 -0
  160. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/catalogs.rs +0 -0
  161. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/config_validation.rs +0 -0
  162. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/gcs_storage.rs +0 -0
  163. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/gcs_validation.rs +0 -0
  164. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/local_storage.rs +0 -0
  165. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/mod.rs +0 -0
  166. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/parse.rs +0 -0
  167. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/pii_validation.rs +0 -0
  168. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/remote_base.rs +0 -0
  169. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/config/templating.rs +0 -0
  170. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/format.rs +0 -0
  171. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/mod.rs +0 -0
  172. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/csv_nulls.rs +0 -0
  173. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/json_selector.rs +0 -0
  174. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/mod.rs +0 -0
  175. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/tsv.rs +0 -0
  176. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/read/xml_selector.rs +0 -0
  177. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/adls.rs +0 -0
  178. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/adls_integration.rs +0 -0
  179. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/gcs.rs +0 -0
  180. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/inputs.rs +0 -0
  181. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/local.rs +0 -0
  182. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/mod.rs +0 -0
  183. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/paths.rs +0 -0
  184. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/planner.rs +0 -0
  185. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/s3.rs +0 -0
  186. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/storage/target.rs +0 -0
  187. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/delta_write.rs +0 -0
  188. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/iceberg_write.rs +0 -0
  189. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/metrics.rs +0 -0
  190. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/mod.rs +0 -0
  191. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/object_store.rs +0 -0
  192. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/parquet_write.rs +0 -0
  193. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/parts.rs +0 -0
  194. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/io/write/rejected_csv.rs +0 -0
  195. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/manifest/mod.rs +0 -0
  196. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/mod.rs +0 -0
  197. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/profile/mod.rs +0 -0
  198. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/profile/parse.rs +0 -0
  199. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/profile/validate.rs +0 -0
  200. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/report/accepted_output.rs +0 -0
  201. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/report/mod.rs +0 -0
  202. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/report/storage.rs +0 -0
  203. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/checks.rs +0 -0
  204. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/entity/mod.rs +0 -0
  205. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/mod.rs +0 -0
  206. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/normalize.rs +0 -0
  207. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/pii.rs +0 -0
  208. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/run/report.rs +0 -0
  209. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/runner/adapter.rs +0 -0
  210. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/runner/mod.rs +0 -0
  211. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/state/mod.rs +0 -0
  212. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/vars/mod.rs +0 -0
  213. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit/vars/resolve.rs +0 -0
  214. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-core/tests/unit.rs +0 -0
  215. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/.gitignore +0 -0
  216. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/src/lib.rs +0 -0
  217. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/src/observer.rs +0 -0
  218. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/src/types/config.rs +0 -0
  219. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/src/types/errors.rs +0 -0
  220. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/src/types/mod.rs +0 -0
  221. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/src/types/outcome.rs +0 -0
  222. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/tests/fixtures/config.yml +0 -0
  223. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/tests/fixtures/in/customer/customers_valid.csv +0 -0
  224. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/tests/fixtures/invalid_config.yml +0 -0
  225. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/tests/fixtures/profile.yml +0 -0
  226. {floe_python-0.4.2 → floe_python-0.4.4}/crates/floe-python/tests/test_floe.py +0 -0
  227. {floe_python-0.4.2 → floe_python-0.4.4}/python/floe/__init__.py +0 -0
  228. {floe_python-0.4.2 → floe_python-0.4.4}/python/floe/py.typed +0 -0
@@ -3399,7 +3399,7 @@ dependencies = [
3399
3399
 
3400
3400
  [[package]]
3401
3401
  name = "floe-cli"
3402
- version = "0.4.2"
3402
+ version = "0.4.4"
3403
3403
  dependencies = [
3404
3404
  "assert_cmd",
3405
3405
  "clap",
@@ -3412,7 +3412,7 @@ dependencies = [
3412
3412
 
3413
3413
  [[package]]
3414
3414
  name = "floe-core"
3415
- version = "0.4.2"
3415
+ version = "0.4.4"
3416
3416
  dependencies = [
3417
3417
  "apache-avro 0.16.0",
3418
3418
  "arrow",
@@ -3455,7 +3455,7 @@ dependencies = [
3455
3455
 
3456
3456
  [[package]]
3457
3457
  name = "floe-python"
3458
- version = "0.4.2"
3458
+ version = "0.4.4"
3459
3459
  dependencies = [
3460
3460
  "floe-core",
3461
3461
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: floe-python
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Intended Audience :: Science/Research
@@ -98,6 +98,43 @@ except floe.FloeError as e:
98
98
  | `set_observer(callback)` | Register a live-event callback |
99
99
  | `clear_observer()` | Remove the current callback |
100
100
 
101
+ ## Jupyter
102
+
103
+ `RunOutcome` renders as a color-coded HTML table automatically in Jupyter — no extra code needed. Just end a cell with the variable:
104
+
105
+ ```python
106
+ outcome = floe.run("orders.yml")
107
+ outcome # renders inline HTML table with per-entity status, accepted/rejected counts
108
+ ```
109
+
110
+ Use `outcome.to_dict()` to turn results into a plain dict for pandas:
111
+
112
+ ```python
113
+ import pandas as pd
114
+ df = pd.DataFrame(outcome.entity_reports)
115
+ ```
116
+
117
+ ## Observing progress
118
+
119
+ Register a callback to receive live events as the run proceeds:
120
+
121
+ ```python
122
+ floe.set_observer(lambda e: print(f"[{e['event']}]", e.get("name", e.get("entity", ""))))
123
+ outcome = floe.run("orders.yml")
124
+ floe.clear_observer()
125
+ ```
126
+
127
+ Event types: `run_started`, `entity_started`, `file_started`, `file_finished`, `schema_evolution_applied`, `entity_finished`, `run_finished`, `log`. See the [full guide](../../docs/python-bindings.md#observing-runs-in-real-time) for all event fields.
128
+
129
+ ## Profile overrides
130
+
131
+ Override config variables or cloud credentials without editing the YAML:
132
+
133
+ ```python
134
+ floe.run("orders.yml", profile_vars={"incoming_root": "s3://my-bucket/incoming"})
135
+ floe.run("orders.yml", profile_path="prod.yml")
136
+ ```
137
+
101
138
  ## Building from source
102
139
 
103
140
  ```bash
@@ -111,3 +148,7 @@ maturin develop
111
148
 
112
149
  Apache 2.0
113
150
 
151
+ ---
152
+
153
+ → Full API reference and examples: [docs/python-bindings.md](../../docs/python-bindings.md)
154
+
@@ -70,6 +70,43 @@ except floe.FloeError as e:
70
70
  | `set_observer(callback)` | Register a live-event callback |
71
71
  | `clear_observer()` | Remove the current callback |
72
72
 
73
+ ## Jupyter
74
+
75
+ `RunOutcome` renders as a color-coded HTML table automatically in Jupyter — no extra code needed. Just end a cell with the variable:
76
+
77
+ ```python
78
+ outcome = floe.run("orders.yml")
79
+ outcome # renders inline HTML table with per-entity status, accepted/rejected counts
80
+ ```
81
+
82
+ Use `outcome.to_dict()` to turn results into a plain dict for pandas:
83
+
84
+ ```python
85
+ import pandas as pd
86
+ df = pd.DataFrame(outcome.entity_reports)
87
+ ```
88
+
89
+ ## Observing progress
90
+
91
+ Register a callback to receive live events as the run proceeds:
92
+
93
+ ```python
94
+ floe.set_observer(lambda e: print(f"[{e['event']}]", e.get("name", e.get("entity", ""))))
95
+ outcome = floe.run("orders.yml")
96
+ floe.clear_observer()
97
+ ```
98
+
99
+ Event types: `run_started`, `entity_started`, `file_started`, `file_finished`, `schema_evolution_applied`, `entity_finished`, `run_finished`, `log`. See the [full guide](../../docs/python-bindings.md#observing-runs-in-real-time) for all event fields.
100
+
101
+ ## Profile overrides
102
+
103
+ Override config variables or cloud credentials without editing the YAML:
104
+
105
+ ```python
106
+ floe.run("orders.yml", profile_vars={"incoming_root": "s3://my-bucket/incoming"})
107
+ floe.run("orders.yml", profile_path="prod.yml")
108
+ ```
109
+
73
110
  ## Building from source
74
111
 
75
112
  ```bash
@@ -82,3 +119,7 @@ maturin develop
82
119
  ## License
83
120
 
84
121
  Apache 2.0
122
+
123
+ ---
124
+
125
+ → Full API reference and examples: [docs/python-bindings.md](../../docs/python-bindings.md)
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "floe-core"
3
- version = "0.4.2"
3
+ version = "0.4.4"
4
4
  edition = "2021"
5
5
  description = "Core library for Floe, a YAML-driven technical ingestion tool."
6
6
  license = "MIT"
@@ -70,6 +70,6 @@ fn download_remote_config(uri: &str, temp_dir: &Path) -> FloeResult<PathBuf> {
70
70
  Err(format!("unsupported config uri: {}", uri).into())
71
71
  }
72
72
 
73
- fn is_remote_uri(value: &str) -> bool {
73
+ pub(crate) fn is_remote_uri(value: &str) -> bool {
74
74
  value.starts_with("s3://") || value.starts_with("gs://") || value.starts_with("abfs://")
75
75
  }
@@ -8,6 +8,7 @@ mod validate;
8
8
  pub(crate) mod yaml_decode;
9
9
 
10
10
  pub use catalog::{CatalogResolver, ResolvedDeltaCatalogTarget, ResolvedIcebergCatalogTarget};
11
+ pub(crate) use location::is_remote_uri;
11
12
  pub use location::{resolve_config_location, ConfigLocation};
12
13
  pub use storage::{resolve_local_path, ConfigBase, ResolvedPath, StorageResolver};
13
14
  pub use types::*;
@@ -1141,6 +1141,7 @@ pub(crate) fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
1141
1141
  "namespace",
1142
1142
  "producer",
1143
1143
  "max_failures",
1144
+ "job_name",
1144
1145
  ],
1145
1146
  )?;
1146
1147
  Ok(LineageConfig {
@@ -1150,5 +1151,6 @@ pub(crate) fn parse_lineage_config(value: &Yaml) -> FloeResult<LineageConfig> {
1150
1151
  namespace: get_string(hash, "namespace", "lineage")?,
1151
1152
  producer: opt_string(hash, "producer", "lineage")?,
1152
1153
  max_failures: opt_u32(hash, "max_failures", "lineage")?,
1154
+ job_name: opt_string(hash, "job_name", "lineage")?,
1153
1155
  })
1154
1156
  }
@@ -31,6 +31,7 @@ pub struct LineageConfig {
31
31
  pub namespace: String,
32
32
  pub producer: Option<String>,
33
33
  pub max_failures: Option<u32>,
34
+ pub job_name: Option<String>,
34
35
  }
35
36
 
36
37
  #[derive(Debug)]
@@ -9,6 +9,7 @@ use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableIdent};
9
9
  use iceberg_catalog_rest::{
10
10
  RestCatalogBuilder, REST_CATALOG_PROP_URI, REST_CATALOG_PROP_WAREHOUSE,
11
11
  };
12
+ use iceberg_storage_opendal::OpenDalStorageFactory;
12
13
 
13
14
  use crate::config::CatalogTypeConfig;
14
15
  use crate::errors::RunError;
@@ -28,6 +29,7 @@ use super::{map_iceberg_err, IcebergWriteResult, PreparedIcebergWrite};
28
29
  pub(crate) async fn build_rest_catalog(
29
30
  rest_cfg: &RestIcebergCatalogConfig,
30
31
  file_io_props: HashMap<String, String>,
32
+ table_location: &str,
31
33
  ) -> FloeResult<iceberg_catalog_rest::RestCatalog> {
32
34
  let mut props: HashMap<String, String> = file_io_props;
33
35
 
@@ -63,7 +65,29 @@ pub(crate) async fn build_rest_catalog(
63
65
  props.insert("scope".to_string(), scope.to_string());
64
66
  }
65
67
 
66
- let storage_factory: Arc<dyn StorageFactory> = Arc::new(LocalFsStorageFactory);
68
+ // Prefer the concrete table location for storage factory dispatch; fall back to the
69
+ // warehouse field for cases where the caller only has catalog-level config (e.g. seeding).
70
+ let effective_uri = if !table_location.is_empty() {
71
+ table_location
72
+ } else {
73
+ rest_cfg.warehouse.as_deref().unwrap_or("")
74
+ };
75
+ let storage_factory: Arc<dyn StorageFactory> =
76
+ if effective_uri.starts_with("s3://") || effective_uri.starts_with("s3a://") {
77
+ let scheme = effective_uri
78
+ .split("://")
79
+ .next()
80
+ .unwrap_or("s3")
81
+ .to_string();
82
+ Arc::new(OpenDalStorageFactory::S3 {
83
+ configured_scheme: scheme,
84
+ customized_credential_load: None,
85
+ })
86
+ } else if effective_uri.starts_with("gs://") {
87
+ Arc::new(OpenDalStorageFactory::Gcs)
88
+ } else {
89
+ Arc::new(LocalFsStorageFactory)
90
+ };
67
91
 
68
92
  RestCatalogBuilder::default()
69
93
  .with_storage_factory(storage_factory)
@@ -125,7 +149,7 @@ pub(crate) async fn write_via_rest_catalog(
125
149
  mode: config::WriteMode,
126
150
  small_file_threshold_bytes: u64,
127
151
  ) -> FloeResult<IcebergWriteResult> {
128
- let catalog = build_rest_catalog(rest_cfg, file_io_props).await?;
152
+ let catalog = build_rest_catalog(rest_cfg, file_io_props, &table_root_uri).await?;
129
153
 
130
154
  let namespace_name = rest_cfg.namespace.clone();
131
155
  let namespace = NamespaceIdent::new(namespace_name);
@@ -627,9 +627,14 @@ fn seed_iceberg_from_catalog(
627
627
  .map_err(|err| Box::new(RunError(format!("glue iceberg seed failed: {err}"))))?;
628
628
  seed_from_batches(tracker, batches, rename_back)
629
629
  }
630
- IcebergCatalogConfig::Rest(rest_cfg) => {
631
- seed_iceberg_from_rest(tracker, rest_cfg, file_io_props, scan_cols, rename_back)
632
- }
630
+ IcebergCatalogConfig::Rest(rest_cfg) => seed_iceberg_from_rest(
631
+ tracker,
632
+ rest_cfg,
633
+ file_io_props,
634
+ &warehouse_location,
635
+ scan_cols,
636
+ rename_back,
637
+ ),
633
638
  }
634
639
  }
635
640
 
@@ -637,6 +642,7 @@ fn seed_iceberg_from_rest(
637
642
  tracker: &mut check::UniqueTracker,
638
643
  rest_cfg: &RestIcebergCatalogConfig,
639
644
  file_io_props: HashMap<String, String>,
645
+ warehouse_location: &str,
640
646
  scan_cols: &[String],
641
647
  rename_back: &HashMap<String, String>,
642
648
  ) -> FloeResult<()> {
@@ -653,7 +659,7 @@ fn seed_iceberg_from_rest(
653
659
 
654
660
  let batches = runtime
655
661
  .block_on(async {
656
- let catalog = build_rest_catalog(rest_cfg, file_io_props).await?;
662
+ let catalog = build_rest_catalog(rest_cfg, file_io_props, warehouse_location).await?;
657
663
  let namespace = NamespaceIdent::new(rest_cfg.namespace.clone());
658
664
 
659
665
  if !catalog
@@ -54,6 +54,7 @@ pub struct RunOptions {
54
54
  pub run_id: Option<String>,
55
55
  pub entities: Vec<String>,
56
56
  pub dry_run: bool,
57
+ pub full_refresh: bool,
57
58
  pub profile: Option<ProfileConfig>,
58
59
  }
59
60
 
@@ -84,6 +85,15 @@ pub fn load_config(config_path: &Path) -> FloeResult<config::RootConfig> {
84
85
  config::parse_config(config_path)
85
86
  }
86
87
 
88
+ /// Read manifest JSON from any supported URI (local path, `s3://`, `gs://`, `abfs://`).
89
+ /// For remote URIs the file is downloaded to a temp directory that is cleaned up before
90
+ /// this function returns; the caller receives the raw JSON text as a `String`.
91
+ pub fn read_manifest_text(uri: &str) -> FloeResult<String> {
92
+ let location = config::resolve_config_location(uri)?;
93
+ let text = std::fs::read_to_string(&location.path)?;
94
+ Ok(text)
95
+ }
96
+
87
97
  pub fn load_config_with_profile_vars(
88
98
  config_path: &Path,
89
99
  profile_vars: &std::collections::HashMap<String, String>,
@@ -8,6 +8,18 @@ use serde_json::{json, Value};
8
8
  use crate::config::{EntityConfig, LineageConfig};
9
9
  use crate::run::events::{RunEvent, RunObserver};
10
10
 
11
/// Fallback `_producer` URI used when the lineage config does not set one.
///
/// Built at compile time by appending this crate's `CARGO_PKG_VERSION` to the
/// release-tag URL prefix, so emitted events identify the exact release.
const DEFAULT_PRODUCER: &str = concat!(
    "https://github.com/malon64/floe/releases/tag/v",
    env!("CARGO_PKG_VERSION")
);
15
+
16
/// One schema column of an entity, as reported in lineage facets.
#[derive(Clone)]
struct ColumnMapping {
    /// Column name in the entity schema; emitted as the schema field name and
    /// used as the key in the column-lineage facet.
    output_name: String,
    /// Declared column type, copied verbatim from the entity config.
    column_type: String,
    /// Source field the column is read from, when the config names one.
    /// Column lineage falls back to `output_name` when this is `None`.
    source_field: Option<String>,
}
22
+
11
23
  struct EntityUris {
12
24
  source: String,
13
25
  accepted: String,
@@ -20,14 +32,19 @@ pub struct OpenLineageObserver {
20
32
  entity_start_ms: Mutex<HashMap<String, u128>>,
21
33
  entity_run_ids: Mutex<HashMap<String, String>>,
22
34
  run_start_ms: Mutex<Option<u128>>,
23
- entity_schemas: HashMap<String, Vec<(String, String)>>,
35
+ entity_schemas: HashMap<String, Vec<ColumnMapping>>,
24
36
  entity_uris: HashMap<String, EntityUris>,
37
+ run_job_name: String,
25
38
  consecutive_failures: AtomicUsize,
26
39
  circuit_open: AtomicBool,
27
40
  }
28
41
 
29
42
  impl OpenLineageObserver {
30
- pub fn new(config: &LineageConfig, entities: &[EntityConfig]) -> crate::FloeResult<Self> {
43
+ pub fn new(
44
+ config: &LineageConfig,
45
+ entities: &[EntityConfig],
46
+ config_path: &str,
47
+ ) -> crate::FloeResult<Self> {
31
48
  let timeout = Duration::from_secs(config.timeout_secs.unwrap_or(5));
32
49
  let client = reqwest::blocking::Client::builder()
33
50
  .timeout(timeout)
@@ -38,14 +55,30 @@ impl OpenLineageObserver {
38
55
  ))) as Box<dyn std::error::Error + Send + Sync>
39
56
  })?;
40
57
 
58
+ let run_job_name = config
59
+ .job_name
60
+ .clone()
61
+ .filter(|s| !s.is_empty())
62
+ .unwrap_or_else(|| {
63
+ std::path::Path::new(config_path)
64
+ .file_stem()
65
+ .and_then(|s| s.to_str())
66
+ .unwrap_or("floe-run")
67
+ .to_string()
68
+ });
69
+
41
70
  let entity_schemas = entities
42
71
  .iter()
43
72
  .map(|e| {
44
- let fields: Vec<(String, String)> = e
73
+ let fields: Vec<ColumnMapping> = e
45
74
  .schema
46
75
  .columns
47
76
  .iter()
48
- .map(|c| (c.name.clone(), c.column_type.clone()))
77
+ .map(|c| ColumnMapping {
78
+ output_name: c.name.clone(),
79
+ column_type: c.column_type.clone(),
80
+ source_field: c.source.clone(),
81
+ })
49
82
  .collect();
50
83
  (e.name.clone(), fields)
51
84
  })
@@ -73,6 +106,7 @@ impl OpenLineageObserver {
73
106
  run_start_ms: Mutex::new(None),
74
107
  entity_schemas,
75
108
  entity_uris,
109
+ run_job_name,
76
110
  consecutive_failures: AtomicUsize::new(0),
77
111
  circuit_open: AtomicBool::new(false),
78
112
  })
@@ -155,10 +189,7 @@ impl OpenLineageObserver {
155
189
  }
156
190
 
157
191
  fn producer(&self) -> &str {
158
- self.config
159
- .producer
160
- .as_deref()
161
- .unwrap_or("https://github.com/malon64/floe")
192
+ self.config.producer.as_deref().unwrap_or(DEFAULT_PRODUCER)
162
193
  }
163
194
 
164
195
  fn parent_run_facet(&self) -> Option<Value> {
@@ -212,71 +243,112 @@ impl OpenLineageObserver {
212
243
  run_facets["parent"] = parent;
213
244
  }
214
245
 
215
- // Build inputs: source dataset with schema and quality facets on COMPLETE/FAIL.
216
- let inputs = match (stats.as_ref(), uris) {
246
+ // Build inputs/outputs based on whether stats and uris are present (COMPLETE/FAIL)
247
+ // or absent (START — keep both empty).
248
+ let (inputs, outputs) = match (stats.as_ref(), uris) {
217
249
  (Some(s), Some(u)) => {
218
250
  let rejection_rate = if s.rows > 0 {
219
251
  s.rejected as f64 / s.rows as f64
220
252
  } else {
221
253
  0.0
222
254
  };
255
+
256
+ // Input: source dataset — sub-namespace avoids collision with real entity names.
257
+ let (src_ns, src_path) = split_storage_uri(&u.source);
258
+ let inputs = json!([{
259
+ "namespace": format!("{}.source", self.config.namespace),
260
+ "name": name,
261
+ "facets": {
262
+ "symlinks": symlinks_facet(self.producer(), &src_ns, &src_path, "DIRECTORY")
263
+ }
264
+ }]);
265
+
266
+ // Accepted output: entity name as logical identifier, TABLE type.
267
+ let (acc_ns, acc_path) = split_storage_uri(&u.accepted);
223
268
  let schema_facet = json!({
224
- "fields": s.schema_fields.iter().map(|(col_name, col_type)| {
225
- json!({ "name": col_name, "type": col_type })
269
+ "fields": s.schema_fields.iter().map(|col| {
270
+ json!({ "name": col.output_name, "type": col.column_type })
226
271
  }).collect::<Vec<_>>(),
227
272
  "_producer": self.producer(),
228
273
  "_schemaURL": "https://openlineage.io/spec/facets/1-1-1/SchemaDatasetFacet.json"
229
274
  });
230
- let dq_facet = json!({
231
- "rowCount": s.rows,
275
+ let accepted_dq_facet = json!({
276
+ "rowCount": s.accepted,
232
277
  "validCount": s.accepted,
233
- "invalidCount": s.rejected,
278
+ "invalidCount": 0u64,
234
279
  "_producer": self.producer(),
235
- "_schemaURL": "https://openlineage.io/spec/facets/1-0-2/DataQualityMetricsInputDatasetFacet.json"
280
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-2/DataQualityMetricsOutputDatasetFacet.json"
236
281
  });
237
282
  let floe_facet = json!({
238
283
  "entity": name,
239
284
  "rejectionRate": rejection_rate,
240
285
  "files": s.files,
241
286
  "rows": s.rows,
242
- "accepted": s.accepted,
243
- "rejected": s.rejected,
244
287
  "warnings": s.warnings,
245
288
  "errors": s.errors,
246
289
  "_producer": self.producer(),
247
290
  "_schemaURL": "https://github.com/malon64/floe/schemas/FloeQualityRunFacet.json"
248
291
  });
249
- json!([{
250
- "namespace": self.config.namespace,
251
- "name": u.source,
252
- "facets": {
253
- "schema": schema_facet,
254
- "dataQualityMetrics": dq_facet,
255
- "floeQualityRun": floe_facet
256
- }
257
- }])
258
- }
259
- _ => json!([]),
260
- };
261
292
 
262
- // Build outputs: accepted sink always present; rejected sink when configured.
263
- let outputs = match uris {
264
- Some(u) => {
293
+ let mut accepted_facets = json!({
294
+ "symlinks": symlinks_facet(self.producer(), &acc_ns, &acc_path, "TABLE"),
295
+ "schema": schema_facet,
296
+ "dataQualityMetrics": accepted_dq_facet,
297
+ "floeQualityRun": floe_facet
298
+ });
299
+
300
+ if !s.schema_fields.is_empty() {
301
+ let fields_map: serde_json::Map<String, Value> = s
302
+ .schema_fields
303
+ .iter()
304
+ .map(|col| {
305
+ let src = col.source_field.as_deref().unwrap_or(&col.output_name);
306
+ let entry = json!({
307
+ "inputFields": [{
308
+ "namespace": format!("{}.source", self.config.namespace),
309
+ "name": name,
310
+ "field": src
311
+ }]
312
+ });
313
+ (col.output_name.clone(), entry)
314
+ })
315
+ .collect();
316
+ accepted_facets["columnLineage"] = json!({
317
+ "fields": fields_map,
318
+ "_producer": self.producer(),
319
+ "_schemaURL": "https://openlineage.io/spec/facets/1-1-1/ColumnLineageDatasetFacet.json"
320
+ });
321
+ }
322
+
265
323
  let mut out = vec![json!({
266
324
  "namespace": self.config.namespace,
267
- "name": u.accepted,
268
- "facets": {}
325
+ "name": name,
326
+ "facets": accepted_facets
269
327
  })];
328
+
329
+ // Rejected output (when configured): DIRECTORY type, rejected-row quality metrics.
270
330
  if let Some(ref rej) = u.rejected {
331
+ let (rej_ns, rej_path) = split_storage_uri(rej);
332
+ let rejected_dq_facet = json!({
333
+ "rowCount": s.rejected,
334
+ "validCount": 0u64,
335
+ "invalidCount": s.rejected,
336
+ "_producer": self.producer(),
337
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-2/DataQualityMetricsOutputDatasetFacet.json"
338
+ });
271
339
  out.push(json!({
272
- "namespace": self.config.namespace,
273
- "name": rej,
274
- "facets": {}
340
+ "namespace": format!("{}.rejected", self.config.namespace),
341
+ "name": name,
342
+ "facets": {
343
+ "symlinks": symlinks_facet(self.producer(), &rej_ns, &rej_path, "DIRECTORY"),
344
+ "dataQualityMetrics": rejected_dq_facet
345
+ }
275
346
  }));
276
347
  }
277
- json!(out)
348
+
349
+ (inputs, json!(out))
278
350
  }
279
- None => json!([]),
351
+ _ => (json!([]), json!([])),
280
352
  };
281
353
 
282
354
  let body = json!({
@@ -307,7 +379,7 @@ struct EntityStats {
307
379
  rejected: u64,
308
380
  warnings: u64,
309
381
  errors: u64,
310
- schema_fields: Vec<(String, String)>,
382
+ schema_fields: Vec<ColumnMapping>,
311
383
  }
312
384
 
313
385
  fn ms_to_iso8601(ms: u128) -> String {
@@ -324,6 +396,30 @@ fn ms_to_iso8601(ms: u128) -> String {
324
396
  }
325
397
  }
326
398
 
399
/// Split a storage URI into an OpenLineage-style `(namespace, name)` pair.
///
/// For a recognised cloud scheme the namespace is `scheme://authority` and the
/// name is the remaining path including its leading `/`. A URI with no path
/// (`s3://bucket`) yields `/` as the name. Anything else is treated as a local
/// path and returned unchanged under the `file` namespace.
///
/// `s3a://` is recognised alongside `s3://` because the REST catalog storage
/// selection accepts both spellings for S3. Without it an `s3a://` location
/// would be reported as a local file.
fn split_storage_uri(uri: &str) -> (String, String) {
    // Each entry ends in "://", so no entry can be a prefix of a URI that uses
    // a different scheme; order is therefore irrelevant.
    const CLOUD_PREFIXES: [&str; 7] = [
        "s3://", "s3a://", "gs://", "gcs://", "az://", "abfss://", "abfs://",
    ];

    for prefix in CLOUD_PREFIXES.iter() {
        if let Some(after_scheme) = uri.strip_prefix(*prefix) {
            return match after_scheme.find('/') {
                // The first '/' after the scheme ends the authority (bucket/container).
                Some(slash) => (
                    uri[..prefix.len() + slash].to_string(),
                    after_scheme[slash..].to_string(),
                ),
                // Bare bucket/container: the whole URI is the namespace.
                None => (uri.to_string(), "/".to_string()),
            };
        }
    }

    ("file".to_string(), uri.to_string())
}
414
+
415
+ fn symlinks_facet(producer: &str, namespace: &str, name: &str, ds_type: &str) -> Value {
416
+ json!({
417
+ "identifiers": [{ "namespace": namespace, "name": name, "type": ds_type }],
418
+ "_producer": producer,
419
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SymlinksDatasetFacet.json"
420
+ })
421
+ }
422
+
327
423
  impl RunObserver for OpenLineageObserver {
328
424
  fn on_event(&self, event: RunEvent) {
329
425
  match event {
@@ -349,7 +445,7 @@ impl RunObserver for OpenLineageObserver {
349
445
  },
350
446
  "job": {
351
447
  "namespace": self.config.namespace,
352
- "name": run_id,
448
+ "name": self.run_job_name,
353
449
  "facets": {}
354
450
  },
355
451
  "inputs": [],
@@ -444,7 +540,7 @@ impl RunObserver for OpenLineageObserver {
444
540
  },
445
541
  "job": {
446
542
  "namespace": self.config.namespace,
447
- "name": run_id,
543
+ "name": self.run_job_name,
448
544
  "facets": {}
449
545
  },
450
546
  "inputs": [],
@@ -462,8 +558,9 @@ impl RunObserver for OpenLineageObserver {
462
558
  pub fn build_observer(
463
559
  config: &LineageConfig,
464
560
  entities: &[EntityConfig],
561
+ config_path: &str,
465
562
  ) -> crate::FloeResult<Arc<dyn RunObserver>> {
466
- let obs = OpenLineageObserver::new(config, entities)?;
563
+ let obs = OpenLineageObserver::new(config, entities, config_path)?;
467
564
  Ok(Arc::new(obs))
468
565
  }
469
566
 
@@ -16,6 +16,7 @@ pub struct RunContext {
16
16
  pub run_id: String,
17
17
  pub started_at: String,
18
18
  pub run_timer: Instant,
19
+ pub full_refresh: bool,
19
20
  }
20
21
 
21
22
  impl RunContext {
@@ -82,6 +83,7 @@ impl RunContext {
82
83
  run_id,
83
84
  started_at,
84
85
  run_timer: Instant::now(),
86
+ full_refresh: options.full_refresh,
85
87
  })
86
88
  }
87
89
 
@@ -98,7 +100,13 @@ impl RunContext {
98
100
  let catalog_resolver = config::CatalogResolver::new(&config)?;
99
101
  let config_dir =
100
102
  crate::io::storage::paths::normalize_local_path(storage_resolver.config_dir());
101
- let config_path = crate::io::storage::paths::normalize_local_path(manifest_path);
103
+ let manifest_str = manifest_path.to_string_lossy();
104
+ let config_path = if config::is_remote_uri(&manifest_str) {
105
+ // Preserve the URI string as-is; normalize_local_path would collapse s3:// → s3:/
106
+ std::path::PathBuf::from(manifest_str.as_ref())
107
+ } else {
108
+ crate::io::storage::paths::normalize_local_path(manifest_path)
109
+ };
102
110
 
103
111
  // The manifest embeds report_base_uri; resolve it to a target if it looks local.
104
112
  let (report_target, report_base_path) =
@@ -145,6 +153,28 @@ impl RunContext {
145
153
  run_id,
146
154
  started_at,
147
155
  run_timer: Instant::now(),
156
+ full_refresh: options.full_refresh,
148
157
  })
149
158
  }
150
159
  }
160
+
161
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    /// Pins why remote manifest URIs skip `normalize_local_path`: normalizing
    /// changes the URI text, while `PathBuf::from` displays it unchanged.
    #[test]
    fn remote_uri_preserved_via_pathbuf_from() {
        let uri = "s3://bucket/manifests/prod.json";

        // Normalizing is expected to alter the text (e.g. "s3://" -> "s3:/").
        // NOTE(review): presumably from walking `Path::components()` — confirm
        // in `normalize_local_path`.
        let mangled = crate::io::storage::paths::normalize_local_path(Path::new(uri))
            .display()
            .to_string();
        assert_ne!(
            mangled, uri,
            "normalize_local_path should mangle s3:// (confirming the bug we guard against)"
        );

        // Building the PathBuf directly keeps the text intact.
        let round_tripped = PathBuf::from(uri).display().to_string();
        assert_eq!(round_tripped, uri);
    }
}