dataframely 2.8.0__tar.gz → 2.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. dataframely-2.8.2/.github/copilot-instructions.md +53 -0
  2. {dataframely-2.8.0 → dataframely-2.8.2}/.github/dependabot.yml +2 -0
  3. {dataframely-2.8.0 → dataframely-2.8.2}/.github/workflows/build.yml +2 -5
  4. dataframely-2.8.2/.github/workflows/chore-main.yml +21 -0
  5. dataframely-2.8.0/.github/workflows/chore.yml → dataframely-2.8.2/.github/workflows/chore-pr.yml +7 -14
  6. {dataframely-2.8.0 → dataframely-2.8.2}/.github/workflows/ci.yml +2 -2
  7. {dataframely-2.8.0 → dataframely-2.8.2}/.github/workflows/copilot-setup-steps.yml +1 -1
  8. {dataframely-2.8.0 → dataframely-2.8.2}/.github/workflows/scorecard.yml +1 -1
  9. {dataframely-2.8.0 → dataframely-2.8.2}/PKG-INFO +1 -1
  10. dataframely-2.8.2/SKILL.md +238 -0
  11. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_base_schema.py +31 -1
  12. {dataframely-2.8.0 → dataframely-2.8.2}/docs/conf.py +0 -1
  13. dataframely-2.8.2/docs/guides/coding-agents.md +75 -0
  14. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/index.md +1 -0
  15. {dataframely-2.8.0 → dataframely-2.8.2}/pyproject.toml +1 -1
  16. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_base.py +19 -0
  17. dataframely-2.8.0/.github/copilot-instructions.md +0 -237
  18. {dataframely-2.8.0 → dataframely-2.8.2}/.copier-answers.yml +0 -0
  19. {dataframely-2.8.0 → dataframely-2.8.2}/.envrc +0 -0
  20. {dataframely-2.8.0 → dataframely-2.8.2}/.gitattributes +0 -0
  21. {dataframely-2.8.0 → dataframely-2.8.2}/.github/CODEOWNERS +0 -0
  22. {dataframely-2.8.0 → dataframely-2.8.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  23. {dataframely-2.8.0 → dataframely-2.8.2}/.github/instructions/tests.instructions.md +0 -0
  24. {dataframely-2.8.0 → dataframely-2.8.2}/.github/release-drafter.yml +0 -0
  25. {dataframely-2.8.0 → dataframely-2.8.2}/.github/workflows/nightly.yml +0 -0
  26. {dataframely-2.8.0 → dataframely-2.8.2}/.gitignore +0 -0
  27. {dataframely-2.8.0 → dataframely-2.8.2}/.pre-commit-config.yaml +0 -0
  28. {dataframely-2.8.0 → dataframely-2.8.2}/.prettierignore +0 -0
  29. {dataframely-2.8.0 → dataframely-2.8.2}/.prettierrc +0 -0
  30. {dataframely-2.8.0 → dataframely-2.8.2}/.readthedocs.yml +0 -0
  31. {dataframely-2.8.0 → dataframely-2.8.2}/Cargo.lock +0 -0
  32. {dataframely-2.8.0 → dataframely-2.8.2}/Cargo.toml +0 -0
  33. {dataframely-2.8.0 → dataframely-2.8.2}/LICENSE +0 -0
  34. {dataframely-2.8.0 → dataframely-2.8.2}/README.md +0 -0
  35. {dataframely-2.8.0 → dataframely-2.8.2}/SECURITY.md +0 -0
  36. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/__init__.py +0 -0
  37. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_compat.py +0 -0
  38. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_deprecation.py +0 -0
  39. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_filter.py +0 -0
  40. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_match_to_schema.py +0 -0
  41. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_native.pyi +0 -0
  42. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_plugin.py +0 -0
  43. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_polars.py +0 -0
  44. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_pydantic.py +0 -0
  45. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_rule.py +0 -0
  46. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_serialization.py +0 -0
  47. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_storage/__init__.py +0 -0
  48. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_storage/_base.py +0 -0
  49. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_storage/_exc.py +0 -0
  50. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_storage/_fsspec.py +0 -0
  51. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_storage/constants.py +0 -0
  52. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_storage/delta.py +0 -0
  53. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_storage/parquet.py +0 -0
  54. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/_typing.py +0 -0
  55. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/collection/__init__.py +0 -0
  56. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/collection/_base.py +0 -0
  57. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/collection/collection.py +0 -0
  58. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/collection/filter_result.py +0 -0
  59. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/__init__.py +0 -0
  60. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/_base.py +0 -0
  61. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/_mixins.py +0 -0
  62. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/_registry.py +0 -0
  63. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/_utils.py +0 -0
  64. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/any.py +0 -0
  65. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/array.py +0 -0
  66. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/binary.py +0 -0
  67. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/bool.py +0 -0
  68. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/categorical.py +0 -0
  69. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/datetime.py +0 -0
  70. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/decimal.py +0 -0
  71. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/enum.py +0 -0
  72. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/float.py +0 -0
  73. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/integer.py +0 -0
  74. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/list.py +0 -0
  75. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/object.py +0 -0
  76. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/string.py +0 -0
  77. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/columns/struct.py +0 -0
  78. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/config.py +0 -0
  79. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/exc.py +0 -0
  80. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/experimental/__init__.py +0 -0
  81. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/experimental/infer_schema.py +0 -0
  82. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/filter_result.py +0 -0
  83. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/functional.py +0 -0
  84. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/py.typed +0 -0
  85. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/random.py +0 -0
  86. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/schema.py +0 -0
  87. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/testing/__init__.py +0 -0
  88. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/testing/const.py +0 -0
  89. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/testing/factory.py +0 -0
  90. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/testing/mask.py +0 -0
  91. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/testing/rules.py +0 -0
  92. {dataframely-2.8.0 → dataframely-2.8.2}/dataframely/testing/storage.py +0 -0
  93. {dataframely-2.8.0 → dataframely-2.8.2}/docker-compose.yml +0 -0
  94. {dataframely-2.8.0 → dataframely-2.8.2}/docs/_static/custom.css +0 -0
  95. {dataframely-2.8.0 → dataframely-2.8.2}/docs/_static/favicon.ico +0 -0
  96. {dataframely-2.8.0 → dataframely-2.8.2}/docs/_templates/autosummary/class.rst +0 -0
  97. {dataframely-2.8.0 → dataframely-2.8.2}/docs/_templates/autosummary/method.rst +0 -0
  98. {dataframely-2.8.0 → dataframely-2.8.2}/docs/_templates/classes/column.rst +0 -0
  99. {dataframely-2.8.0 → dataframely-2.8.2}/docs/_templates/classes/error.rst +0 -0
  100. {dataframely-2.8.0 → dataframely-2.8.2}/docs/_templates/classes/filter_result.rst +0 -0
  101. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/collection/generation.rst +0 -0
  102. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/collection/index.rst +0 -0
  103. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/collection/io.rst +0 -0
  104. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/collection/metadata.rst +0 -0
  105. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/collection/operations.rst +0 -0
  106. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/collection/validation.rst +0 -0
  107. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/columns/index.rst +0 -0
  108. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/errors/index.rst +0 -0
  109. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/experimental/index.rst +0 -0
  110. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/filter_result/failure_info.rst +0 -0
  111. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/filter_result/index.rst +0 -0
  112. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/index.rst +0 -0
  113. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/misc/index.rst +0 -0
  114. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/schema/conversion.rst +0 -0
  115. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/schema/generation.rst +0 -0
  116. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/schema/index.rst +0 -0
  117. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/schema/io.rst +0 -0
  118. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/schema/metadata.rst +0 -0
  119. {dataframely-2.8.0 → dataframely-2.8.2}/docs/api/schema/validation.rst +0 -0
  120. {dataframely-2.8.0 → dataframely-2.8.2}/docs/css/custom.css +0 -0
  121. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/development.md +0 -0
  122. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/examples/index.md +0 -0
  123. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/examples/real-world.ipynb +0 -0
  124. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/faq.md +0 -0
  125. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/features/column-metadata.md +0 -0
  126. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/features/data-generation.md +0 -0
  127. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/features/index.md +0 -0
  128. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/features/lazy-validation.md +0 -0
  129. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/features/primary-keys.md +0 -0
  130. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/features/serialization.md +0 -0
  131. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/features/sql-generation.md +0 -0
  132. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/migration/index.md +0 -0
  133. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/migration/v1-v2.md +0 -0
  134. {dataframely-2.8.0 → dataframely-2.8.2}/docs/guides/quickstart.md +0 -0
  135. {dataframely-2.8.0 → dataframely-2.8.2}/docs/index.md +0 -0
  136. {dataframely-2.8.0 → dataframely-2.8.2}/pixi.lock +0 -0
  137. {dataframely-2.8.0 → dataframely-2.8.2}/pixi.toml +0 -0
  138. {dataframely-2.8.0 → dataframely-2.8.2}/rust-toolchain.toml +0 -0
  139. {dataframely-2.8.0 → dataframely-2.8.2}/src/lib.rs +0 -0
  140. {dataframely-2.8.0 → dataframely-2.8.2}/src/polars_plugin/mod.rs +0 -0
  141. {dataframely-2.8.0 → dataframely-2.8.2}/src/polars_plugin/rule_failure.rs +0 -0
  142. {dataframely-2.8.0 → dataframely-2.8.2}/src/polars_plugin/utils.rs +0 -0
  143. {dataframely-2.8.0 → dataframely-2.8.2}/src/polars_plugin/validation_error.rs +0 -0
  144. {dataframely-2.8.0 → dataframely-2.8.2}/src/regex/errdefs.rs +0 -0
  145. {dataframely-2.8.0 → dataframely-2.8.2}/src/regex/mod.rs +0 -0
  146. {dataframely-2.8.0 → dataframely-2.8.2}/src/regex/repr.rs +0 -0
  147. {dataframely-2.8.0 → dataframely-2.8.2}/tests/benches/conftest.py +0 -0
  148. {dataframely-2.8.0 → dataframely-2.8.2}/tests/benches/test_collection.py +0 -0
  149. {dataframely-2.8.0 → dataframely-2.8.2}/tests/benches/test_failure.py +0 -0
  150. {dataframely-2.8.0 → dataframely-2.8.2}/tests/benches/test_schema.py +0 -0
  151. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_base.py +0 -0
  152. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_cast.py +0 -0
  153. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_collection_future_annotations.py +0 -0
  154. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_create_empty.py +0 -0
  155. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_filter_one_to_n.py +0 -0
  156. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_filter_validate.py +0 -0
  157. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_ignore_in_filter.py +0 -0
  158. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_implementation.py +0 -0
  159. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_join.py +0 -0
  160. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_matches.py +0 -0
  161. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_optional_members.py +0 -0
  162. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_propagate_row_failures.py +0 -0
  163. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_repr.py +0 -0
  164. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_sample.py +0 -0
  165. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_serialization.py +0 -0
  166. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_storage.py +0 -0
  167. {dataframely-2.8.0 → dataframely-2.8.2}/tests/collection/test_validate_input.py +0 -0
  168. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/__init__.py +0 -0
  169. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_any.py +0 -0
  170. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_array.py +0 -0
  171. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_binary.py +0 -0
  172. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_datetime.py +0 -0
  173. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_decimal.py +0 -0
  174. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_enum.py +0 -0
  175. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_float.py +0 -0
  176. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_integer.py +0 -0
  177. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_list.py +0 -0
  178. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_object.py +0 -0
  179. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_string.py +0 -0
  180. {dataframely-2.8.0 → dataframely-2.8.2}/tests/column_types/test_struct.py +0 -0
  181. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/__init__.py +0 -0
  182. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_alias.py +0 -0
  183. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_base.py +0 -0
  184. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_check.py +0 -0
  185. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_default_dtypes.py +0 -0
  186. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_matches.py +0 -0
  187. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_metadata.py +0 -0
  188. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_polars_schema.py +0 -0
  189. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_pyarrow.py +0 -0
  190. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_rules.py +0 -0
  191. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_sample.py +0 -0
  192. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_sqlalchemy_columns.py +0 -0
  193. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_str.py +0 -0
  194. {dataframely-2.8.0 → dataframely-2.8.2}/tests/columns/test_utils.py +0 -0
  195. {dataframely-2.8.0 → dataframely-2.8.2}/tests/conftest.py +0 -0
  196. {dataframely-2.8.0 → dataframely-2.8.2}/tests/core_validation/__init__.py +0 -0
  197. {dataframely-2.8.0 → dataframely-2.8.2}/tests/core_validation/test_match_to_schema.py +0 -0
  198. {dataframely-2.8.0 → dataframely-2.8.2}/tests/core_validation/test_rule_evaluation.py +0 -0
  199. {dataframely-2.8.0 → dataframely-2.8.2}/tests/experimental/test_infer_schema.py +0 -0
  200. {dataframely-2.8.0 → dataframely-2.8.2}/tests/failure_info/test_storage.py +0 -0
  201. {dataframely-2.8.0 → dataframely-2.8.2}/tests/functional/test_concat.py +0 -0
  202. {dataframely-2.8.0 → dataframely-2.8.2}/tests/functional/test_relationships.py +0 -0
  203. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_cast.py +0 -0
  204. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_create_empty.py +0 -0
  205. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_create_empty_if_none.py +0 -0
  206. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_filter.py +0 -0
  207. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_inheritance.py +0 -0
  208. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_matches.py +0 -0
  209. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_read_write_parquet.py +0 -0
  210. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_repr.py +0 -0
  211. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_rule_implementation.py +0 -0
  212. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_sample.py +0 -0
  213. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_serialization.py +0 -0
  214. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_storage.py +0 -0
  215. {dataframely-2.8.0 → dataframely-2.8.2}/tests/schema/test_validate.py +0 -0
  216. {dataframely-2.8.0 → dataframely-2.8.2}/tests/storage/test_delta.py +0 -0
  217. {dataframely-2.8.0 → dataframely-2.8.2}/tests/storage/test_fsspec.py +0 -0
  218. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_compat.py +0 -0
  219. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_config.py +0 -0
  220. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_deprecation.py +0 -0
  221. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_factory.py +0 -0
  222. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_native_regex.py +0 -0
  223. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_pydantic.py +0 -0
  224. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_random.py +0 -0
  225. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_serialization.py +0 -0
  226. {dataframely-2.8.0 → dataframely-2.8.2}/tests/test_typing.py +0 -0
@@ -0,0 +1,53 @@
1
+ # Dataframely
2
+
3
+ ## Package Management
4
+
5
+ This repository uses the Pixi package manager. When editing `pixi.toml`, run `pixi lock` afterwards.
6
+
7
+ When running any commands (like `pytest`), prepend them with `pixi run`.
8
+
9
+ ## Code Style
10
+
11
+ ### Documentation
12
+
13
+ - Document all public functions/methods and classes using docstrings
14
+ - For functions & methods, use Google Docstrings and include `Args` (if there are any arguments) and `Returns` (if
15
+ there is a return type).
16
+ - Do not include type hints in the docstrings
17
+ - Do not mention default values in the docstrings
18
+ - Do not write docstrings for private functions/methods unless the function is highly complex
19
+
20
+ ### License Headers
21
+
22
+ Do not manually adjust or add license headers. A pre-commit hook will take care of this.
23
+
24
+ ## Testing
25
+
26
+ - Never use classes for pytest tests; use only free functions
27
+ - Do not put `__init__.py` files into test directories
28
+ - Tests should not have docstrings unless they are very complicated or very specific, i.e. warrant a description beyond
29
+ the test's name
30
+ - All tests should follow the arrange-act-assert pattern. The respective logical blocks should be distinguished via
31
+ code comments as follows:
32
+
33
+ ```python
34
+ def test_method() -> None:
35
+ # Arrange
36
+ ...
37
+
38
+ # Act
39
+ ...
40
+
41
+ # Assert
42
+ ...
43
+ ```
44
+
45
+ - If two or more tests are structurally equivalent, they should be merged into a single test and parametrized with
46
+ `@pytest.mark.parametrize`
47
+ - If at least two tests share the same logic in the "arrange" step, the respective logic should be extracted into a
48
+ fixture
49
+
50
+ ## Reviewing
51
+
52
+ When reviewing code changes, make sure that the `SKILL.md` is up-to-date and in line with the public API of this
53
+ package.
@@ -10,3 +10,5 @@ updates:
10
10
  - "*"
11
11
  commit-message:
12
12
  prefix: ci
13
+ cooldown:
14
+ default-days: 7
@@ -57,14 +57,11 @@ jobs:
57
57
  environments: build
58
58
  - name: Set version
59
59
  run: pixi run -e build set-version
60
- - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
61
- with:
62
- python-version: "3.10"
63
60
  - name: Build wheel
64
61
  uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1.50.1
65
62
  with:
66
63
  command: build
67
- args: --out dist --release
64
+ args: --out dist --release -i python3.10
68
65
  manylinux: auto
69
66
  sccache: true
70
67
  - name: Check package
@@ -84,7 +81,7 @@ jobs:
84
81
  id-token: write
85
82
  environment: pypi
86
83
  steps:
87
- - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
84
+ - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
88
85
  with:
89
86
  path: dist
90
87
  merge-multiple: true
@@ -0,0 +1,21 @@
1
+ name: Chore
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ concurrency:
7
+ group: ${{ github.workflow }}-${{ github.ref }}
8
+ cancel-in-progress: true
9
+
10
+ jobs:
11
+ draft-release:
12
+ name: Draft Release
13
+ runs-on: ubuntu-latest
14
+ permissions:
15
+ contents: write
16
+ pull-requests: read
17
+ steps:
18
+ - name: Update release draft
19
+ uses: release-drafter/release-drafter@139054aeaa9adc52ab36ddf67437541f039b88e2 # v7.1.1
20
+ with:
21
+ token: ${{ github.token }}
@@ -3,8 +3,6 @@ on:
3
3
  pull_request_target:
4
4
  branches: [main]
5
5
  types: [opened, reopened, edited, synchronize]
6
- push:
7
- branches: [main]
8
6
 
9
7
  concurrency:
10
8
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -13,7 +11,6 @@ concurrency:
13
11
  jobs:
14
12
  check-pr-title:
15
13
  name: Check PR Title
16
- if: github.event_name == 'pull_request_target'
17
14
  runs-on: ubuntu-latest
18
15
  permissions:
19
16
  contents: read
@@ -28,7 +25,7 @@ jobs:
28
25
  GITHUB_TOKEN: ${{ github.token }}
29
26
  - name: Post comment about invalid PR title
30
27
  if: failure()
31
- uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2.9.4
28
+ uses: marocchino/sticky-pull-request-comment@70d2764d1a7d5d9560b100cbea0077fc8f633987 # v3.0.2
32
29
  with:
33
30
  header: conventional-commit-pr-title
34
31
  message: |
@@ -45,22 +42,18 @@ jobs:
45
42
  </details>
46
43
  - name: Delete comment about invalid PR title
47
44
  if: success()
48
- uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2.9.4
45
+ uses: marocchino/sticky-pull-request-comment@70d2764d1a7d5d9560b100cbea0077fc8f633987 # v3.0.2
49
46
  with:
50
47
  header: conventional-commit-pr-title
51
48
  delete: true
52
49
 
53
- release-drafter:
54
- name: ${{ github.event_name == 'pull_request_target' && 'Assign Labels' || 'Draft Release' }}
50
+ assign-labels:
51
+ name: Assign Labels
55
52
  runs-on: ubuntu-latest
56
53
  permissions:
57
- contents: write
58
54
  pull-requests: write
59
55
  steps:
60
- - name: ${{ github.event_name == 'pull_request_target' && 'Assign labels' || 'Update release draft' }}
61
- uses: release-drafter/release-drafter@6db134d15f3909ccc9eefd369f02bd1e9cffdf97 # v6.2.0
56
+ - name: Assign labels
57
+ uses: release-drafter/release-drafter/autolabeler@139054aeaa9adc52ab36ddf67437541f039b88e2 # v7.1.1
62
58
  with:
63
- disable-releaser: ${{ github.event_name == 'pull_request_target' }}
64
- disable-autolabeler: ${{ github.event_name == 'push' }}
65
- env:
66
- GITHUB_TOKEN: ${{ github.token }}
59
+ token: ${{ github.token }}
@@ -30,7 +30,7 @@ jobs:
30
30
  - name: Install Rust
31
31
  run: rustup show
32
32
  - name: Cache Rust dependencies
33
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2
33
+ uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
34
34
  - name: pre-commit
35
35
  run: pixi run pre-commit-run --color=always --show-diff-on-failure
36
36
 
@@ -71,7 +71,7 @@ jobs:
71
71
  - name: Install Rust
72
72
  run: rustup show
73
73
  - name: Cache Rust dependencies
74
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2
74
+ uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
75
75
  - name: Install repository
76
76
  run: pixi run -e ${{ matrix.environment }} postinstall
77
77
  - name: Run pytest
@@ -21,6 +21,6 @@ jobs:
21
21
  - name: Install Rust
22
22
  run: rustup show
23
23
  - name: Cache Rust dependencies
24
- uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2.8.2
24
+ uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
25
25
  - name: Install repository
26
26
  run: pixi run postinstall
@@ -74,6 +74,6 @@ jobs:
74
74
  # Upload the results to GitHub's code scanning dashboard (optional).
75
75
  # Commenting out will disable upload of results to your repo's Code Scanning dashboard
76
76
  - name: "Upload to code-scanning"
77
- uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e # v3.29.5
77
+ uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
78
78
  with:
79
79
  sarif_file: results.sarif
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataframely
3
- Version: 2.8.0
3
+ Version: 2.8.2
4
4
  Classifier: Programming Language :: Python :: 3
5
5
  Classifier: Programming Language :: Python :: 3.10
6
6
  Classifier: Programming Language :: Python :: 3.11
@@ -0,0 +1,238 @@
1
+ ---
2
+ name: dataframely
3
+ description: Best practices for polars data processing with dataframely. Covers definitions of Schema and Collection, usage of
4
+ .validate() and .filter(), type hints, and testing. Use when writing or modifying code involving dataframely or
5
+ polars data frames.
6
+ license: BSD-3-Clause
7
+ user-invocable: false
8
+ ---
9
+
10
+ # Overview
11
+
12
+ `dataframely` provides two types:
13
+
14
+ - `dy.Schema` documents and enforces the structure of a single data frame
15
+ - `dy.Collection` documents and enforces the relationships between multiple related data frames that each have their
16
+ own `dy.Schema`
17
+
18
+ ## `dy.Schema`
19
+
20
+ A subclass of `dy.Schema` describes the structure of a single dataframe.
21
+
22
+ ```python
23
+ class MyHouseSchema(dy.Schema):
24
+ """A schema for a dataframe describing houses."""
25
+
26
+ street = dy.String(primary_key=True)
27
+ number = dy.UInt16(primary_key=True)
28
+ #: Description on the number of rooms.
29
+ rooms = dy.UInt8()
30
+ #: Description on the area of the house.
31
+ area = dy.UInt16()
32
+ ```
33
+
34
+ The schema can be used in type hints via `dy.DataFrame[MyHouseSchema]` and `dy.LazyFrame[MyHouseSchema]` to express
35
+ schema adherence statically. It can also be used to validate the structure and contents of a data frame at runtime
36
+ using validation and filtering.
37
+
38
+ `dy.DataFrame[...]` and `dy.LazyFrame[...]` are typically referred to as "typed data frames". They are typing-only
39
+ wrappers around `pl.DataFrame` and `pl.LazyFrame`, respectively, and only express intent. They are never initialized at
40
+ runtime.
41
+
42
+ ### Defining Constraints
43
+
44
+ Persist all implicit assumptions on the data as constraints in the schema. Use docstrings purely to answer the "what"
45
+ about the column contents.
46
+
47
+ - Use the most specific type possible for each column (e.g. `dy.Enum` instead of `dy.String` when applicable).
48
+ - Use pre-defined arguments (e.g. `nullable`, `min`, `regex`) for column-level constraints if possible.
49
+ - Use the `check` argument for non-standard column-level constraints that cannot be expressed using pre-defined
50
+ arguments. Prefer defining the check as a dictionary with keys describing the type of check:
51
+
52
+ ```python
53
+ class MySchema(dy.Schema):
54
+ col = dy.UInt8(check={"divisible_by_two": lambda col: (col % 2) == 0})
55
+ ```
56
+
57
+ - Use rules (i.e. methods decorated with `@dy.rule`) for cross-column constraints. Use expressive names for the rules
58
+ and use `cls` to refer to the schema:
59
+
60
+ ```python
61
+ class MySchema(dy.Schema):
62
+ col1 = dy.UInt8()
63
+ col2 = dy.UInt8()
64
+
65
+ @dy.rule()
66
+ def col1_greater_col2(cls) -> pl.Expr:
67
+ return cls.col1.col > cls.col2.col
68
+ ```
69
+
70
+ - Use group rules (i.e. methods decorated with `@dy.rule(group_by=...)`) for cross-row constraints beyond primary key
71
+ checks.
72
+
73
+ ### Referencing Columns
74
+
75
+ When referencing columns of the schema anywhere in the code, always reference the column as an attribute of the schema class:
76
+
77
+ - Use `Schema.column.col` instead of `pl.col("column")` to obtain a `pl.Expr` referencing the column.
78
+ - Use `Schema.column.name` to reference the column name as a string.
79
+
80
+ This allows for easier refactorings and enables lookups on column definitions and constraints via LSP.
81
+
82
+ ## `dy.Collection`
83
+
84
+ A subclass of `dy.Collection` describes a set of related data frames, each described by a `dy.Schema`. Data frames in a
85
+ collection should share at least a subset of their primary key.
86
+
87
+ ```python
88
+ class MyStreetSchema(dy.Schema):
89
+ """A schema for a dataframe describing streets."""
90
+
91
+ # Shared primary key component with MyHouseSchema
92
+ street = dy.String(primary_key=True)
93
+ city = dy.String()
94
+
95
+
96
+ class MyCollection(dy.Collection):
97
+ """A collection of related dataframes."""
98
+
99
+ houses: dy.LazyFrame[MyHouseSchema]
100
+ streets: dy.LazyFrame[MyStreetSchema]
101
+ ```
102
+
103
+ The collection can be used in a standalone manner (much like a dataclass). It can also be used to validate the
104
+ structure and contents of its members and their relationships at runtime using validation and filtering.
105
+
106
+ ### Defining Constraints
107
+
108
+ Persist all implicit assumptions about the relationships between the collection's data frames as constraints in the
109
+ collection.
110
+
111
+ - Use filters (i.e. methods decorated with `@dy.filter`) to enforce assumptions about the relationships (e.g. 1:1, 1:N)
112
+ between the collection's data frames. Leverage `dy.functional` for writing filter logic.
113
+
114
+ ```python
115
+ class MyCollection(dy.Collection):
116
+ houses: dy.LazyFrame[MyHouseSchema]
117
+ streets: dy.LazyFrame[MyStreetSchema]
118
+
119
+ @dy.filter()
120
+ def all_houses_on_known_streets(cls) -> pl.LazyFrame:
121
+ return dy.functional.require_relationship_one_to_at_least_one(
122
+ cls.streets, cls.houses, on="street"
123
+ )
124
+ ```
125
+
126
+ # Usage Conventions
127
+
128
+ ## Clear Interfaces
129
+
130
+ Structure data processing code with clear interfaces documented using `dataframely` type hints:
131
+
132
+ ```python
133
+ def preprocess(raw: dy.LazyFrame[MyRawSchema]) -> dy.DataFrame[MyPreprocessedSchema]:
134
+ # Internal data frames do not require schemas
135
+ df: pl.LazyFrame = ...
136
+ return MyPreprocessedSchema.validate(df, cast=True)
137
+ ```
138
+
139
+ - Use schemas for all input and output data frames in a function. Omit type hints if the function is a private helper
140
+ (prefixed with `_`) unless the schema critically improves readability or testability.
141
+ - Omit schemas for short-lived temporary data frames. Never define schemas for function-local data frames.
142
+
143
+ ## Validation and Filtering
144
+
145
+ Both `.validate` and `.filter` enforce the schema at runtime. Pass `cast=True` for safe type-casting.
146
+
147
+ - **`Schema.validate`** — raises on failure. Use when failures are unexpected (e.g. transforming already-validated
148
+ data).
149
+ - **`Schema.filter`** — returns valid rows plus a `FailureInfo` describing filtered-out rows. Use when failures are
150
+ possible and should be handled gracefully. Failures should either be kept around or logged for introspection. The
151
+ `FailureInfo` object provides several utility methods to obtain information about the failures:
152
+ - `len(failure)` provides the total number of failures
153
+ - `failure.counts()` provides the number of violations by rule
154
+ - `failure.invalid()` provides the data frame of invalid rows
155
+ - `failure.details()` provides the data frame of invalid rows with additional columns providing information on which
156
+ rules were violated
157
+
158
+ When performing validation or filtering, prefer using `pipe` to clarify the flow of data:
159
+
160
+ ```python
161
+ result = df.pipe(MySchema.validate)
162
+ out, failures = df.pipe(MySchema.filter)
163
+ ```
164
+
165
+ ### Pure Casting
166
+
167
+ Use `Schema.cast` as an escape-hatch when it is already known that the data frame conforms to the schema and the
168
+ runtime cost of the validation should not be incurred. Generally, prefer using `Schema.validate` or `Schema.filter`.
169
+
170
+ ## Testing
171
+
172
+ Unless otherwise specified by the user or the project context, add unit tests for all (non-private) methods performing
173
+ data transformations.
174
+
175
+ - Do not test properties already guaranteed by the schema (e.g. data types, nullability, value constraints).
176
+
177
+ ### Test structure
178
+
179
+ Write tests with the following structure:
180
+
181
+ 1. "Arrange": Define synthetic input data and expected output
182
+ 2. "Act": Execute the transformation
183
+ 3. "Assert": Compare expected and actual output using `assert_frame_equal` from `polars.testing`
184
+
185
+ ```python
186
+ from polars.testing import assert_frame_equal
187
+
188
+
189
+ def test_grouped_sum():
190
+ df = pl.DataFrame({
191
+ "col1": [1, 2, 3],
192
+ "col2": ["a", "a", "b"],
193
+ }).pipe(MyInputSchema.validate, cast=True)
194
+
195
+ expected = pl.DataFrame({
196
+ "col1": ["a", "b"],
197
+ "col2": [3, 3],
198
+ })
199
+
200
+ result = my_code(df)
201
+
202
+ assert_frame_equal(expected, result)
203
+ ```
204
+
205
+ ### Generating Synthetic Test Data
206
+
207
+ Use `dataframely`'s synthetic data generation for creating inputs to functions requiring typed data frames in their
208
+ input. Generate synthetic data for schemas as follows:
209
+
210
+ - Use `MySchema.sample(num_rows=...)` to generate fully random data when exact contents don't matter.
211
+ - Use `MySchema.sample(overrides=...)` to generate random data with specific columns pinned to certain values for
212
+ testing specific functionality. Prefer using dicts of lists for overrides unless specifically prompted otherwise.
213
+ - When using dicts of lists: for providing overrides that are constant across all rows, provide scalar values instead
214
+ of lists of equal values.
215
+ - Always use `MySchema.create_empty()` instead of sampling with empty overrides when an empty data frame is needed.
216
+
217
+ Synthetic data for collections should be generated as follows:
218
+
219
+ - Use `MyCollection.sample(num_rows=...)` to generate fully random data when exact contents don't matter.
220
+ - Use `MyCollection.sample(overrides=...)` to generate random data where certain values of the collection members
221
+ matter. Use lists of dicts for providing overrides as "objects" spanning the collection members.
222
+ - Values for shared primary keys must be provided at the root of the dictionaries
223
+ - Values for individual collection members must be provided in nested dictionaries under the keys corresponding to
224
+ the collection member names.
225
+ - Always use `MyCollection.create_empty()` instead of sampling with empty overrides when an empty collection is needed.
226
+
227
+ ## I/O Conventions
228
+
229
+ When writing typed data frames to disk, prefer using `MySchema.write_...` instead of using `write_...` directly on the
230
+ data frame. This ensures that schema metadata is persisted alongside the data and can be leveraged when reading the
231
+ data back in.
232
+
233
+ When reading typed data frames from disk, prefer using `MySchema.read_...` instead of using `pl.read_...` directly.
234
+
235
+ # Getting more information
236
+
237
+ `dataframely` provides clear function signatures, type hints and docstrings for the full public API. For more
238
+ information, inspect the source code in the site packages. If available, always use the LSP tool to find documentation.
@@ -119,7 +119,9 @@ class SchemaMeta(ABCMeta):
119
119
  result = Metadata()
120
120
  for base in bases:
121
121
  result.update(mcs._get_metadata_recursively(base))
122
- result.update(mcs._get_metadata(namespace))
122
+ namespace_metadata = mcs._get_metadata(namespace)
123
+ mcs._remove_overridden_columns(result, namespace, bases)
124
+ result.update(namespace_metadata)
123
125
  namespace[_COLUMN_ATTR] = result.columns
124
126
  cls = super().__new__(mcs, name, bases, namespace, *args, **kwargs)
125
127
 
@@ -207,6 +209,34 @@ class SchemaMeta(ABCMeta):
207
209
  val._name = val.alias or name
208
210
  return val
209
211
 
212
+ @staticmethod
213
+ def _remove_overridden_columns(
214
+ result: Metadata,
215
+ namespace: dict[str, Any],
216
+ bases: tuple[type[object], ...],
217
+ ) -> None:
218
+ """Remove inherited columns that the child namespace explicitly overrides.
219
+
220
+ Before merging the child namespace, we must drop any parent columns whose
221
+ attribute name is redefined in the child. This allows subclasses to redefine
222
+ inherited columns while still detecting genuine alias conflicts.
223
+
224
+ In multiple-inheritance scenarios, the same attribute name may appear in more
225
+ than one base with different aliases, so we walk all parent MROs and collect
226
+ every matching column key to remove.
227
+ """
228
+ for attr, value in namespace.items():
229
+ if not isinstance(value, Column):
230
+ continue
231
+ keys_to_remove: set[str] = set()
232
+ for base in bases:
233
+ for parent_cls in base.__mro__:
234
+ parent_col = parent_cls.__dict__.get(attr)
235
+ if parent_col is not None and isinstance(parent_col, Column):
236
+ keys_to_remove.add(parent_col.alias or attr)
237
+ for parent_key in keys_to_remove:
238
+ result.columns.pop(parent_key, None)
239
+
210
240
  @staticmethod
211
241
  def _get_metadata_recursively(kls: type[object]) -> Metadata:
212
242
  result = Metadata()
@@ -22,7 +22,6 @@ from typing import Any, cast
22
22
 
23
23
  _mod = importlib.import_module("dataframely")
24
24
 
25
-
26
25
  project = "dataframely"
27
26
  copyright = f"{datetime.date.today().year}, QuantCo, Inc"
28
27
  author = "QuantCo, Inc."
@@ -0,0 +1,75 @@
1
+ # Using `dataframely` with coding agents
2
+
3
+ Coding agents like [Claude Code](https://code.claude.com/), [Codex](https://openai.com/codex/) and
4
+ [GitHub Copilot](https://github.com/features/copilot) are particularly powerful when two criteria are met:
5
+
6
+ 1. The agent has access to the full context required to solve the problem, i.e. does not have to guess.
7
+ 2. The results of the agent's work can be easily verified.
8
+
9
+ When writing data processing logic, `dataframely` helps to fulfill these criteria.
10
+
11
+ To help your coding agent write idiomatic `dataframely` code, we provide a `dataframely`
12
+ [skill](https://raw.githubusercontent.com/Quantco/dataframely/refs/heads/main/SKILL.md) following the
13
+ [`agentskills.io` spec](https://agentskills.io/specification). You can install it by placing it where your agent can
14
+ find it. For example, if you are using Claude Code:
15
+
16
+ ```bash
17
+ mkdir -p .claude/skills/dataframely/
18
+ curl -o .claude/skills/dataframely/SKILL.md https://raw.githubusercontent.com/Quantco/dataframely/refs/heads/main/SKILL.md
19
+ ```
20
+
21
+ or if you are using [skills.sh](https://skills.sh/) to manage your skills:
22
+
23
+ ```bash
24
+ npx skills add Quantco/dataframely
25
+ ```
26
+
27
+ Refer to the documentation of your coding agent for instructions on how to add custom skills.
28
+
29
+ ## Tell the agent about your data with `dataframely` schemas
30
+
31
+ `dataframely` schemas provide a clear format for documenting dataframe structure and contents, which helps coding
32
+ agents understand your code base. We recommend structuring your data processing code using clear interfaces that are
33
+ documented using `dataframely` type hints. This streamlines your coding agent's ability to find the right schema at the
34
+ right time.
35
+
36
+ For example:
37
+
38
+ ```python
39
+ def preprocess(raw: dy.LazyFrame[MyRawSchema]) -> dy.DataFrame[MyPreprocessedSchema]:
40
+ ...
41
+ ```
42
+
43
+ gives a coding agent much more information than the schema-less alternative:
44
+
45
+ ```python
46
+ def load_data(raw: pl.LazyFrame) -> pl.DataFrame:
47
+ ...
48
+ ```
49
+
50
+ This convention also makes your code more readable and maintainable for human developers.
51
+
52
+ If there is additional domain information that is not natively expressed through the structure of the schema, we
53
+ recommend documenting this as docstrings on the definition of the schema columns. One common example would be the
54
+ semantic meanings of enum values referring to conventions in the data:
55
+
56
+ ```python
57
+ class HospitalStaySchema(dy.Schema):
58
+ # Reason for admission to the hospital
59
+ # N = Emergency
60
+ # V = Transfer from another hospital
61
+ # ...
62
+ admission_reason = dy.Enum(["N", "V", ...])
63
+ ```
64
+
65
+ ## Verifying results
66
+
67
+ `dataframely` supports you and your coding agent in writing unit tests for individual pieces of logic. One significant
68
+ bottleneck is the generation of appropriate test data. Check out
69
+ [our documentation on synthetic data generation](./features/data-generation.md) to see how `dataframely` can help you
70
+ generate realistic test data that meets the constraints of your schema. We recommend requiring your coding agent to
71
+ write tests using this functionality to verify its work.
72
+
73
+ <!-- prettier-ignore -->
74
+ > [!NOTE]
75
+ > The official skill already tells your coding agent how to best write unit tests with `dataframely`.
@@ -7,6 +7,7 @@
7
7
  quickstart
8
8
  examples/index
9
9
  features/index
10
+ coding-agents
10
11
  development
11
12
  migration/index
12
13
  faq
@@ -27,7 +27,7 @@ description = "A declarative, polars-native data frame validation library"
27
27
  name = "dataframely"
28
28
  readme = "README.md"
29
29
  requires-python = ">=3.10"
30
- version = "2.8.0"
30
+ version = "2.8.2"
31
31
 
32
32
  [project.optional-dependencies]
33
33
  deltalake = ["deltalake"]
@@ -141,3 +141,22 @@ def test_user_error_polars_datatype_type() -> None:
141
141
  class MySchemaWithPolarsDataTypeType(dy.Schema):
142
142
  a = dy.Int32(nullable=False)
143
143
  b = pl.String # User error: Used pl.String instead of dy.String()
144
+
145
+
146
+ def test_override() -> None:
147
+ class FirstSchema(dy.Schema):
148
+ x = dy.Int64()
149
+
150
+ class SecondSchema(FirstSchema):
151
+ x = dy.Int64(nullable=True)
152
+
153
+ first_columns = FirstSchema.columns()
154
+ second_columns = SecondSchema.columns()
155
+
156
+ assert set(first_columns) == {"x"}
157
+ assert set(second_columns) == {"x"}
158
+
159
+ assert first_columns["x"].nullable is False
160
+ assert second_columns["x"].nullable is True
161
+
162
+ assert type(second_columns["x"]) is type(first_columns["x"])