dataframely 2.8.2__tar.gz → 2.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataframely-2.8.2 → dataframely-2.9.0}/PKG-INFO +1 -1
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/_base.py +118 -2
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/_mixins.py +12 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/any.py +6 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/array.py +18 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/binary.py +6 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/bool.py +6 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/categorical.py +4 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/datetime.py +49 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/decimal.py +10 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/enum.py +5 -1
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/float.py +21 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/integer.py +15 -1
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/list.py +13 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/object.py +4 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/string.py +14 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/struct.py +6 -1
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/schema.py +28 -6
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/coding-agents.md +1 -1
- {dataframely-2.8.2 → dataframely-2.9.0}/pixi.lock +6229 -6639
- {dataframely-2.8.2 → dataframely-2.9.0}/pixi.toml +1 -1
- {dataframely-2.8.2 → dataframely-2.9.0}/pyproject.toml +1 -1
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_cast.py +3 -3
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_any.py +6 -0
- dataframely-2.9.0/tests/columns/test_base.py +188 -0
- dataframely-2.9.0/tests/columns/test_pydantic.py +363 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_cast.py +18 -4
- dataframely-2.9.0/tests/schema/test_pydantic_model.py +76 -0
- dataframely-2.8.2/tests/columns/test_base.py +0 -12
- {dataframely-2.8.2 → dataframely-2.9.0}/.copier-answers.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.envrc +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.gitattributes +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/CODEOWNERS +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/copilot-instructions.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/dependabot.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/instructions/tests.instructions.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/release-drafter.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/workflows/build.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/workflows/chore-main.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/workflows/chore-pr.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/workflows/ci.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/workflows/copilot-setup-steps.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/workflows/nightly.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.github/workflows/scorecard.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.gitignore +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.pre-commit-config.yaml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.prettierignore +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.prettierrc +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/.readthedocs.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/Cargo.lock +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/Cargo.toml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/LICENSE +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/README.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/SECURITY.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_base_schema.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_compat.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_deprecation.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_filter.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_match_to_schema.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_native.pyi +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_plugin.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_polars.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_pydantic.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_rule.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_serialization.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_storage/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_storage/_base.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_storage/_exc.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_storage/_fsspec.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_storage/constants.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_storage/delta.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_storage/parquet.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/_typing.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/collection/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/collection/_base.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/collection/collection.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/collection/filter_result.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/_registry.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/columns/_utils.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/config.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/exc.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/experimental/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/experimental/infer_schema.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/filter_result.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/functional.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/py.typed +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/random.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/testing/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/testing/const.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/testing/factory.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/testing/mask.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/testing/rules.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/dataframely/testing/storage.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docker-compose.yml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/_static/custom.css +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/_static/favicon.ico +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/_templates/autosummary/class.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/_templates/autosummary/method.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/_templates/classes/column.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/_templates/classes/error.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/_templates/classes/filter_result.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/collection/generation.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/collection/index.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/collection/io.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/collection/metadata.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/collection/operations.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/collection/validation.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/columns/index.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/errors/index.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/experimental/index.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/filter_result/failure_info.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/filter_result/index.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/index.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/misc/index.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/schema/conversion.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/schema/generation.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/schema/index.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/schema/io.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/schema/metadata.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/api/schema/validation.rst +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/conf.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/css/custom.css +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/development.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/examples/index.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/examples/real-world.ipynb +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/faq.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/features/column-metadata.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/features/data-generation.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/features/index.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/features/lazy-validation.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/features/primary-keys.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/features/serialization.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/features/sql-generation.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/index.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/migration/index.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/migration/v1-v2.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/guides/quickstart.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/docs/index.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/rust-toolchain.toml +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0/skills}/SKILL.md +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/src/lib.rs +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/src/polars_plugin/mod.rs +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/src/polars_plugin/rule_failure.rs +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/src/polars_plugin/utils.rs +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/src/polars_plugin/validation_error.rs +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/src/regex/errdefs.rs +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/src/regex/mod.rs +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/src/regex/repr.rs +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/benches/conftest.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/benches/test_collection.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/benches/test_failure.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/benches/test_schema.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_base.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_collection_future_annotations.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_create_empty.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_filter_one_to_n.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_filter_validate.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_ignore_in_filter.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_implementation.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_join.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_matches.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_optional_members.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_propagate_row_failures.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_repr.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_sample.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_serialization.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_storage.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/collection/test_validate_input.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_array.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_binary.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_datetime.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_decimal.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_enum.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_float.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_integer.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_list.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_object.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_string.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/column_types/test_struct.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_alias.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_check.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_default_dtypes.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_matches.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_metadata.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_polars_schema.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_pyarrow.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_rules.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_sample.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_sqlalchemy_columns.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_str.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/columns/test_utils.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/conftest.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/core_validation/__init__.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/core_validation/test_match_to_schema.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/core_validation/test_rule_evaluation.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/experimental/test_infer_schema.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/failure_info/test_storage.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/functional/test_concat.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/functional/test_relationships.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_base.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_create_empty.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_create_empty_if_none.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_filter.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_inheritance.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_matches.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_read_write_parquet.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_repr.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_rule_implementation.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_sample.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_serialization.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_storage.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/schema/test_validate.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/storage/test_delta.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/storage/test_fsspec.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_compat.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_config.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_deprecation.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_factory.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_native_regex.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_pydantic.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_random.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_serialization.py +0 -0
- {dataframely-2.8.2 → dataframely-2.9.0}/tests/test_typing.py +0 -0
|
@@ -5,14 +5,15 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import inspect
|
|
7
7
|
import sys
|
|
8
|
+
import warnings
|
|
8
9
|
from abc import ABC, abstractmethod
|
|
9
10
|
from collections import Counter
|
|
10
11
|
from collections.abc import Callable, Mapping, Sequence
|
|
11
|
-
from typing import Any, TypeAlias, cast
|
|
12
|
+
from typing import Annotated, Any, TypeAlias, cast
|
|
12
13
|
|
|
13
14
|
import polars as pl
|
|
14
15
|
|
|
15
|
-
from dataframely._compat import pa, sa, sa_TypeEngine
|
|
16
|
+
from dataframely._compat import pa, pydantic, sa, sa_TypeEngine
|
|
16
17
|
from dataframely._polars import PolarsDataType
|
|
17
18
|
from dataframely.random import Generator
|
|
18
19
|
|
|
@@ -222,6 +223,50 @@ class Column(ABC):
|
|
|
222
223
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
223
224
|
"""The :mod:`pyarrow` dtype equivalent of this column data type."""
|
|
224
225
|
|
|
226
|
+
# ----------------------------------- PYDANTIC ----------------------------------- #
|
|
227
|
+
|
|
228
|
+
def pydantic_field(self) -> Any:
|
|
229
|
+
"""Obtain a pydantic field type for this column definition.
|
|
230
|
+
|
|
231
|
+
Returns:
|
|
232
|
+
A pydantic-compatible type annotation that includes structured constraints
|
|
233
|
+
(such as `min`, `max`, ...).
|
|
234
|
+
|
|
235
|
+
Warning:
|
|
236
|
+
Custom checks are not translated to pydantic validators.
|
|
237
|
+
"""
|
|
238
|
+
if self.check is not None:
|
|
239
|
+
warnings.warn(
|
|
240
|
+
f"Custom checks for column '{self.name or self.__class__.__name__}' "
|
|
241
|
+
"are not translated to pydantic constraints."
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
python_type = self._python_type
|
|
245
|
+
if self.nullable:
|
|
246
|
+
python_type = python_type | None
|
|
247
|
+
|
|
248
|
+
field_kwargs = self._pydantic_field_kwargs()
|
|
249
|
+
if field_kwargs:
|
|
250
|
+
return Annotated[python_type, pydantic.Field(**field_kwargs)]
|
|
251
|
+
return python_type
|
|
252
|
+
|
|
253
|
+
@property
|
|
254
|
+
@abstractmethod
|
|
255
|
+
def _python_type(self) -> Any:
|
|
256
|
+
"""The native Python type corresponding to this column definition."""
|
|
257
|
+
|
|
258
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
259
|
+
"""Return kwargs for pydantic.Field initialization.
|
|
260
|
+
|
|
261
|
+
This method should be extended by subclasses and mixins to add their
|
|
262
|
+
specific constraints. Subclasses should call super() and extend the
|
|
263
|
+
returned dictionary.
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
A dictionary of kwargs to pass to pydantic.Field.
|
|
267
|
+
"""
|
|
268
|
+
return {}
|
|
269
|
+
|
|
225
270
|
# ------------------------------------ HELPER ------------------------------------ #
|
|
226
271
|
|
|
227
272
|
@property
|
|
@@ -234,6 +279,77 @@ class Column(ABC):
|
|
|
234
279
|
"""Obtain a Polars column expression for the column."""
|
|
235
280
|
return pl.col(self.name)
|
|
236
281
|
|
|
282
|
+
def with_properties(self, **kwargs: Any) -> Self:
|
|
283
|
+
"""Copy the current column definition while updating the provided properties.
|
|
284
|
+
|
|
285
|
+
All other properties from the original column are preserved.
|
|
286
|
+
|
|
287
|
+
Args:
|
|
288
|
+
**kwargs: Properties to update on the new column instance. The set of allowed properties depends on the type of the column.
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
A new column instance with updated properties.
|
|
292
|
+
"""
|
|
293
|
+
new_kwargs = {
|
|
294
|
+
k: getattr(self, k) for k in inspect.signature(self.__class__).parameters
|
|
295
|
+
} | kwargs
|
|
296
|
+
return self.__class__(**new_kwargs)
|
|
297
|
+
|
|
298
|
+
def with_nullable(self, nullable: bool) -> Self:
|
|
299
|
+
"""Return a new column definition with specified nullability.
|
|
300
|
+
|
|
301
|
+
Args:
|
|
302
|
+
nullable: Whether the new column may contain null values.
|
|
303
|
+
|
|
304
|
+
Returns:
|
|
305
|
+
A new column instance with updated nullability.
|
|
306
|
+
"""
|
|
307
|
+
return self.with_properties(nullable=nullable)
|
|
308
|
+
|
|
309
|
+
def with_alias(self, alias: str) -> Self:
|
|
310
|
+
"""Return a new column definition with a specified alias.
|
|
311
|
+
|
|
312
|
+
Args:
|
|
313
|
+
alias: The alias to use for the column name.
|
|
314
|
+
|
|
315
|
+
Returns:
|
|
316
|
+
A new column instance with the specified alias.
|
|
317
|
+
"""
|
|
318
|
+
return self.with_properties(alias=alias)
|
|
319
|
+
|
|
320
|
+
def with_check(self, check: Check) -> Self:
|
|
321
|
+
"""Return a new column definition with a specified check.
|
|
322
|
+
|
|
323
|
+
Args:
|
|
324
|
+
check: A custom validation rule or rules for the column.
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
A new column instance with the specified check.
|
|
328
|
+
"""
|
|
329
|
+
return self.with_properties(check=check)
|
|
330
|
+
|
|
331
|
+
def with_primary_key(self, primary_key: bool) -> Self:
|
|
332
|
+
"""Return a new column definition with a specified primary key status.
|
|
333
|
+
|
|
334
|
+
Args:
|
|
335
|
+
primary_key: Whether the column should be part of the primary key.
|
|
336
|
+
|
|
337
|
+
Returns:
|
|
338
|
+
A new column instance with updated primary key status.
|
|
339
|
+
"""
|
|
340
|
+
return self.with_properties(primary_key=primary_key)
|
|
341
|
+
|
|
342
|
+
def with_metadata(self, metadata: dict[str, Any]) -> Self:
|
|
343
|
+
"""Return a new column definition with specified metadata.
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
metadata: A dictionary of metadata to attach to the column.
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
A new column instance with the specified metadata.
|
|
350
|
+
"""
|
|
351
|
+
return self.with_properties(metadata=metadata)
|
|
352
|
+
|
|
237
353
|
# ----------------------------------- SAMPLING ----------------------------------- #
|
|
238
354
|
|
|
239
355
|
def sample(self, generator: Generator, n: int = 1) -> pl.Series:
|
|
@@ -80,6 +80,18 @@ class OrdinalMixin(Generic[T], Base):
|
|
|
80
80
|
result["max_exclusive"] = expr < self.max_exclusive # type: ignore
|
|
81
81
|
return result
|
|
82
82
|
|
|
83
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
84
|
+
kwargs = super()._pydantic_field_kwargs()
|
|
85
|
+
if self.min is not None:
|
|
86
|
+
kwargs["ge"] = self.min
|
|
87
|
+
if self.min_exclusive is not None:
|
|
88
|
+
kwargs["gt"] = self.min_exclusive
|
|
89
|
+
if self.max is not None:
|
|
90
|
+
kwargs["le"] = self.max
|
|
91
|
+
if self.max_exclusive is not None:
|
|
92
|
+
kwargs["lt"] = self.max_exclusive
|
|
93
|
+
return kwargs
|
|
94
|
+
|
|
83
95
|
|
|
84
96
|
# ------------------------------------ IS IN MIXIN ----------------------------------- #
|
|
85
97
|
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
+
from typing import Any as AnyType
|
|
7
|
+
|
|
6
8
|
import polars as pl
|
|
7
9
|
|
|
8
10
|
from dataframely._compat import pa, sa, sa_mssql, sa_TypeEngine
|
|
@@ -77,5 +79,9 @@ class Any(Column):
|
|
|
77
79
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
78
80
|
return pa.null()
|
|
79
81
|
|
|
82
|
+
@property
|
|
83
|
+
def _python_type(self) -> AnyType:
|
|
84
|
+
return AnyType
|
|
85
|
+
|
|
80
86
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
81
87
|
return pl.repeat(None, n, dtype=pl.Null, eager=True)
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import math
|
|
7
7
|
import sys
|
|
8
|
+
import warnings
|
|
8
9
|
from collections.abc import Sequence
|
|
9
10
|
from typing import Any, Literal, cast
|
|
10
11
|
|
|
@@ -121,6 +122,23 @@ class Array(Column):
|
|
|
121
122
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
122
123
|
return self._pyarrow_field_of_shape(self.shape).type
|
|
123
124
|
|
|
125
|
+
@property
|
|
126
|
+
def _python_type(self) -> Any:
|
|
127
|
+
inner_type = self.inner.pydantic_field()
|
|
128
|
+
return list[inner_type] # type: ignore
|
|
129
|
+
|
|
130
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
131
|
+
if len(self.shape) != 1:
|
|
132
|
+
warnings.warn(
|
|
133
|
+
"Multi-dimensional arrays are flattened for pydantic validation."
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
return {
|
|
137
|
+
**super()._pydantic_field_kwargs(),
|
|
138
|
+
"min_length": math.prod(self.shape),
|
|
139
|
+
"max_length": math.prod(self.shape),
|
|
140
|
+
}
|
|
141
|
+
|
|
124
142
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
125
143
|
# Sample the inner elements in a flat series
|
|
126
144
|
n_elements = n * math.prod(self.shape)
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
6
8
|
import polars as pl
|
|
7
9
|
|
|
8
10
|
from dataframely._compat import pa, sa, sa_TypeEngine
|
|
@@ -31,6 +33,10 @@ class Binary(Column):
|
|
|
31
33
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
32
34
|
return pa.large_binary()
|
|
33
35
|
|
|
36
|
+
@property
|
|
37
|
+
def _python_type(self) -> Any:
|
|
38
|
+
return bytes
|
|
39
|
+
|
|
34
40
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
35
41
|
return generator.sample_binary(
|
|
36
42
|
n,
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
6
8
|
import polars as pl
|
|
7
9
|
|
|
8
10
|
from dataframely._compat import pa, sa, sa_TypeEngine
|
|
@@ -27,5 +29,9 @@ class Bool(Column):
|
|
|
27
29
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
28
30
|
return pa.bool_()
|
|
29
31
|
|
|
32
|
+
@property
|
|
33
|
+
def _python_type(self) -> Any:
|
|
34
|
+
return bool
|
|
35
|
+
|
|
30
36
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
31
37
|
return generator.sample_bool(n, null_probability=self._null_probability)
|
|
@@ -71,6 +71,10 @@ class Categorical(Column):
|
|
|
71
71
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
72
72
|
return pa.dictionary(pa.uint32(), pa.large_string())
|
|
73
73
|
|
|
74
|
+
@property
|
|
75
|
+
def _python_type(self) -> Any:
|
|
76
|
+
return str
|
|
77
|
+
|
|
74
78
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
75
79
|
# We simply sample low-cardinality strings here
|
|
76
80
|
return generator.sample_string(
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import datetime as dt
|
|
7
|
+
import warnings
|
|
7
8
|
from typing import Any, cast
|
|
8
9
|
|
|
9
10
|
import polars as pl
|
|
@@ -132,6 +133,16 @@ class Date(OrdinalMixin[dt.date], Column):
|
|
|
132
133
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
133
134
|
return pa.date32()
|
|
134
135
|
|
|
136
|
+
@property
|
|
137
|
+
def _python_type(self) -> Any:
|
|
138
|
+
return dt.date
|
|
139
|
+
|
|
140
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
141
|
+
if self.resolution is not None:
|
|
142
|
+
warnings.warn("Date resolution is not translated to a pydantic constraint.")
|
|
143
|
+
|
|
144
|
+
return super()._pydantic_field_kwargs()
|
|
145
|
+
|
|
135
146
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
136
147
|
return generator.sample_date(
|
|
137
148
|
n,
|
|
@@ -261,6 +272,16 @@ class Time(OrdinalMixin[dt.time], Column):
|
|
|
261
272
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
262
273
|
return pa.time64("ns")
|
|
263
274
|
|
|
275
|
+
@property
|
|
276
|
+
def _python_type(self) -> Any:
|
|
277
|
+
return dt.time
|
|
278
|
+
|
|
279
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
280
|
+
if self.resolution is not None:
|
|
281
|
+
warnings.warn("Time resolution is not translated to a pydantic constraint.")
|
|
282
|
+
|
|
283
|
+
return super()._pydantic_field_kwargs()
|
|
284
|
+
|
|
264
285
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
265
286
|
return generator.sample_time(
|
|
266
287
|
n,
|
|
@@ -394,6 +415,22 @@ class Datetime(OrdinalMixin[dt.datetime], Column):
|
|
|
394
415
|
)
|
|
395
416
|
return pa.timestamp(self.time_unit, time_zone)
|
|
396
417
|
|
|
418
|
+
@property
|
|
419
|
+
def _python_type(self) -> Any:
|
|
420
|
+
return dt.datetime
|
|
421
|
+
|
|
422
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
423
|
+
if self.resolution is not None:
|
|
424
|
+
warnings.warn(
|
|
425
|
+
"Datetime resolution is not translated to a pydantic constraint."
|
|
426
|
+
)
|
|
427
|
+
if self.time_zone is not None:
|
|
428
|
+
warnings.warn(
|
|
429
|
+
"Datetime time zone is not translated to a pydantic constraint."
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
return super()._pydantic_field_kwargs()
|
|
433
|
+
|
|
397
434
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
398
435
|
return generator.sample_datetime(
|
|
399
436
|
n,
|
|
@@ -531,6 +568,18 @@ class Duration(OrdinalMixin[dt.timedelta], Column):
|
|
|
531
568
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
532
569
|
return pa.duration(self.time_unit)
|
|
533
570
|
|
|
571
|
+
@property
|
|
572
|
+
def _python_type(self) -> Any:
|
|
573
|
+
return dt.timedelta
|
|
574
|
+
|
|
575
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
576
|
+
if self.resolution is not None:
|
|
577
|
+
warnings.warn(
|
|
578
|
+
"Duration resolution is not translated to a pydantic constraint."
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
return super()._pydantic_field_kwargs()
|
|
582
|
+
|
|
534
583
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
535
584
|
# NOTE: If no duration is specified, we default to 100 years
|
|
536
585
|
return generator.sample_duration(
|
|
@@ -128,6 +128,16 @@ class Decimal(OrdinalMixin[decimal.Decimal], Column):
|
|
|
128
128
|
# We do not use decimal256 since its values cannot be represented in SQL Server.
|
|
129
129
|
return pa.decimal128(self.precision or 38, self.scale)
|
|
130
130
|
|
|
131
|
+
@property
|
|
132
|
+
def _python_type(self) -> Any:
|
|
133
|
+
return decimal.Decimal
|
|
134
|
+
|
|
135
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
136
|
+
return {
|
|
137
|
+
**super()._pydantic_field_kwargs(),
|
|
138
|
+
"decimal_places": self.scale,
|
|
139
|
+
}
|
|
140
|
+
|
|
131
141
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
132
142
|
# NOTE: Default precision to 38 for sampling, just like for SQL and Pyarrow
|
|
133
143
|
precision = self.precision or 38
|
|
@@ -6,7 +6,7 @@ from __future__ import annotations
|
|
|
6
6
|
import enum
|
|
7
7
|
from collections.abc import Iterable
|
|
8
8
|
from inspect import isclass
|
|
9
|
-
from typing import Any
|
|
9
|
+
from typing import Any, Literal
|
|
10
10
|
|
|
11
11
|
import polars as pl
|
|
12
12
|
|
|
@@ -95,6 +95,10 @@ class Enum(Column):
|
|
|
95
95
|
dtype = pa.uint32()
|
|
96
96
|
return pa.dictionary(dtype, pa.large_string())
|
|
97
97
|
|
|
98
|
+
@property
|
|
99
|
+
def _python_type(self) -> Any:
|
|
100
|
+
return Literal[tuple(self.categories)]
|
|
101
|
+
|
|
98
102
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
99
103
|
return generator.sample_choice(
|
|
100
104
|
n,
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import math
|
|
7
7
|
import sys
|
|
8
|
+
import warnings
|
|
8
9
|
from abc import abstractmethod
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
@@ -101,6 +102,26 @@ class _BaseFloat(OrdinalMixin[float], Column):
|
|
|
101
102
|
def min_value(self) -> float:
|
|
102
103
|
"""Minimum value of the column's type."""
|
|
103
104
|
|
|
105
|
+
@property
|
|
106
|
+
def _python_type(self) -> Any:
|
|
107
|
+
return float
|
|
108
|
+
|
|
109
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
110
|
+
if self.allow_inf != self.allow_nan:
|
|
111
|
+
warnings.warn(
|
|
112
|
+
"Unequal settings of `allow_inf` and `allow_nan` cannot be translated to "
|
|
113
|
+
"pydantic constraints."
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
kwargs = super()._pydantic_field_kwargs()
|
|
117
|
+
if self.allow_inf == self.allow_nan:
|
|
118
|
+
kwargs["allow_inf_nan"] = self.allow_inf
|
|
119
|
+
if "le" not in kwargs:
|
|
120
|
+
kwargs["le"] = self.max_value
|
|
121
|
+
if "ge" not in kwargs:
|
|
122
|
+
kwargs["ge"] = self.min_value
|
|
123
|
+
return kwargs
|
|
124
|
+
|
|
104
125
|
@property
|
|
105
126
|
def _nan_probability(self) -> float:
|
|
106
127
|
"""Private utility for the null probability used during sampling."""
|
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
from abc import abstractmethod
|
|
7
7
|
from collections.abc import Sequence
|
|
8
|
-
from typing import Any
|
|
8
|
+
from typing import Any, Literal
|
|
9
9
|
|
|
10
10
|
import polars as pl
|
|
11
11
|
from polars.datatypes.group import INTEGER_DTYPES
|
|
@@ -114,6 +114,20 @@ class _BaseInteger(IsInMixin[int], OrdinalMixin[int], Column):
|
|
|
114
114
|
"""Minimum value of the column's type."""
|
|
115
115
|
return 0 if self.is_unsigned else -(2 ** (self.num_bytes * 8 - 1))
|
|
116
116
|
|
|
117
|
+
@property
|
|
118
|
+
def _python_type(self) -> Any:
|
|
119
|
+
if self.is_in is not None:
|
|
120
|
+
return Literal[tuple(self.is_in)]
|
|
121
|
+
return int
|
|
122
|
+
|
|
123
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
124
|
+
kwargs = super()._pydantic_field_kwargs()
|
|
125
|
+
if "le" not in kwargs:
|
|
126
|
+
kwargs["le"] = self.max_value
|
|
127
|
+
if "ge" not in kwargs:
|
|
128
|
+
kwargs["ge"] = self.min_value
|
|
129
|
+
return kwargs
|
|
130
|
+
|
|
117
131
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
118
132
|
if self.is_in is not None:
|
|
119
133
|
return generator.sample_choice(
|
|
@@ -133,6 +133,19 @@ class List(Column):
|
|
|
133
133
|
# NOTE: Polars uses `large_list`s by default.
|
|
134
134
|
return pa.large_list(self.inner.pyarrow_field("item"))
|
|
135
135
|
|
|
136
|
+
@property
|
|
137
|
+
def _python_type(self) -> Any:
|
|
138
|
+
inner_type = self.inner.pydantic_field()
|
|
139
|
+
return list[inner_type] # type: ignore
|
|
140
|
+
|
|
141
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
142
|
+
kwargs = super()._pydantic_field_kwargs()
|
|
143
|
+
if self.min_length is not None:
|
|
144
|
+
kwargs["min_length"] = self.min_length
|
|
145
|
+
if self.max_length is not None:
|
|
146
|
+
kwargs["max_length"] = self.max_length
|
|
147
|
+
return kwargs
|
|
148
|
+
|
|
136
149
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
137
150
|
# First, sample the number of items per list element
|
|
138
151
|
# NOTE: We default to 32 for the upper bound as we need some kind of reasonable
|
|
@@ -67,6 +67,10 @@ class Object(Column):
|
|
|
67
67
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
68
68
|
raise NotImplementedError("PyArrow column cannot have 'Object' type.")
|
|
69
69
|
|
|
70
|
+
@property
|
|
71
|
+
def _python_type(self) -> Any:
|
|
72
|
+
return Any
|
|
73
|
+
|
|
70
74
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
71
75
|
raise NotImplementedError(
|
|
72
76
|
"Random data sampling not implemented for 'Object' type."
|
|
@@ -112,6 +112,20 @@ class String(Column):
|
|
|
112
112
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
113
113
|
return pa.large_string()
|
|
114
114
|
|
|
115
|
+
@property
|
|
116
|
+
def _python_type(self) -> Any:
|
|
117
|
+
return str
|
|
118
|
+
|
|
119
|
+
def _pydantic_field_kwargs(self) -> dict[str, Any]:
|
|
120
|
+
kwargs = super()._pydantic_field_kwargs()
|
|
121
|
+
if self.min_length is not None:
|
|
122
|
+
kwargs["min_length"] = self.min_length
|
|
123
|
+
if self.max_length is not None:
|
|
124
|
+
kwargs["max_length"] = self.max_length
|
|
125
|
+
if self.regex is not None:
|
|
126
|
+
kwargs["pattern"] = self.regex
|
|
127
|
+
return kwargs
|
|
128
|
+
|
|
115
129
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
116
130
|
if (
|
|
117
131
|
self.min_length is not None or self.max_length is not None
|
|
@@ -8,7 +8,7 @@ from typing import Any, cast
|
|
|
8
8
|
|
|
9
9
|
import polars as pl
|
|
10
10
|
|
|
11
|
-
from dataframely._compat import pa, sa, sa_postgresql, sa_TypeEngine
|
|
11
|
+
from dataframely._compat import pa, pydantic, sa, sa_postgresql, sa_TypeEngine
|
|
12
12
|
from dataframely._polars import PolarsDataType
|
|
13
13
|
from dataframely.random import Generator
|
|
14
14
|
|
|
@@ -117,6 +117,11 @@ class Struct(Column):
|
|
|
117
117
|
def pyarrow_dtype(self) -> pa.DataType:
|
|
118
118
|
return pa.struct([col.pyarrow_field(name) for name, col in self.inner.items()])
|
|
119
119
|
|
|
120
|
+
@property
|
|
121
|
+
def _python_type(self) -> Any:
|
|
122
|
+
fields = {name: col.pydantic_field() for name, col in self.inner.items()}
|
|
123
|
+
return pydantic.create_model("StructModel", **fields)
|
|
124
|
+
|
|
120
125
|
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
|
|
121
126
|
series = (
|
|
122
127
|
pl.DataFrame(
|
|
@@ -16,10 +16,8 @@ import polars as pl
|
|
|
16
16
|
import polars.exceptions as plexc
|
|
17
17
|
from polars._typing import FileSource
|
|
18
18
|
|
|
19
|
-
from dataframely._compat import deltalake
|
|
20
|
-
|
|
21
19
|
from ._base_schema import ORIGINAL_COLUMN_PREFIX, BaseSchema
|
|
22
|
-
from ._compat import PartitionSchemeOrSinkDirectory, pa, sa
|
|
20
|
+
from ._compat import PartitionSchemeOrSinkDirectory, deltalake, pa, pydantic, sa
|
|
23
21
|
from ._match_to_schema import match_to_schema
|
|
24
22
|
from ._native import format_rule_failures
|
|
25
23
|
from ._plugin import all_rules, all_rules_horizontal, all_rules_required
|
|
@@ -813,9 +811,7 @@ class Schema(BaseSchema, ABC):
|
|
|
813
811
|
the lazy frame's schema but also means that a call to :meth:`polars.LazyFrame.collect`
|
|
814
812
|
further down the line might fail because of the cast and/or missing columns.
|
|
815
813
|
"""
|
|
816
|
-
lf = df.lazy()
|
|
817
|
-
pl.col(name).cast(col.dtype) for name, col in cls.columns().items()
|
|
818
|
-
)
|
|
814
|
+
lf = match_to_schema(df.lazy(), cls, casting="strict")
|
|
819
815
|
if isinstance(df, pl.DataFrame):
|
|
820
816
|
return lf.collect() # type: ignore
|
|
821
817
|
return lf # type: ignore
|
|
@@ -1341,6 +1337,32 @@ class Schema(BaseSchema, ABC):
|
|
|
1341
1337
|
[col.pyarrow_field(name) for name, col in cls.columns().items()]
|
|
1342
1338
|
)
|
|
1343
1339
|
|
|
1340
|
+
@classmethod
|
|
1341
|
+
def to_pydantic_model(cls, name: str | None = None) -> type[pydantic.BaseModel]:
|
|
1342
|
+
"""Convert this schema to a pydantic model.
|
|
1343
|
+
|
|
1344
|
+
The pydantic model includes all columns defined in the schema along with their
|
|
1345
|
+
(structured) constraints. Custom checks and schema-level rules are not included
|
|
1346
|
+
in the pydantic model.
|
|
1347
|
+
|
|
1348
|
+
Args:
|
|
1349
|
+
name: The name of the returned pydantic model. If `None`, a default name is
|
|
1350
|
+
generated based on the name of this schema.
|
|
1351
|
+
|
|
1352
|
+
Returns:
|
|
1353
|
+
A :mod:`pydantic` model class.
|
|
1354
|
+
"""
|
|
1355
|
+
if cls._schema_validation_rules():
|
|
1356
|
+
warnings.warn(
|
|
1357
|
+
"Schema-level rules are not translated to pydantic validators."
|
|
1358
|
+
)
|
|
1359
|
+
|
|
1360
|
+
model_name = name or f"{cls.__name__.removesuffix('Schema')}Model"
|
|
1361
|
+
fields = {
|
|
1362
|
+
col_name: col.pydantic_field() for col_name, col in cls.columns().items()
|
|
1363
|
+
}
|
|
1364
|
+
return pydantic.create_model(model_name, **fields)
|
|
1365
|
+
|
|
1344
1366
|
# ----------------------------------- EQUALITY ----------------------------------- #
|
|
1345
1367
|
|
|
1346
1368
|
@classmethod
|
|
@@ -15,7 +15,7 @@ find it. For example, if you are using Claude Code:
|
|
|
15
15
|
|
|
16
16
|
```bash
|
|
17
17
|
mkdir -p .claude/skills/dataframely/
|
|
18
|
-
curl -o .claude/skills/dataframely/SKILL.md https://raw.githubusercontent.com/Quantco/dataframely/refs/heads/main/SKILL.md
|
|
18
|
+
curl -o .claude/skills/dataframely/SKILL.md https://raw.githubusercontent.com/Quantco/dataframely/refs/heads/main/skills/SKILL.md
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
or if you are using [skills.sh](https://skills.sh/) to manage your skills:
|