dfguard 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. dfguard-0.1.0/.github/workflows/ci.yml +60 -0
  2. dfguard-0.1.0/.github/workflows/docs.yml +24 -0
  3. dfguard-0.1.0/.github/workflows/release.yml +36 -0
  4. dfguard-0.1.0/.gitignore +12 -0
  5. dfguard-0.1.0/.readthedocs.yaml +17 -0
  6. dfguard-0.1.0/.release-please-manifest.json +3 -0
  7. dfguard-0.1.0/CHANGELOG.md +27 -0
  8. dfguard-0.1.0/CONTRIBUTING.md +40 -0
  9. dfguard-0.1.0/LICENSE +147 -0
  10. dfguard-0.1.0/PKG-INFO +415 -0
  11. dfguard-0.1.0/README.md +241 -0
  12. dfguard-0.1.0/dfguard/__init__.py +8 -0
  13. dfguard-0.1.0/dfguard/py.typed +0 -0
  14. dfguard-0.1.0/dfguard/pyspark/__init__.py +114 -0
  15. dfguard-0.1.0/dfguard/pyspark/_enforcement.py +250 -0
  16. dfguard-0.1.0/dfguard/pyspark/_inference.py +77 -0
  17. dfguard-0.1.0/dfguard/pyspark/_nullable.py +49 -0
  18. dfguard-0.1.0/dfguard/pyspark/coercion.py +203 -0
  19. dfguard-0.1.0/dfguard/pyspark/dataset.py +696 -0
  20. dfguard-0.1.0/dfguard/pyspark/decorators.py +86 -0
  21. dfguard-0.1.0/dfguard/pyspark/exceptions.py +55 -0
  22. dfguard-0.1.0/dfguard/pyspark/history.py +139 -0
  23. dfguard-0.1.0/dfguard/pyspark/schema.py +418 -0
  24. dfguard-0.1.0/dfguard/pyspark/types.py +107 -0
  25. dfguard-0.1.0/docs/Makefile +20 -0
  26. dfguard-0.1.0/docs/airflow.rst +214 -0
  27. dfguard-0.1.0/docs/api/index.rst +7 -0
  28. dfguard-0.1.0/docs/api/pyspark/dataset.rst +62 -0
  29. dfguard-0.1.0/docs/api/pyspark/decorators.rst +75 -0
  30. dfguard-0.1.0/docs/api/pyspark/enforcement.rst +87 -0
  31. dfguard-0.1.0/docs/api/pyspark/exceptions.rst +44 -0
  32. dfguard-0.1.0/docs/api/pyspark/history.rst +38 -0
  33. dfguard-0.1.0/docs/api/pyspark/index.rst +12 -0
  34. dfguard-0.1.0/docs/api/pyspark/schemas.rst +90 -0
  35. dfguard-0.1.0/docs/conf.py +58 -0
  36. dfguard-0.1.0/docs/index.rst +93 -0
  37. dfguard-0.1.0/docs/kedro.rst +232 -0
  38. dfguard-0.1.0/docs/make.bat +35 -0
  39. dfguard-0.1.0/docs/pipelines.rst +285 -0
  40. dfguard-0.1.0/docs/quickstart.rst +425 -0
  41. dfguard-0.1.0/docs/requirements.txt +3 -0
  42. dfguard-0.1.0/examples/airflow/dags/orders_dag.py +79 -0
  43. dfguard-0.1.0/examples/airflow/pipeline/__init__.py +7 -0
  44. dfguard-0.1.0/examples/airflow/pipeline/schemas.py +23 -0
  45. dfguard-0.1.0/examples/airflow/pipeline/transforms.py +35 -0
  46. dfguard-0.1.0/examples/airflow/requirements.txt +3 -0
  47. dfguard-0.1.0/examples/kedro/conf/base/catalog.yml +17 -0
  48. dfguard-0.1.0/examples/kedro/conf/base/parameters.yml +1 -0
  49. dfguard-0.1.0/examples/kedro/data/raw_orders.csv +5 -0
  50. dfguard-0.1.0/examples/kedro/pyproject.toml +8 -0
  51. dfguard-0.1.0/examples/kedro/requirements.txt +4 -0
  52. dfguard-0.1.0/examples/kedro/src/orders_pipeline/__init__.py +3 -0
  53. dfguard-0.1.0/examples/kedro/src/orders_pipeline/__main__.py +8 -0
  54. dfguard-0.1.0/examples/kedro/src/orders_pipeline/pipeline_registry.py +11 -0
  55. dfguard-0.1.0/examples/kedro/src/orders_pipeline/pipelines/__init__.py +0 -0
  56. dfguard-0.1.0/examples/kedro/src/orders_pipeline/pipelines/processing/__init__.py +3 -0
  57. dfguard-0.1.0/examples/kedro/src/orders_pipeline/pipelines/processing/nodes.py +33 -0
  58. dfguard-0.1.0/examples/kedro/src/orders_pipeline/pipelines/processing/pipeline.py +20 -0
  59. dfguard-0.1.0/examples/kedro/src/orders_pipeline/schemas.py +27 -0
  60. dfguard-0.1.0/pyproject.toml +57 -0
  61. dfguard-0.1.0/release-please-config.json +15 -0
  62. dfguard-0.1.0/tests/__init__.py +0 -0
  63. dfguard-0.1.0/tests/pyspark/__init__.py +0 -0
  64. dfguard-0.1.0/tests/pyspark/conftest.py +57 -0
  65. dfguard-0.1.0/tests/pyspark/test_coercion.py +135 -0
  66. dfguard-0.1.0/tests/pyspark/test_complex_types.py +263 -0
  67. dfguard-0.1.0/tests/pyspark/test_dataset.py +206 -0
  68. dfguard-0.1.0/tests/pyspark/test_decorators.py +77 -0
  69. dfguard-0.1.0/tests/pyspark/test_enforcement.py +179 -0
  70. dfguard-0.1.0/tests/pyspark/test_history.py +53 -0
  71. dfguard-0.1.0/tests/pyspark/test_pipeline.py +134 -0
  72. dfguard-0.1.0/tests/pyspark/test_schema.py +136 -0
  73. dfguard-0.1.0/tests/pyspark/test_schema_annotation.py +116 -0
  74. dfguard-0.1.0/tests/pyspark/test_types.py +43 -0
@@ -0,0 +1,60 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+ cache: pip
18
+ - run: pip install ruff mypy
19
+ - run: ruff check dfguard/ tests/
20
+ - run: mypy dfguard/ --ignore-missing-imports
21
+
22
+ test:
23
+ needs: lint
24
+ runs-on: ubuntu-latest
25
+ strategy:
26
+ fail-fast: false # run all versions even if one fails
27
+ matrix:
28
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+ - uses: actions/setup-python@v5
32
+ with:
33
+ python-version: ${{ matrix.python-version }}
34
+ cache: pip
35
+ - run: pip install -e ".[pyspark,dev]"
36
+ - run: pytest tests/pyspark/ -q --tb=short --cov=dfguard --cov-report=xml
37
+
38
+ - name: Upload coverage
39
+ if: matrix.python-version == '3.12'
40
+ uses: codecov/codecov-action@v4
41
+ with:
42
+ files: coverage.xml
43
+ fail_ci_if_error: false
44
+ env:
45
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
46
+
47
+ # Test against the newest Python release.
48
+ # Allowed to fail: PySpark may not support it yet.
49
+ test-latest:
50
+ needs: lint
51
+ runs-on: ubuntu-latest
52
+ continue-on-error: true
53
+ steps:
54
+ - uses: actions/checkout@v4
55
+ - uses: actions/setup-python@v5
56
+ with:
57
+ python-version: "3.14"
58
+ cache: pip
59
+ - run: pip install -e ".[pyspark,dev]"
60
+ - run: pytest tests/pyspark/ -q --tb=short
@@ -0,0 +1,24 @@
1
+ name: Docs
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ jobs:
8
+ deploy:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ contents: write # needed to push to gh-pages branch
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.12"
18
+ cache: pip
19
+ - run: pip install -e ".[pyspark]" sphinx furo sphinx-autodoc-typehints
20
+ - run: make -C docs html
21
+ - uses: peaceiris/actions-gh-pages@v4
22
+ with:
23
+ github_token: ${{ secrets.GITHUB_TOKEN }}
24
+ publish_dir: docs/_build/html
@@ -0,0 +1,36 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ jobs:
8
+ release-please:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ contents: write
12
+ pull-requests: write
13
+ outputs:
14
+ release_created: ${{ steps.release.outputs.release_created }}
15
+
16
+ steps:
17
+ - uses: googleapis/release-please-action@v4
18
+ id: release
19
+ with:
20
+ release-type: python
21
+
22
+ publish:
23
+ needs: release-please
24
+ if: needs.release-please.outputs.release_created
25
+ runs-on: ubuntu-latest
26
+ environment: pypi
27
+ permissions:
28
+ id-token: write # required for PyPI trusted publishing (no API token needed)
29
+
30
+ steps:
31
+ - uses: actions/checkout@v4
32
+ - uses: actions/setup-python@v5
33
+ with:
34
+ python-version: "3.11"
35
+ - run: pip install build && python -m build
36
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,12 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ .mypy_cache/
9
+ .ruff_cache/
10
+ .pytest_cache/
11
+ docs/_build/
12
+ *.DS_Store
@@ -0,0 +1,17 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-24.04
5
+ tools:
6
+ python: "3.12"
7
+
8
+ sphinx:
9
+ configuration: docs/conf.py
10
+
11
+ python:
12
+ install:
13
+ - method: pip
14
+ path: .
15
+ extra_requirements:
16
+ - pyspark
17
+ - requirements: docs/requirements.txt
@@ -0,0 +1,3 @@
1
+ {
2
+ ".": "0.1.0"
3
+ }
@@ -0,0 +1,27 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 (2026-04-11)
4
+
5
+
6
+ ### Features
7
+
8
+ * complex type tests, Airflow/Kedro examples, subset docs, pipeline docs ([9ff5820](https://github.com/nitrajen/dfguard/commit/9ff58201d6439d7982a7901bcfaf0bfb31ba0fff))
9
+ * initial release of frameguard v0.1.0 ([90eced9](https://github.com/nitrajen/dfguard/commit/90eced96556dc74f463d04f4a59d8bf82b346537))
10
+ * subset flag on enforce() and arm() with global/function-level hierarchy ([8bc0c08](https://github.com/nitrajen/dfguard/commit/8bc0c08d6aa7613ade08459e88d3e20c7fe1f3cf))
11
+
12
+
13
+ ### Bug Fixes
14
+
15
+ * enforcement robustness and test coverage ([7db3f1b](https://github.com/nitrajen/dfguard/commit/7db3f1b364cdfd5a66783cd4b8de275adda98ab6))
16
+ * resolve CI lint failures and add coverage reporting ([0ba8926](https://github.com/nitrajen/dfguard/commit/0ba892674a4a8541777d812d69a989de0560d530))
17
+ * support Python 3.10 by replacing typing.Optional with fg.Optional ([ec765f4](https://github.com/nitrajen/dfguard/commit/ec765f42b571f79442c180fb337bc4194a629af0))
18
+
19
+
20
+ ### Documentation
21
+
22
+ * fix subset section, add create_dataframe(), fix Kedro/Airflow docs ([d7de934](https://github.com/nitrajen/dfguard/commit/d7de9346632e047f7a2eafc16c8be70ab7a1a53f))
23
+ * make Two ways section fully self-contained with imports and data ([1fcaf0a](https://github.com/nitrajen/dfguard/commit/1fcaf0a95780b3b142e449b9a1915458ad7369b6))
24
+ * remove all em dashes from docs and README ([a86a627](https://github.com/nitrajen/dfguard/commit/a86a6270b5a1b25aa466f8cd56059fbcb9bbb0dc))
25
+ * rewrite opening — lead with the problem, not a presumption ([e10f129](https://github.com/nitrajen/dfguard/commit/e10f1295cd2da2915f08566506686be5df27c336))
26
+ * self-contained examples, drop Pandera mention, fix license label ([7753a94](https://github.com/nitrajen/dfguard/commit/7753a949e8f5f9dcc17410d7ab5aa286b1ff2e4a))
27
+ * version in title, self-contained examples, nested struct, ReadTheDocs config ([d6439c5](https://github.com/nitrajen/dfguard/commit/d6439c5332d8ea642f2692c1e7b2085e81a27035))
@@ -0,0 +1,40 @@
1
+ # Contributing to dfguard
2
+
3
+ Thanks for taking the time. This is a small library with a focused job, so
4
+ contributions don't need to be big to matter.
5
+
6
+ ## Reporting bugs
7
+
8
+ Open an issue. Include:
9
+ - what you were doing
10
+ - the full error message
11
+ - your Python, PySpark, and dfguard versions
12
+
13
+ Schema mismatch bugs are especially welcome — if dfguard raised when it
14
+ shouldn't have, or didn't raise when it should, that's exactly the kind of
15
+ thing we want to know about.
16
+
17
+ ## Suggesting changes
18
+
19
+ Open an issue before writing code. A quick description of the problem and
20
+ what you have in mind is enough. We'll let you know if it fits the direction
21
+ of the library.
22
+
23
+ ## Submitting a PR
24
+
25
+ 1. Fork the repo and create a branch
26
+ 2. `pip install -e ".[pyspark,dev]"`
27
+ 3. Write code, write tests
28
+ 4. `ruff check dfguard/ tests/` and `mypy dfguard/ --ignore-missing-imports`
29
+ 5. `pytest tests/pyspark/ -q`
30
+ 6. Open the PR with a short description of what changed and why
31
+
32
+ Keep PRs focused. One thing at a time.
33
+
34
+ ## Roadmap
35
+
36
+ PySpark is the first integration. Polars and pandas are next. If you want to
37
+ help build those, say so in an issue and we'll coordinate.
38
+
39
+ The core enforcement mechanism (`_enforcement.py`) is intentionally simple
40
+ and dependency-free. New integrations should follow the same principle.
dfguard-0.1.0/LICENSE ADDED
@@ -0,0 +1,147 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship made available under
36
+ the License, as indicated by a copyright notice that is included in
37
+ or attached to the work (an example is provided in the Appendix below).
38
+
39
+ "Derivative Works" shall mean any work, whether in Source or Object
40
+ form, that is based on (or derived from) the Work and for which the
41
+ editorial revisions, annotations, elaborations, or other modifications
42
+ represent, as a whole, an original work of authorship. For the purposes
43
+ of this License, Derivative Works shall not include works that remain
44
+ separable from, or merely link (or bind by name) to the interfaces of,
45
+ the Work and Derivative Works thereof.
46
+
47
+ "Contribution" shall mean, as submitted to the Licensor for inclusion
48
+ in the Work by the copyright owner or by an individual or Legal Entity
49
+ authorized to submit on behalf of the copyright owner. For the purposes
50
+ of this definition, "submitted" means any form of electronic, verbal,
51
+ or written communication sent to the Licensor or its representatives.
52
+
53
+ "Contributor" shall mean Licensor and any Legal Entity on behalf of
54
+ whom a Contribution has been received by the Licensor and included
55
+ within the Work.
56
+
57
+ 2. Grant of Copyright License. Subject to the terms and conditions of
58
+ this License, each Contributor hereby grants to You a perpetual,
59
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
60
+ copyright license to reproduce, prepare Derivative Works of,
61
+ publicly display, publicly perform, sublicense, and distribute the
62
+ Work and such Derivative Works in Source or Object form.
63
+
64
+ 3. Grant of Patent License. Subject to the terms and conditions of
65
+ this License, each Contributor hereby grants to You a perpetual,
66
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
67
+ (except as stated in this section) patent license to make, have made,
68
+ use, offer to sell, sell, import, and otherwise transfer the Work.
69
+
70
+ 4. Redistribution. You may reproduce and distribute copies of the
71
+ Work or Derivative Works thereof in any medium, with or without
72
+ modifications, and in Source or Object form, provided that You
73
+ meet the following conditions:
74
+
75
+ (a) You must give any other recipients of the Work or Derivative Works
76
+ a copy of this License; and
77
+
78
+ (b) You must cause any modified files to carry prominent notices
79
+ stating that You changed the files; and
80
+
81
+ (c) You must retain, in the Source form of any Derivative Works
82
+ that You distribute, all copyright, patent, trademark, and
83
+ attribution notices from the Source form of the Work; and
84
+
85
+ (d) If the Work includes a "NOTICE" text file, you must include a
86
+ readable copy of the attribution notices contained within such
87
+ NOTICE file, in at least one of the following places: within a
88
+ NOTICE text file distributed as part of the Derivative Works;
89
+ within the Source form or documentation, if provided along with
90
+ the Derivative Works; or, within a display generated by the
91
+ Derivative Works, if and wherever such third-party notices normally
92
+ appear. The contents of the NOTICE file are for informational
93
+ purposes only and do not modify the License.
94
+
95
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
96
+ any Contribution intentionally submitted for inclusion in the Work
97
+ by You to the Licensor shall be under the terms and conditions of
98
+ this License, without any additional terms or conditions.
99
+
100
+ 6. Trademarks. This License does not grant permission to use the trade
101
+ names, trademarks, service marks, or product names of the Licensor.
102
+
103
+ 7. Disclaimer of Warranty. Unless required by applicable law or
104
+ agreed to in writing, Licensor provides the Work (and each
105
+ Contributor provides its Contributions) on an "AS IS" BASIS,
106
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
107
+ implied. See the License for the specific language governing
108
+ permissions and limitations under the License.
109
+
110
+ 8. Limitation of Liability. In no event and under no legal theory,
111
+ whether in tort (including negligence), contract, or otherwise,
112
+ unless required by applicable law (such as deliberate and grossly
113
+ negligent acts) or agreed to in writing, shall any Contributor be
114
+ liable to You for damages, including any direct, indirect, special,
115
+ incidental, or exemplary damages of any character arising as a
116
+ result of this License or out of the use or inability to use the
117
+ Work (including but not limited to damages for loss of goodwill,
118
+ work stoppage, computer failure or malfunction, or all other
119
+ commercial damages or losses), even if such Contributor has been
120
+ advised of the possibility of such damages.
121
+
122
+ 9. Accepting Warranty or Liability. While redistributing the Work or
123
+ Derivative Works thereof, You may choose to offer, and charge a fee
124
+ for, acceptance of support, warranty, indemnity, or other liability
125
+ obligations and/or rights consistent with this License. However,
126
+ in accepting such obligations, You may offer such conditions only
127
+ on Your own behalf and on Your sole responsibility, not on behalf
128
+ of any other Contributor, and only if You agree to indemnify,
129
+ defend, and hold each Contributor harmless for any liability
130
+ incurred by, or claims asserted against, such Contributor by reason
131
+ of your accepting any such warranty or additional liability.
132
+
133
+ END OF TERMS AND CONDITIONS
134
+
135
+ Copyright 2024 Nithin Rajendran
136
+
137
+ Licensed under the Apache License, Version 2.0 (the "License");
138
+ you may not use this file except in compliance with the License.
139
+ You may obtain a copy of the License at
140
+
141
+ http://www.apache.org/licenses/LICENSE-2.0
142
+
143
+ Unless required by applicable law or agreed to in writing, software
144
+ distributed under the License is distributed on an "AS IS" BASIS,
145
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
146
+ See the License for the specific language governing permissions and
147
+ limitations under the License.