conformare 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. conformare-0.1.0/.github/workflows/ci.yml +90 -0
  2. conformare-0.1.0/.github/workflows/pages.yml +79 -0
  3. conformare-0.1.0/.github/workflows/publish.yml +74 -0
  4. conformare-0.1.0/.gitignore +38 -0
  5. conformare-0.1.0/.pre-commit-config.yaml +13 -0
  6. conformare-0.1.0/CHANGELOG.md +31 -0
  7. conformare-0.1.0/LICENSE +77 -0
  8. conformare-0.1.0/PKG-INFO +300 -0
  9. conformare-0.1.0/README.md +256 -0
  10. conformare-0.1.0/conformare/__init__.py +201 -0
  11. conformare-0.1.0/conformare/adapters/__init__.py +1 -0
  12. conformare-0.1.0/conformare/adapters/io.py +327 -0
  13. conformare-0.1.0/conformare/adapters/narwhals.py +152 -0
  14. conformare-0.1.0/conformare/adapters/pandas.py +264 -0
  15. conformare-0.1.0/conformare/adapters/spark.py +221 -0
  16. conformare-0.1.0/conformare/bootstrap.py +186 -0
  17. conformare-0.1.0/conformare/core/__init__.py +1 -0
  18. conformare-0.1.0/conformare/core/checklist.py +253 -0
  19. conformare-0.1.0/conformare/core/context.py +441 -0
  20. conformare-0.1.0/conformare/core/diagram.py +110 -0
  21. conformare-0.1.0/conformare/core/groups.py +109 -0
  22. conformare-0.1.0/conformare/core/lineage.py +164 -0
  23. conformare-0.1.0/conformare/core/names.py +137 -0
  24. conformare-0.1.0/conformare/core/recording.py +90 -0
  25. conformare-0.1.0/conformare/core/report.py +2003 -0
  26. conformare-0.1.0/conformare/core/risks.py +210 -0
  27. conformare-0.1.0/conformare/core/sensitivity.py +285 -0
  28. conformare-0.1.0/conformare/core/steps.py +340 -0
  29. conformare-0.1.0/conformare/core/suppress.py +82 -0
  30. conformare-0.1.0/conformare/profilers/__init__.py +1 -0
  31. conformare-0.1.0/conformare/profilers/backend.py +29 -0
  32. conformare-0.1.0/conformare/profilers/backend_narwhals.py +69 -0
  33. conformare-0.1.0/conformare/profilers/backend_pandas.py +53 -0
  34. conformare-0.1.0/conformare/profilers/backend_spark.py +108 -0
  35. conformare-0.1.0/conformare/profilers/base.py +46 -0
  36. conformare-0.1.0/conformare/profilers/builtins.py +696 -0
  37. conformare-0.1.0/conformare/profilers/conditions.py +42 -0
  38. conformare-0.1.0/conformare/profilers/engine.py +232 -0
  39. conformare-0.1.0/docs/build_site.py +414 -0
  40. conformare-0.1.0/docs/design.md +942 -0
  41. conformare-0.1.0/docs/docstring-standard.md +81 -0
  42. conformare-0.1.0/docs/pages/risks.md +115 -0
  43. conformare-0.1.0/docs/pages/sensitivity.md +121 -0
  44. conformare-0.1.0/examples/bootstrap/README.md +107 -0
  45. conformare-0.1.0/examples/bootstrap/bootstrap.py +87 -0
  46. conformare-0.1.0/examples/bootstrap/pipeline.py +167 -0
  47. conformare-0.1.0/examples/bootstrap/utils.py +50 -0
  48. conformare-0.1.0/examples/example_docstring_tagging.py +150 -0
  49. conformare-0.1.0/examples/example_feature_engineering_spark.py +219 -0
  50. conformare-0.1.0/examples/example_great_expectations.py +137 -0
  51. conformare-0.1.0/examples/example_great_expectations_spark.py +151 -0
  52. conformare-0.1.0/examples/example_html_report.py +130 -0
  53. conformare-0.1.0/examples/example_mixed.py +31 -0
  54. conformare-0.1.0/examples/example_narwhals.py +40 -0
  55. conformare-0.1.0/examples/example_pandas.py +118 -0
  56. conformare-0.1.0/examples/example_risk_checklist.py +81 -0
  57. conformare-0.1.0/examples/example_spark.py +41 -0
  58. conformare-0.1.0/examples/example_streaming.py +303 -0
  59. conformare-0.1.0/examples/example_streaming_spark.py +202 -0
  60. conformare-0.1.0/pyproject.toml +79 -0
  61. conformare-0.1.0/tests/conftest.py +69 -0
  62. conformare-0.1.0/tests/test_bootstrap.py +89 -0
  63. conformare-0.1.0/tests/test_chains.py +188 -0
  64. conformare-0.1.0/tests/test_checklist.py +86 -0
  65. conformare-0.1.0/tests/test_columns.py +49 -0
  66. conformare-0.1.0/tests/test_context.py +183 -0
  67. conformare-0.1.0/tests/test_diagram.py +64 -0
  68. conformare-0.1.0/tests/test_force_profile.py +138 -0
  69. conformare-0.1.0/tests/test_groups.py +40 -0
  70. conformare-0.1.0/tests/test_io.py +89 -0
  71. conformare-0.1.0/tests/test_logic.py +209 -0
  72. conformare-0.1.0/tests/test_names.py +28 -0
  73. conformare-0.1.0/tests/test_narwhals.py +71 -0
  74. conformare-0.1.0/tests/test_pandas.py +135 -0
  75. conformare-0.1.0/tests/test_profilers.py +277 -0
  76. conformare-0.1.0/tests/test_report.py +367 -0
  77. conformare-0.1.0/tests/test_sensitivity.py +40 -0
  78. conformare-0.1.0/tests/test_spark.py +241 -0
  79. conformare-0.1.0/tests/test_spark_cluster.py +79 -0
  80. conformare-0.1.0/tests/test_steps.py +190 -0
  81. conformare-0.1.0/tests/test_version.py +11 -0
@@ -0,0 +1,90 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ jobs:
9
+ # Code quality: Ruff lint + format check (constrained to the 0.15.x series so a new
10
+ # Ruff minor release can't break CI unexpectedly).
11
+ lint:
12
+ name: lint (ruff)
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v5
16
+ - uses: actions/setup-python@v6
17
+ with:
18
+ python-version: "3.11"
19
+ - run: python -m pip install --upgrade pip
20
+ - run: pip install "ruff>=0.15,<0.16"
21
+ - run: ruff check .
22
+ - run: ruff format --check .
23
+
24
+ # Core suite across every supported Python (3.10+). No Spark/JVM and no
25
+ # great-expectations here: those tests skip automatically (pytest.importorskip),
26
+ # so this stays fast and runs on the newest interpreters too.
27
+ test:
28
+ name: test (py${{ matrix.python-version }})
29
+ runs-on: ubuntu-latest
30
+ strategy:
31
+ fail-fast: false
32
+ matrix:
33
+ python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
34
+ steps:
35
+ - uses: actions/checkout@v5
36
+ - uses: actions/setup-python@v6
37
+ with:
38
+ python-version: ${{ matrix.python-version }}
39
+ - run: python -m pip install --upgrade pip
40
+ - run: pip install -e ".[test]"
41
+ - run: python -m pytest -q
42
+
43
+ # Full coverage: Java + PySpark + Great Expectations so the Spark and GX tests run too.
44
+ full:
45
+ name: full (spark + great-expectations)
46
+ runs-on: ubuntu-latest
47
+ steps:
48
+ - uses: actions/checkout@v5
49
+ - uses: actions/setup-java@v5
50
+ with:
51
+ distribution: temurin
52
+ java-version: "17"
53
+ - uses: actions/setup-python@v6
54
+ with:
55
+ python-version: "3.11"
56
+ - run: python -m pip install --upgrade pip
57
+ - run: pip install -e ".[spark,gx,test]"
58
+ - run: python -m pytest -q
59
+
60
+ # Multi-process Spark: local-cluster launches real executor JVMs (serialization,
61
+ # shuffle, networking). These tests are deselected by default, so run them
62
+ # explicitly with -m cluster. Linux only (reliable; teardown noise is harmless).
63
+ cluster:
64
+ name: cluster (multi-process spark)
65
+ runs-on: ubuntu-latest
66
+ steps:
67
+ - uses: actions/checkout@v5
68
+ - uses: actions/setup-java@v5
69
+ with:
70
+ distribution: temurin
71
+ java-version: "17"
72
+ - uses: actions/setup-python@v6
73
+ with:
74
+ python-version: "3.11"
75
+ - run: python -m pip install --upgrade pip
76
+ - run: pip install -e ".[spark,test]"
77
+ - run: python -m pytest -q -m cluster
78
+
79
+ # Verify the package builds (sdist + wheel) and the metadata is valid.
80
+ build:
81
+ name: build
82
+ runs-on: ubuntu-latest
83
+ steps:
84
+ - uses: actions/checkout@v5
85
+ - uses: actions/setup-python@v6
86
+ with:
87
+ python-version: "3.11"
88
+ - run: python -m pip install --upgrade pip build twine
89
+ - run: python -m build
90
+ - run: python -m twine check dist/*
@@ -0,0 +1,79 @@
1
+ name: Deploy docs site
2
+
3
+ # Builds the public docs site (landing page + live example reports) and publishes
4
+ # it to the SEPARATE public Pages repo, so the source repo can stay private.
5
+ #
6
+ # One-time setup:
7
+ # 1. Create a public repo, e.g. kaelonlloyd/conformare-docs.
8
+ # 2. Generate a deploy key: ssh-keygen -t ed25519 -C conformare-docs -f gh-pages-key -N ""
9
+ # 3. In conformare-docs: Settings > Deploy keys > add `gh-pages-key.pub`, ALLOW WRITE.
10
+ # 4. In THIS (private) repo: Settings > Secrets and variables > Actions >
11
+ # add secret PAGES_DEPLOY_KEY = contents of the private `gh-pages-key`.
12
+ # (The deploy step below strips CR / fixes the trailing newline, so a paste
13
+ # with Windows line endings still works.)
14
+ # 5. In conformare-docs: Settings > Pages > deploy from branch `gh-pages` / root.
15
+
16
+ on:
17
+ push:
18
+ branches: [main]
19
+ workflow_dispatch:
20
+
21
+ permissions:
22
+ contents: read
23
+
24
+ concurrency:
25
+ group: pages-deploy
26
+ cancel-in-progress: true
27
+
28
+ jobs:
29
+ build-and-deploy:
30
+ runs-on: ubuntu-latest
31
+ steps:
32
+ - uses: actions/checkout@v5
33
+ - uses: actions/setup-java@v5
34
+ with:
35
+ distribution: temurin
36
+ java-version: "17"
37
+ - uses: actions/setup-python@v6
38
+ with:
39
+ python-version: "3.11"
40
+ - run: python -m pip install --upgrade pip
41
+ # Spark + GX so the full set of live reports is generated.
42
+ - run: pip install -e ".[spark,gx]"
43
+ - run: python docs/build_site.py # writes ./site (Jekyll source + reports)
44
+
45
+ # Build the site ourselves so the published output is plain static HTML and
46
+ # never depends on GitHub's branch-build behaviour.
47
+ - uses: ruby/setup-ruby@v1
48
+ with:
49
+ ruby-version: "3.3"
50
+ bundler-cache: true
51
+ working-directory: site
52
+ - run: bundle exec jekyll build # site/ -> site/_site
53
+ working-directory: site
54
+
55
+ # Manual SSH push so we can SANITIZE the deploy key first: the recurring
56
+ # `ssh-add ... error in libcrypto` is caused by CR characters / a missing
57
+ # trailing newline in the pasted secret. Stripping \r and re-adding a newline
58
+ # makes it robust regardless of how the secret was pasted.
59
+ - name: Publish to public Pages repo (SSH)
60
+ env:
61
+ DEPLOY_KEY: ${{ secrets.PAGES_DEPLOY_KEY }}
62
+ run: |
63
+ set -euo pipefail
64
+ mkdir -p "$HOME/.ssh"
65
+ printf '%s\n' "$DEPLOY_KEY" | tr -d '\r' | sed -e '/^$/d' > "$HOME/.ssh/deploy_key"
66
+ printf '\n' >> "$HOME/.ssh/deploy_key" # append a final newline (the key already ends in one after the sed pass, so this leaves a trailing blank line)
67
+ chmod 600 "$HOME/.ssh/deploy_key"
68
+ ssh-keyscan github.com >> "$HOME/.ssh/known_hosts" 2>/dev/null
69
+ # Fail early with a clear message if the key still won't load.
70
+ ssh-keygen -y -f "$HOME/.ssh/deploy_key" > /dev/null
71
+ export GIT_SSH_COMMAND="ssh -i $HOME/.ssh/deploy_key -o IdentitiesOnly=yes -o UserKnownHostsFile=$HOME/.ssh/known_hosts"
72
+ cd site/_site
73
+ touch .nojekyll # already-built site; serve as-is
74
+ git init -q -b gh-pages
75
+ git config user.name "github-actions[bot]"
76
+ git config user.email "github-actions[bot]@users.noreply.github.com"
77
+ git add -A
78
+ git commit -q -m "docs: deploy site from ${GITHUB_SHA}"
79
+ git push -f "git@github.com:kaelonlloyd/conformare-docs.git" gh-pages
@@ -0,0 +1,74 @@
1
+ name: Publish
2
+
3
+ # Builds the distribution once, then publishes via PyPI Trusted Publishing (OIDC) -
4
+ # no API tokens stored anywhere.
5
+ #
6
+ # * Manual run (workflow_dispatch) -> TestPyPI (environment: testpypi)
7
+ # * Publishing a GitHub Release -> PyPI (environment: pypi)
8
+ #
9
+ # One-time Trusted Publisher setup (pending publisher is fine before the first run):
10
+ # TestPyPI -> https://test.pypi.org/manage/account/publishing/
11
+ # PyPI -> https://pypi.org/manage/account/publishing/
12
+ # Use these values:
13
+ # PyPI Project Name: conformare
14
+ # Owner: kaelonlloyd
15
+ # Repository name: conformare
16
+ # Workflow name: publish.yml
17
+ # Environment name: testpypi (for TestPyPI) / pypi (for PyPI)
18
+
19
+ on:
20
+ release:
21
+ types: [published]
22
+ workflow_dispatch:
23
+
24
+ permissions:
25
+ contents: read
26
+
27
+ jobs:
28
+ build:
29
+ name: Build distribution
30
+ runs-on: ubuntu-latest
31
+ steps:
32
+ - uses: actions/checkout@v5
33
+ - uses: actions/setup-python@v6
34
+ with:
35
+ python-version: "3.11"
36
+ - run: python -m pip install --upgrade pip build twine
37
+ - run: python -m build
38
+ - run: python -m twine check dist/*
39
+ - uses: actions/upload-artifact@v4
40
+ with:
41
+ name: dist
42
+ path: dist/
43
+
44
+ testpypi:
45
+ name: Publish to TestPyPI
46
+ needs: build
47
+ if: github.event_name == 'workflow_dispatch'
48
+ runs-on: ubuntu-latest
49
+ environment: testpypi
50
+ permissions:
51
+ id-token: write # required for trusted publishing
52
+ steps:
53
+ - uses: actions/download-artifact@v4
54
+ with:
55
+ name: dist
56
+ path: dist/
57
+ - uses: pypa/gh-action-pypi-publish@release/v1
58
+ with:
59
+ repository-url: https://test.pypi.org/legacy/
60
+
61
+ pypi:
62
+ name: Publish to PyPI
63
+ needs: build
64
+ if: github.event_name == 'release'
65
+ runs-on: ubuntu-latest
66
+ environment: pypi
67
+ permissions:
68
+ id-token: write # required for trusted publishing
69
+ steps:
70
+ - uses: actions/download-artifact@v4
71
+ with:
72
+ name: dist
73
+ path: dist/
74
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ .eggs/
7
+ build/
8
+ dist/
9
+ *.so
10
+
11
+ # Tooling caches
12
+ .pytest_cache/
13
+ .mypy_cache/
14
+ .ruff_cache/
15
+ .coverage
16
+ htmlcov/
17
+
18
+ # Virtual environments
19
+ .venv/
20
+ venv/
21
+ env/
22
+
23
+ # Generated example reports (examples write here; regenerate by running examples/)
24
+ output/
25
+ *.html
26
+
27
+ # Built docs site (docs/build_site.py; published to the public Pages repo by CI)
28
+ site/
29
+
30
+ # Pages deploy keypair (the private half is a secret; never commit it)
31
+ gh-pages-key
32
+ gh-pages-key.pub
33
+
34
+ # Editors / OS
35
+ .vscode/
36
+ .idea/
37
+ .DS_Store
38
+ Thumbs.db
@@ -0,0 +1,13 @@
1
+ # Local code-quality hooks. Install once with:
2
+ # pip install pre-commit && pre-commit install
3
+ # Then Ruff lint + format run on every commit. Run on everything with:
4
+ # pre-commit run --all-files
5
+ #
6
+ # This mirrors the CI `lint` job (Ruff lint + format check).
7
+ repos:
8
+ - repo: https://github.com/astral-sh/ruff-pre-commit
9
+ rev: v0.15.18
10
+ hooks:
11
+ - id: ruff
12
+ args: [--fix]
13
+ - id: ruff-format
@@ -0,0 +1,31 @@
1
+ # Changelog
2
+
3
+ All notable changes to Conformare are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and Conformare follows
5
+ [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ While in `0.x`, breaking changes may land in a **minor** release (`0.1 -> 0.2`); the
8
+ move to `1.0.0` signals a commitment to backward compatibility.
9
+
10
+ ## [Unreleased]
11
+
12
+ ## [0.1.0] - 2026-06-21
13
+
14
+ Initial public release.
15
+
16
+ ### Added
17
+ - Lineage capture of the authored dataframe pipeline across three backends:
18
+ `trackNarwhals()`, `trackSpark()` (PySpark, zero code change) and `trackPandas()`.
19
+ - Per-step profilers: `rowCount`, `columnCount`, `dataSize`, `histogram`,
20
+ `nullFraction`, `iqrOutliers`, optional `greatExpectations` and `whylogs`.
21
+ - Governance context: `describe()` / `risk()` / `describe_process()`, a built-in risk
22
+ catalog with `register_risk()`, mitigation/owner tracking and a governance ranking.
23
+ - Data-sensitivity detection (name-based heuristics + manual `mark_sensitive`) with an
24
+ exfiltration check for columns that reach a written output.
25
+ - Outputs: self-contained interactive HTML report (`to_html`), `to_mermaid`, `to_json`,
26
+ and a formal, sign-off-ready Markdown risk checklist (`to_risk_checklist`).
27
+ - Non-intrusive modes: docstring tagging and `bootstrap()` for unmodified scripts.
28
+ - `conformare.__version__` exposed from package metadata.
29
+
30
+ [Unreleased]: https://github.com/kaelonlloyd/conformare/compare/v0.1.0...HEAD
31
+ [0.1.0]: https://github.com/kaelonlloyd/conformare/releases/tag/v0.1.0
@@ -0,0 +1,77 @@
1
+ # PolyForm Noncommercial License 1.0.0
2
+
3
+ <https://polyformproject.org/licenses/noncommercial/1.0.0>
4
+
5
+ Required Notice: Copyright 2026 Kaelon Lloyd
6
+
7
+ For a commercial license, contact Kaelon Lloyd <kaelonlloyd@gmail.com>.
8
+
9
+ ## Acceptance
10
+
11
+ In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses.
12
+
13
+ ## Copyright License
14
+
15
+ The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose. However, you may only distribute the software according to [Distribution License](#distribution-license) and make changes or new works based on the software according to [Changes and New Works License](#changes-and-new-works-license).
16
+
17
+ ## Distribution License
18
+
19
+ The licensor grants you an additional copyright license to distribute copies of the software. Your license to distribute covers distributing the software with changes and new works permitted by [Changes and New Works License](#changes-and-new-works-license).
20
+
21
+ ## Notices
22
+
23
+ You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software. For example:
24
+
25
+ > Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
26
+
27
+ ## Changes and New Works License
28
+
29
+ The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose.
30
+
31
+ ## Patent License
32
+
33
+ The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software.
34
+
35
+ ## Noncommercial Purposes
36
+
37
+ Any noncommercial purpose is a permitted purpose.
38
+
39
+ ## Personal Uses
40
+
41
+ Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose.
42
+
43
+ ## Noncommercial Organizations
44
+
45
+ Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding.
46
+
47
+ ## Fair Use
48
+
49
+ You may have "fair use" rights for the software under the law. These terms do not limit them.
50
+
51
+ ## No Other Rights
52
+
53
+ These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else. These terms do not imply any other licenses.
54
+
55
+ ## Patent Defense
56
+
57
+ If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
58
+
59
+ ## Violations
60
+
61
+ The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice. Otherwise, all your licenses end immediately.
62
+
63
+ ## No Liability
64
+
65
+ ***As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.***
66
+
67
+ ## Definitions
68
+
69
+ The **licensor** is the individual or entity offering these terms, and the **software** is the software the licensor makes available under these terms.
70
+
71
+ **You** refers to the individual or entity agreeing to these terms.
72
+
73
+ **Your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. **Control** means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
74
+
75
+ **Your licenses** are all the licenses granted to you for the software under these terms.
76
+
77
+ **Use** means anything you do with the software requiring one of your licenses.