provenanced 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- provenanced-0.5.0/LICENSE +30 -0
- provenanced-0.5.0/PKG-INFO +176 -0
- provenanced-0.5.0/README.md +100 -0
- provenanced-0.5.0/proveit/__init__.py +301 -0
- provenanced-0.5.0/proveit/anchoring.py +2110 -0
- provenanced-0.5.0/proveit/cli.py +259 -0
- provenanced-0.5.0/proveit/compliance/__init__.py +98 -0
- provenanced-0.5.0/proveit/compliance/base.py +361 -0
- provenanced-0.5.0/proveit/compliance/colorado_ai_act.py +349 -0
- provenanced-0.5.0/proveit/compliance/cro_features.py +1437 -0
- provenanced-0.5.0/proveit/compliance/ecoa.py +1047 -0
- provenanced-0.5.0/proveit/compliance/eu_ai_act.py +969 -0
- provenanced-0.5.0/proveit/compliance/nyc_ll144.py +340 -0
- provenanced-0.5.0/proveit/compliance/score.py +368 -0
- provenanced-0.5.0/proveit/compliance/sr117.py +884 -0
- provenanced-0.5.0/proveit/config.py +441 -0
- provenanced-0.5.0/proveit/dashboard/__init__.py +23 -0
- provenanced-0.5.0/proveit/dashboard/admin_app.py +617 -0
- provenanced-0.5.0/proveit/dashboard/client_app.py +1783 -0
- provenanced-0.5.0/proveit/dashboard/dashdb.py +834 -0
- provenanced-0.5.0/proveit/dashboard/server.py +593 -0
- provenanced-0.5.0/proveit/dashboard/sqlite_dashdb.py +796 -0
- provenanced-0.5.0/proveit/dashboard/sync.py +454 -0
- provenanced-0.5.0/proveit/dashboard/templates/admin_dashboard.html +1681 -0
- provenanced-0.5.0/proveit/dashboard/templates/client_dashboard.html +2088 -0
- provenanced-0.5.0/proveit/dashboard/templates/dashboard.html +1103 -0
- provenanced-0.5.0/proveit/demo_suite/__init__.py +13 -0
- provenanced-0.5.0/proveit/demo_suite/data/credit_scoring_data.csv +10001 -0
- provenanced-0.5.0/proveit/demo_suite/data/credit_scoring_metadata.json +9 -0
- provenanced-0.5.0/proveit/demo_suite/server.py +246 -0
- provenanced-0.5.0/proveit/demo_suite/static/css/demo.css +157 -0
- provenanced-0.5.0/proveit/demo_suite/static/js/charts.js +8 -0
- provenanced-0.5.0/proveit/demo_suite/static/js/scenario.js +523 -0
- provenanced-0.5.0/proveit/demo_suite/templates/base.html +39 -0
- provenanced-0.5.0/proveit/demo_suite/templates/index.html +299 -0
- provenanced-0.5.0/proveit/demo_suite/templates/performance.html +271 -0
- provenanced-0.5.0/proveit/demo_suite/templates/scenario.html +332 -0
- provenanced-0.5.0/proveit/demo_suite/use_cases/__init__.py +1 -0
- provenanced-0.5.0/proveit/demo_suite/use_cases/credit_scoring.py +300 -0
- provenanced-0.5.0/proveit/demo_suite/yc_demo.py +457 -0
- provenanced-0.5.0/proveit/docs/__init__.py +60 -0
- provenanced-0.5.0/proveit/docs/__main__.py +5 -0
- provenanced-0.5.0/proveit/docs/cli.py +177 -0
- provenanced-0.5.0/proveit/docs/engine.py +851 -0
- provenanced-0.5.0/proveit/docs/gap_analysis.py +395 -0
- provenanced-0.5.0/proveit/docs/templates.py +4281 -0
- provenanced-0.5.0/proveit/examiner/__init__.py +27 -0
- provenanced-0.5.0/proveit/examiner/model_card.py +447 -0
- provenanced-0.5.0/proveit/examiner/package.py +379 -0
- provenanced-0.5.0/proveit/fairness/__init__.py +100 -0
- provenanced-0.5.0/proveit/fairness/__main__.py +5 -0
- provenanced-0.5.0/proveit/fairness/adverse_action.py +451 -0
- provenanced-0.5.0/proveit/fairness/cli.py +253 -0
- provenanced-0.5.0/proveit/fairness/disparate_impact.py +511 -0
- provenanced-0.5.0/proveit/fairness/engine.py +646 -0
- provenanced-0.5.0/proveit/fairness/lda_search.py +394 -0
- provenanced-0.5.0/proveit/fairness/model_adapter.py +258 -0
- provenanced-0.5.0/proveit/fairness/proxy_detection.py +752 -0
- provenanced-0.5.0/proveit/fairness/py.typed +0 -0
- provenanced-0.5.0/proveit/fairness/regulatory.py +687 -0
- provenanced-0.5.0/proveit/governance/__init__.py +69 -0
- provenanced-0.5.0/proveit/governance/data_classification.py +215 -0
- provenanced-0.5.0/proveit/governance/exception_tracker.py +608 -0
- provenanced-0.5.0/proveit/governance/model_registry.py +601 -0
- provenanced-0.5.0/proveit/governance/regulatory_calendar.py +326 -0
- provenanced-0.5.0/proveit/governance/retention.py +314 -0
- provenanced-0.5.0/proveit/hydra24_dag.py +1142 -0
- provenanced-0.5.0/proveit/hydra24_simple.py +777 -0
- provenanced-0.5.0/proveit/integrations/__init__.py +5 -0
- provenanced-0.5.0/proveit/persistence/__init__.py +77 -0
- provenanced-0.5.0/proveit/persistence/async_writer.py +536 -0
- provenanced-0.5.0/proveit/persistence/duckdb_storage.py +628 -0
- provenanced-0.5.0/proveit/persistence/factory.py +165 -0
- provenanced-0.5.0/proveit/persistence/hosted_storage.py +305 -0
- provenanced-0.5.0/proveit/persistence/query.py +393 -0
- provenanced-0.5.0/proveit/persistence/s3_storage.py +672 -0
- provenanced-0.5.0/proveit/persistence/schema.py +268 -0
- provenanced-0.5.0/proveit/persistence/sqlite_storage.py +571 -0
- provenanced-0.5.0/proveit/persistence/storage.py +784 -0
- provenanced-0.5.0/proveit/py.typed +0 -0
- provenanced-0.5.0/proveit/reports/__init__.py +9 -0
- provenanced-0.5.0/proveit/reports/compliance.py +1025 -0
- provenanced-0.5.0/proveit/vendor/__init__.py +42 -0
- provenanced-0.5.0/proveit/vendor/certifications.py +236 -0
- provenanced-0.5.0/proveit/vendor/due_diligence.py +405 -0
- provenanced-0.5.0/proveit/vendor/health_check.py +540 -0
- provenanced-0.5.0/provenanced.egg-info/PKG-INFO +176 -0
- provenanced-0.5.0/provenanced.egg-info/SOURCES.txt +112 -0
- provenanced-0.5.0/provenanced.egg-info/dependency_links.txt +1 -0
- provenanced-0.5.0/provenanced.egg-info/entry_points.txt +6 -0
- provenanced-0.5.0/provenanced.egg-info/requires.txt +61 -0
- provenanced-0.5.0/provenanced.egg-info/top_level.txt +1 -0
- provenanced-0.5.0/pyproject.toml +110 -0
- provenanced-0.5.0/setup.cfg +4 -0
- provenanced-0.5.0/tests/test_compliance_moats.py +796 -0
- provenanced-0.5.0/tests/test_compliance_score.py +428 -0
- provenanced-0.5.0/tests/test_cro_features.py +830 -0
- provenanced-0.5.0/tests/test_dashboards.py +647 -0
- provenanced-0.5.0/tests/test_docs.py +854 -0
- provenanced-0.5.0/tests/test_docs_gaps.py +946 -0
- provenanced-0.5.0/tests/test_end_to_end.py +411 -0
- provenanced-0.5.0/tests/test_fairness.py +1688 -0
- provenanced-0.5.0/tests/test_fairness_gaps.py +311 -0
- provenanced-0.5.0/tests/test_hardening.py +409 -0
- provenanced-0.5.0/tests/test_hydra24_enhanced.py +453 -0
- provenanced-0.5.0/tests/test_legal_compliance.py +808 -0
- provenanced-0.5.0/tests/test_moats.py +1899 -0
- provenanced-0.5.0/tests/test_model_registry.py +366 -0
- provenanced-0.5.0/tests/test_persistence.py +673 -0
- provenanced-0.5.0/tests/test_persistence_duckdb.py +29 -0
- provenanced-0.5.0/tests/test_pipeline.py +575 -0
- provenanced-0.5.0/tests/test_production.py +744 -0
- provenanced-0.5.0/tests/test_production_hardening.py +430 -0
- provenanced-0.5.0/tests/test_vendor_readiness.py +974 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
Licensor: Provenance, Inc.
|
|
4
|
+
Licensed Work: ProveIt
|
|
5
|
+
|
|
6
|
+
The Licensed Work is provided under the terms of this License. The Licensed
|
|
7
|
+
Work is (c) 2024-2026 Provenance, Inc.
|
|
8
|
+
|
|
9
|
+
Grant of Rights: Subject to the terms and conditions of this License, the
|
|
10
|
+
Licensor hereby grants you a non-exclusive, worldwide, royalty-free right to
|
|
11
|
+
use, copy, modify, and distribute the Licensed Work, in each case subject to
|
|
12
|
+
the limitations below.
|
|
13
|
+
|
|
14
|
+
Limitation: You may not use the Licensed Work for a Commercial Offering that
|
|
15
|
+
competes with any product or service offered by the Licensor that includes the
|
|
16
|
+
Licensed Work.
|
|
17
|
+
|
|
18
|
+
Change Date: Four years from the date the Licensed Work is first publicly
|
|
19
|
+
distributed by the Licensor.
|
|
20
|
+
|
|
21
|
+
Change License: Apache License, Version 2.0
|
|
22
|
+
|
|
23
|
+
Notice: This License does not grant you any right in any trademark or logo of
|
|
24
|
+
the Licensor. THE LICENSED WORK IS PROVIDED "AS IS". THE LICENSOR HEREBY
|
|
25
|
+
DISCLAIMS ALL WARRANTIES, WHETHER EXPRESS, IMPLIED, STATUTORY OR OTHERWISE,
|
|
26
|
+
AND SPECIFICALLY DISCLAIMS ALL IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
27
|
+
FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT, WITH RESPECT TO THE LICENSED
|
|
28
|
+
WORK.
|
|
29
|
+
|
|
30
|
+
For full terms, see https://mariadb.com/bsl11/
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: provenanced
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: ProveIt: Cryptographically Provable AI Compliance for Banks
|
|
5
|
+
Author-email: "Provenance, Inc." <info@provenance.ai>
|
|
6
|
+
License-Expression: BUSL-1.1
|
|
7
|
+
Project-URL: Homepage, https://proveit.ai
|
|
8
|
+
Keywords: provenance,ml,machine-learning,data-lineage,audit,compliance,fairness,fair-lending,disparate-impact,documentation,model-risk-management,sr-11-7,ecoa,eu-ai-act,banking
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Software Development :: Testing
|
|
23
|
+
Classifier: Topic :: Documentation
|
|
24
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
25
|
+
Classifier: Topic :: System :: Logging
|
|
26
|
+
Requires-Python: >=3.9
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: numpy>=1.24
|
|
30
|
+
Requires-Dist: pandas>=2.0
|
|
31
|
+
Provides-Extra: fairness
|
|
32
|
+
Requires-Dist: scikit-learn>=1.3; extra == "fairness"
|
|
33
|
+
Provides-Extra: torch
|
|
34
|
+
Requires-Dist: torch>=2.0; extra == "torch"
|
|
35
|
+
Provides-Extra: tensorflow
|
|
36
|
+
Requires-Dist: tensorflow>=2.13; extra == "tensorflow"
|
|
37
|
+
Provides-Extra: xgboost
|
|
38
|
+
Requires-Dist: xgboost>=1.7; extra == "xgboost"
|
|
39
|
+
Provides-Extra: lightgbm
|
|
40
|
+
Requires-Dist: lightgbm>=4.0; extra == "lightgbm"
|
|
41
|
+
Provides-Extra: catboost
|
|
42
|
+
Requires-Dist: catboost>=1.2; extra == "catboost"
|
|
43
|
+
Provides-Extra: shap
|
|
44
|
+
Requires-Dist: scikit-learn>=1.3; extra == "shap"
|
|
45
|
+
Requires-Dist: shap>=0.42; extra == "shap"
|
|
46
|
+
Provides-Extra: dashboard
|
|
47
|
+
Requires-Dist: flask>=2.3; extra == "dashboard"
|
|
48
|
+
Requires-Dist: python-dotenv>=1.0; extra == "dashboard"
|
|
49
|
+
Provides-Extra: postgres
|
|
50
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == "postgres"
|
|
51
|
+
Provides-Extra: otel
|
|
52
|
+
Requires-Dist: opentelemetry-api>=1.20; extra == "otel"
|
|
53
|
+
Requires-Dist: opentelemetry-sdk>=1.20; extra == "otel"
|
|
54
|
+
Provides-Extra: pdf
|
|
55
|
+
Requires-Dist: fpdf2>=2.7; extra == "pdf"
|
|
56
|
+
Provides-Extra: docx
|
|
57
|
+
Requires-Dist: python-docx>=1.0; extra == "docx"
|
|
58
|
+
Provides-Extra: export
|
|
59
|
+
Requires-Dist: fpdf2>=2.7; extra == "export"
|
|
60
|
+
Requires-Dist: python-docx>=1.0; extra == "export"
|
|
61
|
+
Provides-Extra: all
|
|
62
|
+
Requires-Dist: scikit-learn>=1.3; extra == "all"
|
|
63
|
+
Requires-Dist: flask>=2.3; extra == "all"
|
|
64
|
+
Requires-Dist: python-dotenv>=1.0; extra == "all"
|
|
65
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == "all"
|
|
66
|
+
Requires-Dist: shap>=0.42; extra == "all"
|
|
67
|
+
Requires-Dist: opentelemetry-api>=1.20; extra == "all"
|
|
68
|
+
Requires-Dist: opentelemetry-sdk>=1.20; extra == "all"
|
|
69
|
+
Requires-Dist: fpdf2>=2.7; extra == "all"
|
|
70
|
+
Requires-Dist: python-docx>=1.0; extra == "all"
|
|
71
|
+
Provides-Extra: dev
|
|
72
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
73
|
+
Requires-Dist: build; extra == "dev"
|
|
74
|
+
Requires-Dist: twine; extra == "dev"
|
|
75
|
+
Dynamic: license-file
|
|
76
|
+
|
|
77
|
+
# ProveIt
|
|
78
|
+
|
|
79
|
+
Cryptographically provable AI compliance for banks: tamper-proof provenance tracking, fair lending analysis, and automated compliance documentation.
|
|
80
|
+
|
|
81
|
+
## Install
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install -e .
|
|
85
|
+
pip install -e ".[all]"  # adds scikit-learn, flask, python-dotenv, psycopg2-binary, shap, opentelemetry, fpdf2, python-docx
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Quick Start
|
|
89
|
+
|
|
90
|
+
### Provenance Tracking
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
import proveit
|
|
94
|
+
|
|
95
|
+
proveit.configure(dag=True)
|
|
96
|
+
apps = proveit.wrap(apps_df, "applications.csv")
|
|
97
|
+
credit = proveit.wrap(credit_df, "credit_bureau.csv")
|
|
98
|
+
|
|
99
|
+
df = apps.merge(credit, on="applicant_id")
|
|
100
|
+
risk_score = df["balance"] / df["limit"]
|
|
101
|
+
|
|
102
|
+
dag = proveit.get_dag_simple(risk_score)
|
|
103
|
+
print(dag.source_nodes)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Fairness Analysis
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from proveit.fairness import FairLensEngine, FairLensConfig
|
|
110
|
+
|
|
111
|
+
config = FairLensConfig(
|
|
112
|
+
protected_columns=["race", "gender", "age"],
|
|
113
|
+
industry="lending",
|
|
114
|
+
)
|
|
115
|
+
engine = FairLensEngine(config=config)
|
|
116
|
+
results = engine.analyze(
|
|
117
|
+
model=model, X_train=X_train, y_train=y_train,
|
|
118
|
+
X_test=X_test, y_test=y_test, protected_data=prot_test,
|
|
119
|
+
)
|
|
120
|
+
print(results.disparate_impact_results)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Compliance Documentation
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from proveit.docs import AutoDocEngine, TemplateType, ModelMetadata
|
|
127
|
+
|
|
128
|
+
engine = AutoDocEngine()
|
|
129
|
+
report = engine.generate(
|
|
130
|
+
template=TemplateType.SR_11_7,
|
|
131
|
+
model_metadata=ModelMetadata(
|
|
132
|
+
name="Risk Model v2",
|
|
133
|
+
version="2.1.0",
|
|
134
|
+
model_type="XGBoost",
|
|
135
|
+
),
|
|
136
|
+
)
|
|
137
|
+
report.save("sr117_report.md")
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## CLI
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
proveit fairness analyze --model m.pkl --data d.csv --protected race,sex --industry lending
|
|
144
|
+
proveit docs generate --template sr11-7 --model metadata.json
|
|
145
|
+
proveit docs templates
|
|
146
|
+
proveit dashboard
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Modules
|
|
150
|
+
|
|
151
|
+
| Module | Description |
|
|
152
|
+
|--------|-------------|
|
|
153
|
+
| `proveit` | Core provenance DAG engine with Merkle tree anchoring |
|
|
154
|
+
| `proveit.fairness` | Disparate impact, proxy detection, LDA search, adverse action codes |
|
|
155
|
+
| `proveit.docs` | SR 11-7, EU AI Act, Model Cards, ECOA, GDPR, NIST AI RMF, dbt |
|
|
156
|
+
| `proveit.compliance` | EU AI Act and ECOA compliance engines |
|
|
157
|
+
| `proveit.persistence` | SQLite, PostgreSQL, DuckDB, S3 storage backends |
|
|
158
|
+
| `proveit.integrations` | Airflow, Kubeflow, Prefect, Dagster, dbt, Vertex AI, SageMaker, Azure ML |
|
|
159
|
+
| `proveit.dashboard` | Flask-based provenance dashboard |
|
|
160
|
+
|
|
161
|
+
## Test
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pytest tests/ -v
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Requirements
|
|
168
|
+
|
|
169
|
+
- Python >= 3.9
|
|
170
|
+
- numpy >= 1.24
|
|
171
|
+
- pandas >= 2.0
|
|
172
|
+
- scikit-learn >= 1.3 (optional; installed by the `fairness`, `shap`, and `all` extras)
|
|
173
|
+
|
|
174
|
+
## License
|
|
175
|
+
|
|
176
|
+
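Source-available under the Business Source License 1.1 (BUSL-1.1), converting to the Apache License 2.0 on the Change Date; see `LICENSE` for the terms. Copyright (c) 2024-2026 Provenance, Inc.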
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# ProveIt
|
|
2
|
+
|
|
3
|
+
Cryptographically provable AI compliance for banks: tamper-proof provenance tracking, fair lending analysis, and automated compliance documentation.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install -e .
|
|
9
|
+
pip install -e ".[all]"  # adds scikit-learn, flask, python-dotenv, psycopg2-binary, shap, opentelemetry, fpdf2, python-docx
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Quick Start
|
|
13
|
+
|
|
14
|
+
### Provenance Tracking
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
import proveit
|
|
18
|
+
|
|
19
|
+
proveit.configure(dag=True)
|
|
20
|
+
apps = proveit.wrap(apps_df, "applications.csv")
|
|
21
|
+
credit = proveit.wrap(credit_df, "credit_bureau.csv")
|
|
22
|
+
|
|
23
|
+
df = apps.merge(credit, on="applicant_id")
|
|
24
|
+
risk_score = df["balance"] / df["limit"]
|
|
25
|
+
|
|
26
|
+
dag = proveit.get_dag_simple(risk_score)
|
|
27
|
+
print(dag.source_nodes)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Fairness Analysis
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from proveit.fairness import FairLensEngine, FairLensConfig
|
|
34
|
+
|
|
35
|
+
config = FairLensConfig(
|
|
36
|
+
protected_columns=["race", "gender", "age"],
|
|
37
|
+
industry="lending",
|
|
38
|
+
)
|
|
39
|
+
engine = FairLensEngine(config=config)
|
|
40
|
+
results = engine.analyze(
|
|
41
|
+
model=model, X_train=X_train, y_train=y_train,
|
|
42
|
+
X_test=X_test, y_test=y_test, protected_data=prot_test,
|
|
43
|
+
)
|
|
44
|
+
print(results.disparate_impact_results)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Compliance Documentation
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from proveit.docs import AutoDocEngine, TemplateType, ModelMetadata
|
|
51
|
+
|
|
52
|
+
engine = AutoDocEngine()
|
|
53
|
+
report = engine.generate(
|
|
54
|
+
template=TemplateType.SR_11_7,
|
|
55
|
+
model_metadata=ModelMetadata(
|
|
56
|
+
name="Risk Model v2",
|
|
57
|
+
version="2.1.0",
|
|
58
|
+
model_type="XGBoost",
|
|
59
|
+
),
|
|
60
|
+
)
|
|
61
|
+
report.save("sr117_report.md")
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## CLI
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
proveit fairness analyze --model m.pkl --data d.csv --protected race,sex --industry lending
|
|
68
|
+
proveit docs generate --template sr11-7 --model metadata.json
|
|
69
|
+
proveit docs templates
|
|
70
|
+
proveit dashboard
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Modules
|
|
74
|
+
|
|
75
|
+
| Module | Description |
|
|
76
|
+
|--------|-------------|
|
|
77
|
+
| `proveit` | Core provenance DAG engine with Merkle tree anchoring |
|
|
78
|
+
| `proveit.fairness` | Disparate impact, proxy detection, LDA search, adverse action codes |
|
|
79
|
+
| `proveit.docs` | SR 11-7, EU AI Act, Model Cards, ECOA, GDPR, NIST AI RMF, dbt |
|
|
80
|
+
| `proveit.compliance` | EU AI Act and ECOA compliance engines |
|
|
81
|
+
| `proveit.persistence` | SQLite, PostgreSQL, DuckDB, S3 storage backends |
|
|
82
|
+
| `proveit.integrations` | Airflow, Kubeflow, Prefect, Dagster, dbt, Vertex AI, SageMaker, Azure ML |
|
|
83
|
+
| `proveit.dashboard` | Flask-based provenance dashboard |
|
|
84
|
+
|
|
85
|
+
## Test
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pytest tests/ -v
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Requirements
|
|
92
|
+
|
|
93
|
+
- Python >= 3.9
|
|
94
|
+
- numpy >= 1.24
|
|
95
|
+
- pandas >= 2.0
|
|
96
|
+
- scikit-learn >= 1.3 (optional; installed by the `fairness`, `shap`, and `all` extras)
|
|
97
|
+
|
|
98
|
+
## License
|
|
99
|
+
|
|
100
|
+
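Source-available under the Business Source License 1.1 (BUSL-1.1), converting to the Apache License 2.0 on the Change Date; see `LICENSE` for the terms. Copyright (c) 2024-2026 Provenance, Inc.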
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ProveIt: Cryptographically Provable AI Compliance for Banks
|
|
3
|
+
|
|
4
|
+
Tamper-proof data provenance tracking, algorithmic fairness analysis,
|
|
5
|
+
and automated compliance documentation for financial institutions.
|
|
6
|
+
|
|
7
|
+
Quick start:
|
|
8
|
+
import proveit
|
|
9
|
+
|
|
10
|
+
proveit.configure(api_key="hyd_live_...") # required
|
|
11
|
+
df = proveit.wrap(raw_df, "users.csv")
|
|
12
|
+
dag = proveit.get_dag_simple(df['income'] / df['debt'])
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
__version__ = "0.5.0"
|
|
16
|
+
|
|
17
|
+
# =============================================================================
|
|
18
|
+
# Configuration API (always available - these ARE the gate)
|
|
19
|
+
# =============================================================================
|
|
20
|
+
from .config import (
|
|
21
|
+
configure,
|
|
22
|
+
wrap,
|
|
23
|
+
get_dag as get_dag_simple,
|
|
24
|
+
audit as audit_simple,
|
|
25
|
+
flush,
|
|
26
|
+
reset,
|
|
27
|
+
get_stats,
|
|
28
|
+
tracking,
|
|
29
|
+
get_tracker,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# =============================================================================
|
|
33
|
+
# Lazy-loaded names: require proveit.configure(api_key=...) before access
|
|
34
|
+
# =============================================================================
|
|
35
|
+
_LAZY_IMPORTS = {
|
|
36
|
+
# HYDRA-24 DAG System
|
|
37
|
+
"DAGTracker": ("hydra24_dag", "DAGTracker"),
|
|
38
|
+
"DAGTrackedSeries": ("hydra24_dag", "DAGTrackedSeries"),
|
|
39
|
+
"DAGTrackedDataFrame": ("hydra24_dag", "DAGTrackedDataFrame"),
|
|
40
|
+
"wrap_dataframe": ("hydra24_dag", "wrap_dataframe_dag"),
|
|
41
|
+
"get_dag": ("hydra24_dag", "get_dag_for_series"),
|
|
42
|
+
"dag_audit": ("hydra24_dag", "dag_audit"),
|
|
43
|
+
"ComputationDAG": ("hydra24_dag", "ComputationDAG"),
|
|
44
|
+
"DAGNode": ("hydra24_dag", "DAGNode"),
|
|
45
|
+
"DAGEdge": ("hydra24_dag", "DAGEdge"),
|
|
46
|
+
"NodeType": ("hydra24_dag", "NodeType"),
|
|
47
|
+
|
|
48
|
+
# HYDRA-24 Enhanced System
|
|
49
|
+
"ProvenanceTracker": ("hydra24_simple", "ProvenanceTracker"),
|
|
50
|
+
"TrackedSeries": ("hydra24_simple", "TrackedSeries"),
|
|
51
|
+
"TrackedDataFrame": ("hydra24_simple", "TrackedDataFrame"),
|
|
52
|
+
"wrap_dataframe_simple": ("hydra24_simple", "wrap_dataframe"),
|
|
53
|
+
"audit": ("hydra24_simple", "audit"),
|
|
54
|
+
"audit_series_diversity": ("hydra24_simple", "audit_series_diversity"),
|
|
55
|
+
"OpCode": ("hydra24_simple", "OpCode"),
|
|
56
|
+
"MAX_PAIRS": ("hydra24_simple", "MAX_PAIRS"),
|
|
57
|
+
|
|
58
|
+
# Persistence
|
|
59
|
+
"create_storage": ("persistence.factory", "create_storage"),
|
|
60
|
+
"init_database": ("persistence", "init_database"),
|
|
61
|
+
"get_schema_ddl": ("persistence", "get_schema_ddl"),
|
|
62
|
+
"PostgreSQLStorage": ("persistence", "PostgreSQLStorage"),
|
|
63
|
+
"reconstruct_dag": ("persistence", "reconstruct_dag"),
|
|
64
|
+
"query_by_fingerprint": ("persistence", "query_by_fingerprint"),
|
|
65
|
+
"query_by_source": ("persistence", "query_by_source"),
|
|
66
|
+
|
|
67
|
+
# Fairness
|
|
68
|
+
"FairLensEngine": ("fairness", "FairLensEngine"),
|
|
69
|
+
"FairLensConfig": ("fairness", "FairLensConfig"),
|
|
70
|
+
|
|
71
|
+
# Documentation
|
|
72
|
+
"AutoDocEngine": ("docs", "AutoDocEngine"),
|
|
73
|
+
"TemplateType": ("docs", "TemplateType"),
|
|
74
|
+
|
|
75
|
+
# Vendor Readiness
|
|
76
|
+
"VendorDueDiligencePackage": ("vendor", "VendorDueDiligencePackage"),
|
|
77
|
+
"CertificationTracker": ("vendor", "CertificationTracker"),
|
|
78
|
+
"PlatformHealthCheck": ("vendor", "PlatformHealthCheck"),
|
|
79
|
+
|
|
80
|
+
# Governance
|
|
81
|
+
"ModelRecord": ("governance", "ModelRecord"),
|
|
82
|
+
"ModelRegistry": ("governance", "ModelRegistry"),
|
|
83
|
+
"PIIDetector": ("governance", "PIIDetector"),
|
|
84
|
+
"RetentionPolicyEngine": ("governance", "RetentionPolicyEngine"),
|
|
85
|
+
"RegulatoryCalendar": ("governance", "RegulatoryCalendar"),
|
|
86
|
+
|
|
87
|
+
# Examiner
|
|
88
|
+
"ExaminerPackage": ("examiner", "ExaminerPackage"),
|
|
89
|
+
"ModelCardGenerator": ("examiner", "ModelCardGenerator"),
|
|
90
|
+
|
|
91
|
+
# Compliance
|
|
92
|
+
"ComplianceScorer": ("compliance.score", "ComplianceScorer"),
|
|
93
|
+
"AttestationManager": ("compliance.cro_features", "AttestationManager"),
|
|
94
|
+
"PeerBenchmark": ("compliance.cro_features", "PeerBenchmark"),
|
|
95
|
+
"QuickWinRecommender": ("compliance.cro_features", "QuickWinRecommender"),
|
|
96
|
+
"RegulatoryChangeAssessor": ("compliance.cro_features", "RegulatoryChangeAssessor"),
|
|
97
|
+
"RemediationQueue": ("compliance.cro_features", "RemediationQueue"),
|
|
98
|
+
"RiskHeatmap": ("compliance.cro_features", "RiskHeatmap"),
|
|
99
|
+
"ExceptionTracker": ("governance.exception_tracker", "ExceptionTracker"),
|
|
100
|
+
"ComplianceException": ("governance.exception_tracker", "ComplianceException"),
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def __getattr__(name: str):
    """Resolve a lazily exported ``proveit`` attribute on first access.

    Names listed in ``_LAZY_IMPORTS`` are imported from their submodule the
    first time they are requested, then stored in the module globals so that
    subsequent lookups are served by normal attribute access.

    Args:
        name: Attribute requested on the ``proveit`` module.

    Returns:
        The object the lazy-import table maps ``name`` to.

    Raises:
        RuntimeError: The package configuration is not initialised and
            neither ``PROVEIT_TESTING`` nor ``HYDRA24_TESTING`` is set to a
            non-empty value.
        AttributeError: ``name`` is not a lazy export, or importing its
            submodule raised ``ImportError``.
    """
    # Unknown names are rejected first, before any import work is done.
    if name not in _LAZY_IMPORTS:
        raise AttributeError(f"module 'proveit' has no attribute {name!r}")

    import os
    from .config import _config

    # Either environment variable bypasses the configuration gate.
    testing_override = (
        os.environ.get("PROVEIT_TESTING")
        or os.environ.get("HYDRA24_TESTING")
    )
    if not (_config._initialized or testing_override):
        raise RuntimeError(
            f"ProveIt not configured. Call proveit.configure(api_key='...') "
            f"before accessing proveit.{name}.\n"
            "Get your API key at https://proveit.ai/signup"
        )

    submodule, attribute = _LAZY_IMPORTS[name]
    import importlib
    try:
        loaded = importlib.import_module(f".{submodule}", __package__)
    except ImportError as exc:
        # Reported as AttributeError so the failure looks like a missing name.
        raise AttributeError(
            f"proveit.{name} requires an optional dependency: {exc}"
        ) from exc

    resolved = getattr(loaded, attribute)
    # Cache on the module so this hook is not consulted again for ``name``.
    globals()[name] = resolved
    return resolved
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# =============================================================================
|
|
137
|
+
# Public API
|
|
138
|
+
# =============================================================================
|
|
139
|
+
__all__ = [
|
|
140
|
+
# Version
|
|
141
|
+
"__version__",
|
|
142
|
+
|
|
143
|
+
# Simplified API (easiest - recommended for new users)
|
|
144
|
+
"configure",
|
|
145
|
+
"wrap",
|
|
146
|
+
"get_dag_simple",
|
|
147
|
+
"audit_simple",
|
|
148
|
+
"flush",
|
|
149
|
+
"reset",
|
|
150
|
+
"get_stats",
|
|
151
|
+
"tracking",
|
|
152
|
+
"get_tracker",
|
|
153
|
+
|
|
154
|
+
# HYDRA-24 DAG (advanced API)
|
|
155
|
+
"DAGTracker",
|
|
156
|
+
"DAGTrackedSeries",
|
|
157
|
+
"DAGTrackedDataFrame",
|
|
158
|
+
"wrap_dataframe",
|
|
159
|
+
"get_dag",
|
|
160
|
+
"dag_audit",
|
|
161
|
+
"ComputationDAG",
|
|
162
|
+
"DAGNode",
|
|
163
|
+
"DAGEdge",
|
|
164
|
+
"NodeType",
|
|
165
|
+
|
|
166
|
+
# HYDRA-24 Enhanced
|
|
167
|
+
"ProvenanceTracker",
|
|
168
|
+
"TrackedSeries",
|
|
169
|
+
"TrackedDataFrame",
|
|
170
|
+
"wrap_dataframe_simple",
|
|
171
|
+
"audit",
|
|
172
|
+
"audit_series_diversity",
|
|
173
|
+
"OpCode",
|
|
174
|
+
"MAX_PAIRS",
|
|
175
|
+
|
|
176
|
+
# Storage factory
|
|
177
|
+
"create_storage",
|
|
178
|
+
|
|
179
|
+
# Persistence (optional)
|
|
180
|
+
"init_database",
|
|
181
|
+
"get_schema_ddl",
|
|
182
|
+
"PostgreSQLStorage",
|
|
183
|
+
"reconstruct_dag",
|
|
184
|
+
"query_by_fingerprint",
|
|
185
|
+
"query_by_source",
|
|
186
|
+
|
|
187
|
+
# Fairness
|
|
188
|
+
"FairLensEngine",
|
|
189
|
+
"FairLensConfig",
|
|
190
|
+
|
|
191
|
+
# Docs
|
|
192
|
+
"AutoDocEngine",
|
|
193
|
+
"TemplateType",
|
|
194
|
+
|
|
195
|
+
# Vendor Readiness
|
|
196
|
+
"VendorDueDiligencePackage",
|
|
197
|
+
"CertificationTracker",
|
|
198
|
+
"PlatformHealthCheck",
|
|
199
|
+
|
|
200
|
+
# Data Governance & Model Registry
|
|
201
|
+
"ModelRecord",
|
|
202
|
+
"ModelRegistry",
|
|
203
|
+
"PIIDetector",
|
|
204
|
+
"RetentionPolicyEngine",
|
|
205
|
+
"RegulatoryCalendar",
|
|
206
|
+
|
|
207
|
+
# Examiner Package
|
|
208
|
+
"ExaminerPackage",
|
|
209
|
+
"ModelCardGenerator",
|
|
210
|
+
|
|
211
|
+
# Compliance Scoring
|
|
212
|
+
"ComplianceScorer",
|
|
213
|
+
|
|
214
|
+
# CRO Features
|
|
215
|
+
"AttestationManager",
|
|
216
|
+
"ComplianceException",
|
|
217
|
+
"ExceptionTracker",
|
|
218
|
+
"PeerBenchmark",
|
|
219
|
+
"QuickWinRecommender",
|
|
220
|
+
"RegulatoryChangeAssessor",
|
|
221
|
+
"RemediationQueue",
|
|
222
|
+
"RiskHeatmap",
|
|
223
|
+
|
|
224
|
+
# Logging
|
|
225
|
+
"setup_logging",
|
|
226
|
+
]
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# =============================================================================
|
|
230
|
+
# Production Logging Setup
|
|
231
|
+
# =============================================================================
|
|
232
|
+
|
|
233
|
+
def setup_logging(
    level: str = "INFO",
    fmt: str = "json",
    include_timestamp: bool = True,
) -> None:
    """Configure logging for production bank deployments.

    Sets up structured logging across all ``proveit.*`` loggers so that
    output is compatible with log aggregation tools (Splunk, ELK,
    Datadog, CloudWatch Logs).

    Any handlers already attached to the ``"proveit"`` logger are removed
    and replaced by a single ``StreamHandler`` writing to ``sys.stderr``,
    so calling this function repeatedly does not duplicate output.

    Args:
        level: Log level name (``"DEBUG"``, ``"INFO"``, ``"WARNING"``,
            ``"ERROR"``), case-insensitive.  A numeric level such as
            ``logging.DEBUG`` is also accepted.  Anything that does not
            resolve to a known level falls back to ``INFO``.
        fmt: ``"json"`` for structured JSON lines (default, recommended
            for production); any other value selects human-readable
            text output.
        include_timestamp: Include timestamps (default ``True``).  JSON
            output uses an ISO-8601 UTC timestamp.

    Example::

        import proveit
        proveit.setup_logging(level="INFO", fmt="json")
    """
    import logging
    import json as _json
    import sys
    from datetime import datetime, timezone

    # Resolve through the logging module's level table instead of
    # ``getattr(logging, ...)``: getattr also matches non-level attributes
    # (e.g. ``BASIC_FORMAT``) and misses levels added via ``addLevelName``.
    if isinstance(level, int) and not isinstance(level, bool):
        numeric_level = level
    else:
        resolved = logging.getLevelName(str(level).strip().upper())
        # getLevelName returns a "Level X" string for unknown names.
        numeric_level = resolved if isinstance(resolved, int) else logging.INFO

    class _JSONFormatter(logging.Formatter):
        """Render each record as one JSON object per line."""

        def format(self, record: logging.LogRecord) -> str:
            entry = {
                "logger": record.name,
                "level": record.levelname,
                "message": record.getMessage(),
            }
            if include_timestamp:
                entry["timestamp"] = (
                    datetime.fromtimestamp(record.created, tz=timezone.utc)
                    .isoformat()
                )
            if record.exc_info and record.exc_info[0] is not None:
                entry["exception"] = self.formatException(record.exc_info)
            # Keep stack traces requested with ``stack_info=True``; the base
            # formatter emits them, so the JSON override must not drop them.
            if record.stack_info:
                entry["stack_info"] = self.formatStack(record.stack_info)
            # default=str keeps logging from failing on non-JSON values.
            return _json.dumps(entry, default=str)

    class _TextFormatter(logging.Formatter):
        """Human-readable single-line format, with or without a timestamp."""

        _fmt_ts = "%(asctime)s %(name)s %(levelname)s %(message)s"
        _fmt_no_ts = "%(name)s %(levelname)s %(message)s"

        def __init__(self, with_ts: bool = True) -> None:
            super().__init__(
                fmt=self._fmt_ts if with_ts else self._fmt_no_ts,
                datefmt="%Y-%m-%dT%H:%M:%S%z",
            )

    root_logger = logging.getLogger("proveit")
    root_logger.setLevel(numeric_level)

    # Detach previous handlers through the public API (iterating a copy,
    # since removeHandler mutates the list) so reconfiguration is idempotent.
    for existing in list(root_logger.handlers):
        root_logger.removeHandler(existing)

    handler = logging.StreamHandler(sys.stderr)
    handler.setLevel(numeric_level)

    if fmt == "json":
        handler.setFormatter(_JSONFormatter())
    else:
        handler.setFormatter(_TextFormatter(with_ts=include_timestamp))

    root_logger.addHandler(handler)
|