pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/main.py +27 -1
- api/models/docs.py +68 -0
- api/models/evolution.py +117 -0
- api/models/tracking.py +111 -0
- api/models/validation.py +46 -6
- api/routes/v1/__init__.py +14 -1
- api/routes/v1/docs.py +187 -0
- api/routes/v1/evolution.py +337 -0
- api/routes/v1/templates.py +211 -27
- api/routes/v1/tracking.py +301 -0
- api/routes/v1/validation.py +68 -31
- pycharter/__init__.py +268 -58
- pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
- pycharter/data/templates/contract/template_contract.yaml +122 -0
- pycharter/data/templates/contract/template_metadata.yaml +68 -0
- pycharter/data/templates/contract/template_schema.yaml +100 -0
- pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
- pycharter/data/templates/etl/README.md +224 -0
- pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
- pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
- pycharter/data/templates/etl/extract_database.yaml +34 -0
- pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
- pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
- pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
- pycharter/data/templates/etl/extract_file_json.yaml +24 -0
- pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
- pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
- pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
- pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
- pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
- pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
- pycharter/data/templates/etl/load_file.yaml +34 -0
- pycharter/data/templates/etl/load_insert.yaml +18 -0
- pycharter/data/templates/etl/load_postgresql.yaml +39 -0
- pycharter/data/templates/etl/load_sqlite.yaml +21 -0
- pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
- pycharter/data/templates/etl/load_upsert.yaml +25 -0
- pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
- pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
- pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
- pycharter/data/templates/etl/transform_combined.yaml +48 -0
- pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
- pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
- pycharter/data/templates/etl/transform_simple.yaml +59 -0
- pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
- pycharter/docs_generator/__init__.py +43 -0
- pycharter/docs_generator/generator.py +465 -0
- pycharter/docs_generator/renderers.py +247 -0
- pycharter/etl_generator/__init__.py +168 -80
- pycharter/etl_generator/builder.py +121 -0
- pycharter/etl_generator/config_loader.py +394 -0
- pycharter/etl_generator/config_validator.py +418 -0
- pycharter/etl_generator/context.py +132 -0
- pycharter/etl_generator/expression.py +499 -0
- pycharter/etl_generator/extractors/__init__.py +30 -0
- pycharter/etl_generator/extractors/base.py +70 -0
- pycharter/etl_generator/extractors/cloud_storage.py +530 -0
- pycharter/etl_generator/extractors/database.py +221 -0
- pycharter/etl_generator/extractors/factory.py +185 -0
- pycharter/etl_generator/extractors/file.py +475 -0
- pycharter/etl_generator/extractors/http.py +895 -0
- pycharter/etl_generator/extractors/streaming.py +57 -0
- pycharter/etl_generator/loaders/__init__.py +41 -0
- pycharter/etl_generator/loaders/base.py +35 -0
- pycharter/etl_generator/loaders/cloud.py +87 -0
- pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
- pycharter/etl_generator/loaders/database.py +274 -0
- pycharter/etl_generator/loaders/factory.py +180 -0
- pycharter/etl_generator/loaders/file.py +72 -0
- pycharter/etl_generator/loaders/file_loader.py +130 -0
- pycharter/etl_generator/pipeline.py +743 -0
- pycharter/etl_generator/protocols.py +54 -0
- pycharter/etl_generator/result.py +63 -0
- pycharter/etl_generator/schemas/__init__.py +49 -0
- pycharter/etl_generator/transformers/__init__.py +49 -0
- pycharter/etl_generator/transformers/base.py +63 -0
- pycharter/etl_generator/transformers/config.py +45 -0
- pycharter/etl_generator/transformers/custom_function.py +101 -0
- pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
- pycharter/etl_generator/transformers/operations.py +218 -0
- pycharter/etl_generator/transformers/pipeline.py +54 -0
- pycharter/etl_generator/transformers/simple_operations.py +131 -0
- pycharter/quality/__init__.py +25 -0
- pycharter/quality/tracking/__init__.py +64 -0
- pycharter/quality/tracking/collector.py +318 -0
- pycharter/quality/tracking/exporters.py +238 -0
- pycharter/quality/tracking/models.py +194 -0
- pycharter/quality/tracking/store.py +385 -0
- pycharter/runtime_validator/__init__.py +20 -7
- pycharter/runtime_validator/builder.py +328 -0
- pycharter/runtime_validator/validator.py +311 -7
- pycharter/runtime_validator/validator_core.py +61 -0
- pycharter/schema_evolution/__init__.py +61 -0
- pycharter/schema_evolution/compatibility.py +270 -0
- pycharter/schema_evolution/diff.py +496 -0
- pycharter/schema_evolution/models.py +201 -0
- pycharter/shared/__init__.py +56 -0
- pycharter/shared/errors.py +296 -0
- pycharter/shared/protocols.py +234 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
- pycharter-0.0.24.dist-info/RECORD +543 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
- ui/static/404/index.html +1 -1
- ui/static/404.html +1 -1
- ui/static/__next.__PAGE__.txt +1 -1
- ui/static/__next._full.txt +1 -1
- ui/static/__next._head.txt +1 -1
- ui/static/__next._index.txt +1 -1
- ui/static/__next._tree.txt +1 -1
- ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
- ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
- ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
- ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
- ui/static/_not-found/__next._full.txt +1 -1
- ui/static/_not-found/__next._head.txt +1 -1
- ui/static/_not-found/__next._index.txt +1 -1
- ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/_not-found/__next._not-found.txt +1 -1
- ui/static/_not-found/__next._tree.txt +1 -1
- ui/static/_not-found/index.html +1 -1
- ui/static/_not-found/index.txt +1 -1
- ui/static/contracts/__next._full.txt +2 -2
- ui/static/contracts/__next._head.txt +1 -1
- ui/static/contracts/__next._index.txt +1 -1
- ui/static/contracts/__next._tree.txt +1 -1
- ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/contracts/__next.contracts.txt +1 -1
- ui/static/contracts/index.html +1 -1
- ui/static/contracts/index.txt +2 -2
- ui/static/documentation/__next._full.txt +1 -1
- ui/static/documentation/__next._head.txt +1 -1
- ui/static/documentation/__next._index.txt +1 -1
- ui/static/documentation/__next._tree.txt +1 -1
- ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
- ui/static/documentation/__next.documentation.txt +1 -1
- ui/static/documentation/index.html +2 -2
- ui/static/documentation/index.txt +1 -1
- ui/static/index.html +1 -1
- ui/static/index.txt +1 -1
- ui/static/metadata/__next._full.txt +1 -1
- ui/static/metadata/__next._head.txt +1 -1
- ui/static/metadata/__next._index.txt +1 -1
- ui/static/metadata/__next._tree.txt +1 -1
- ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/metadata/__next.metadata.txt +1 -1
- ui/static/metadata/index.html +1 -1
- ui/static/metadata/index.txt +1 -1
- ui/static/quality/__next._full.txt +2 -2
- ui/static/quality/__next._head.txt +1 -1
- ui/static/quality/__next._index.txt +1 -1
- ui/static/quality/__next._tree.txt +1 -1
- ui/static/quality/__next.quality.__PAGE__.txt +2 -2
- ui/static/quality/__next.quality.txt +1 -1
- ui/static/quality/index.html +2 -2
- ui/static/quality/index.txt +2 -2
- ui/static/rules/__next._full.txt +1 -1
- ui/static/rules/__next._head.txt +1 -1
- ui/static/rules/__next._index.txt +1 -1
- ui/static/rules/__next._tree.txt +1 -1
- ui/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/rules/__next.rules.txt +1 -1
- ui/static/rules/index.html +1 -1
- ui/static/rules/index.txt +1 -1
- ui/static/schemas/__next._full.txt +1 -1
- ui/static/schemas/__next._head.txt +1 -1
- ui/static/schemas/__next._index.txt +1 -1
- ui/static/schemas/__next._tree.txt +1 -1
- ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/schemas/__next.schemas.txt +1 -1
- ui/static/schemas/index.html +1 -1
- ui/static/schemas/index.txt +1 -1
- ui/static/settings/__next._full.txt +1 -1
- ui/static/settings/__next._head.txt +1 -1
- ui/static/settings/__next._index.txt +1 -1
- ui/static/settings/__next._tree.txt +1 -1
- ui/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/settings/__next.settings.txt +1 -1
- ui/static/settings/index.html +1 -1
- ui/static/settings/index.txt +1 -1
- ui/static/static/404/index.html +1 -1
- ui/static/static/404.html +1 -1
- ui/static/static/__next.__PAGE__.txt +1 -1
- ui/static/static/__next._full.txt +2 -2
- ui/static/static/__next._head.txt +1 -1
- ui/static/static/__next._index.txt +2 -2
- ui/static/static/__next._tree.txt +2 -2
- ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
- ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
- ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
- ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- ui/static/static/_not-found/__next._full.txt +2 -2
- ui/static/static/_not-found/__next._head.txt +1 -1
- ui/static/static/_not-found/__next._index.txt +2 -2
- ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/static/_not-found/__next._not-found.txt +1 -1
- ui/static/static/_not-found/__next._tree.txt +2 -2
- ui/static/static/_not-found/index.html +1 -1
- ui/static/static/_not-found/index.txt +2 -2
- ui/static/static/contracts/__next._full.txt +3 -3
- ui/static/static/contracts/__next._head.txt +1 -1
- ui/static/static/contracts/__next._index.txt +2 -2
- ui/static/static/contracts/__next._tree.txt +2 -2
- ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/static/contracts/__next.contracts.txt +1 -1
- ui/static/static/contracts/index.html +1 -1
- ui/static/static/contracts/index.txt +3 -3
- ui/static/static/documentation/__next._full.txt +3 -3
- ui/static/static/documentation/__next._head.txt +1 -1
- ui/static/static/documentation/__next._index.txt +2 -2
- ui/static/static/documentation/__next._tree.txt +2 -2
- ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- ui/static/static/documentation/__next.documentation.txt +1 -1
- ui/static/static/documentation/index.html +2 -2
- ui/static/static/documentation/index.txt +3 -3
- ui/static/static/index.html +1 -1
- ui/static/static/index.txt +2 -2
- ui/static/static/metadata/__next._full.txt +2 -2
- ui/static/static/metadata/__next._head.txt +1 -1
- ui/static/static/metadata/__next._index.txt +2 -2
- ui/static/static/metadata/__next._tree.txt +2 -2
- ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/static/metadata/__next.metadata.txt +1 -1
- ui/static/static/metadata/index.html +1 -1
- ui/static/static/metadata/index.txt +2 -2
- ui/static/static/quality/__next._full.txt +2 -2
- ui/static/static/quality/__next._head.txt +1 -1
- ui/static/static/quality/__next._index.txt +2 -2
- ui/static/static/quality/__next._tree.txt +2 -2
- ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
- ui/static/static/quality/__next.quality.txt +1 -1
- ui/static/static/quality/index.html +2 -2
- ui/static/static/quality/index.txt +2 -2
- ui/static/static/rules/__next._full.txt +2 -2
- ui/static/static/rules/__next._head.txt +1 -1
- ui/static/static/rules/__next._index.txt +2 -2
- ui/static/static/rules/__next._tree.txt +2 -2
- ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/static/rules/__next.rules.txt +1 -1
- ui/static/static/rules/index.html +1 -1
- ui/static/static/rules/index.txt +2 -2
- ui/static/static/schemas/__next._full.txt +2 -2
- ui/static/static/schemas/__next._head.txt +1 -1
- ui/static/static/schemas/__next._index.txt +2 -2
- ui/static/static/schemas/__next._tree.txt +2 -2
- ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/static/schemas/__next.schemas.txt +1 -1
- ui/static/static/schemas/index.html +1 -1
- ui/static/static/schemas/index.txt +2 -2
- ui/static/static/settings/__next._full.txt +2 -2
- ui/static/static/settings/__next._head.txt +1 -1
- ui/static/static/settings/__next._index.txt +2 -2
- ui/static/static/settings/__next._tree.txt +2 -2
- ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/static/settings/__next.settings.txt +1 -1
- ui/static/static/settings/index.html +1 -1
- ui/static/static/settings/index.txt +2 -2
- ui/static/static/static/.gitkeep +0 -0
- ui/static/static/static/404/index.html +1 -0
- ui/static/static/static/404.html +1 -0
- ui/static/static/static/__next.__PAGE__.txt +10 -0
- ui/static/static/static/__next._full.txt +30 -0
- ui/static/static/static/__next._head.txt +7 -0
- ui/static/static/static/__next._index.txt +9 -0
- ui/static/static/static/__next._tree.txt +2 -0
- ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
- ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
- ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
- ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
- ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
- ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- ui/static/static/static/_not-found/__next._full.txt +17 -0
- ui/static/static/static/_not-found/__next._head.txt +7 -0
- ui/static/static/static/_not-found/__next._index.txt +9 -0
- ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- ui/static/static/static/_not-found/__next._not-found.txt +4 -0
- ui/static/static/static/_not-found/__next._tree.txt +2 -0
- ui/static/static/static/_not-found/index.html +1 -0
- ui/static/static/static/_not-found/index.txt +17 -0
- ui/static/static/static/contracts/__next._full.txt +21 -0
- ui/static/static/static/contracts/__next._head.txt +7 -0
- ui/static/static/static/contracts/__next._index.txt +9 -0
- ui/static/static/static/contracts/__next._tree.txt +2 -0
- ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- ui/static/static/static/contracts/__next.contracts.txt +4 -0
- ui/static/static/static/contracts/index.html +1 -0
- ui/static/static/static/contracts/index.txt +21 -0
- ui/static/static/static/documentation/__next._full.txt +21 -0
- ui/static/static/static/documentation/__next._head.txt +7 -0
- ui/static/static/static/documentation/__next._index.txt +9 -0
- ui/static/static/static/documentation/__next._tree.txt +2 -0
- ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- ui/static/static/static/documentation/__next.documentation.txt +4 -0
- ui/static/static/static/documentation/index.html +93 -0
- ui/static/static/static/documentation/index.txt +21 -0
- ui/static/static/static/index.html +1 -0
- ui/static/static/static/index.txt +30 -0
- ui/static/static/static/metadata/__next._full.txt +21 -0
- ui/static/static/static/metadata/__next._head.txt +7 -0
- ui/static/static/static/metadata/__next._index.txt +9 -0
- ui/static/static/static/metadata/__next._tree.txt +2 -0
- ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- ui/static/static/static/metadata/__next.metadata.txt +4 -0
- ui/static/static/static/metadata/index.html +1 -0
- ui/static/static/static/metadata/index.txt +21 -0
- ui/static/static/static/quality/__next._full.txt +21 -0
- ui/static/static/static/quality/__next._head.txt +7 -0
- ui/static/static/static/quality/__next._index.txt +9 -0
- ui/static/static/static/quality/__next._tree.txt +2 -0
- ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- ui/static/static/static/quality/__next.quality.txt +4 -0
- ui/static/static/static/quality/index.html +2 -0
- ui/static/static/static/quality/index.txt +21 -0
- ui/static/static/static/rules/__next._full.txt +21 -0
- ui/static/static/static/rules/__next._head.txt +7 -0
- ui/static/static/static/rules/__next._index.txt +9 -0
- ui/static/static/static/rules/__next._tree.txt +2 -0
- ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- ui/static/static/static/rules/__next.rules.txt +4 -0
- ui/static/static/static/rules/index.html +1 -0
- ui/static/static/static/rules/index.txt +21 -0
- ui/static/static/static/schemas/__next._full.txt +21 -0
- ui/static/static/static/schemas/__next._head.txt +7 -0
- ui/static/static/static/schemas/__next._index.txt +9 -0
- ui/static/static/static/schemas/__next._tree.txt +2 -0
- ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- ui/static/static/static/schemas/__next.schemas.txt +4 -0
- ui/static/static/static/schemas/index.html +1 -0
- ui/static/static/static/schemas/index.txt +21 -0
- ui/static/static/static/settings/__next._full.txt +21 -0
- ui/static/static/static/settings/__next._head.txt +7 -0
- ui/static/static/static/settings/__next._index.txt +9 -0
- ui/static/static/static/settings/__next._tree.txt +2 -0
- ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- ui/static/static/static/settings/__next.settings.txt +4 -0
- ui/static/static/static/settings/index.html +1 -0
- ui/static/static/static/settings/index.txt +21 -0
- ui/static/static/static/validation/__next._full.txt +21 -0
- ui/static/static/static/validation/__next._head.txt +7 -0
- ui/static/static/static/validation/__next._index.txt +9 -0
- ui/static/static/static/validation/__next._tree.txt +2 -0
- ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- ui/static/static/static/validation/__next.validation.txt +4 -0
- ui/static/static/static/validation/index.html +1 -0
- ui/static/static/static/validation/index.txt +21 -0
- ui/static/static/validation/__next._full.txt +2 -2
- ui/static/static/validation/__next._head.txt +1 -1
- ui/static/static/validation/__next._index.txt +2 -2
- ui/static/static/validation/__next._tree.txt +2 -2
- ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
- ui/static/static/validation/__next.validation.txt +1 -1
- ui/static/static/validation/index.html +1 -1
- ui/static/static/validation/index.txt +2 -2
- ui/static/validation/__next._full.txt +2 -2
- ui/static/validation/__next._head.txt +1 -1
- ui/static/validation/__next._index.txt +1 -1
- ui/static/validation/__next._tree.txt +1 -1
- ui/static/validation/__next.validation.__PAGE__.txt +2 -2
- ui/static/validation/__next.validation.txt +1 -1
- ui/static/validation/index.html +1 -1
- ui/static/validation/index.txt +2 -2
- pycharter/data/templates/template_coercion_rules.yaml +0 -15
- pycharter/data/templates/template_contract.yaml +0 -587
- pycharter/data/templates/template_metadata.yaml +0 -38
- pycharter/data/templates/template_schema.yaml +0 -22
- pycharter/data/templates/template_transform_advanced.yaml +0 -50
- pycharter/data/templates/template_transform_simple.yaml +0 -59
- pycharter/data/templates/template_validation_rules.yaml +0 -29
- pycharter/etl_generator/extraction.py +0 -916
- pycharter/etl_generator/factory.py +0 -174
- pycharter/etl_generator/orchestrator.py +0 -1650
- pycharter/integrations/__init__.py +0 -19
- pycharter/integrations/kafka.py +0 -178
- pycharter/integrations/streaming.py +0 -100
- pycharter-0.0.22.dist-info/RECORD +0 -358
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Transform pipeline: simple_ops → jsonata → custom_function.
|
|
3
|
+
|
|
4
|
+
Single entry point: apply_transforms(data, transform_config, **kwargs).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List
|
|
8
|
+
|
|
9
|
+
from pycharter.etl_generator.transformers.config import normalize_transform_config
|
|
10
|
+
from pycharter.etl_generator.transformers.custom_function import (
|
|
11
|
+
apply_custom_function,
|
|
12
|
+
)
|
|
13
|
+
from pycharter.etl_generator.transformers.jsonata_transformer import (
|
|
14
|
+
apply_jsonata,
|
|
15
|
+
)
|
|
16
|
+
from pycharter.etl_generator.transformers.simple_operations import (
|
|
17
|
+
apply_simple_operations,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def apply_transforms(
    data: List[Dict[str, Any]],
    transform_config: Dict[str, Any],
    **kwargs: Any,
) -> List[Dict[str, Any]]:
    """
    Apply every configured transform stage to ``data`` and return the result.

    Stages run in a fixed order: simple_ops, then jsonata, then
    custom_function. A stage whose entry in the normalized config is
    missing or falsy is skipped.

    Args:
        data: Input list of records.
        transform_config: Raw transform config; it is passed through
            ``normalize_transform_config`` before use. A falsy value makes
            the function return ``data`` unchanged.
        **kwargs: Forwarded to the custom_function stage only.

    Returns:
        The records after all configured stages have run.
    """
    if not transform_config:
        # Nothing configured: hand the caller's object back untouched.
        return data

    stages = normalize_transform_config(transform_config)

    simple_ops = stages.get("simple_ops")
    if simple_ops:
        data = apply_simple_operations(data, simple_ops)

    jsonata_config = stages.get("jsonata")
    if jsonata_config:
        data = apply_jsonata(data, jsonata_config)

    custom_function = stages.get("custom_function")
    if custom_function:
        # Only this stage receives the caller's extra keyword arguments.
        data = apply_custom_function(data, custom_function, **kwargs)

    return data
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Simple declarative transformation operations.
|
|
3
|
+
|
|
4
|
+
Order: rename → convert → defaults → add → select → drop.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from datetime import date, datetime
|
|
9
|
+
from typing import Any, Dict, List
|
|
10
|
+
|
|
11
|
+
from pycharter.etl_generator.expression import evaluate_expression, ExpressionEvaluator
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def apply_simple_operations(
    data: List[Dict[str, Any]], config: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """
    Apply simple declarative operations to a list of records.

    Operations run per record, in this order: rename, convert, defaults,
    add, select, drop. Each record is shallow-copied before it is modified,
    so the input dicts themselves are not changed by this function.

    Args:
        data: Records to transform. A falsy value is returned unchanged.
        config: Operation settings. Recognized keys:
            ``rename`` (old name -> new name), ``convert`` (field -> target
            type name passed to ``convert_type``), ``defaults`` (field ->
            value used when the field is absent or None), ``add`` (field ->
            expression passed to ``evaluate_expression``), ``select``
            (fields to keep) and ``drop`` (fields to remove).

    Returns:
        A new list with one transformed dict per input record.
    """
    if not data:
        return data

    result = []
    rename_map = config.get("rename") or {}
    convert_map = config.get("convert") or {}
    defaults_map = config.get("defaults") or {}
    add_map = config.get("add") or {}
    select_fields = config.get("select")
    drop_fields = set(config.get("drop") or [])

    if rename_map:
        # Diagnostic only: the check looks at the first record's keys, so it
        # reports likely typos without scanning the whole data set.
        available_fields = set(data[0].keys())
        missing = [k for k in rename_map if k not in available_fields]
        if missing:
            logger.warning(
                "Rename operation: fields not in data: %s. Available: %s",
                missing,
                sorted(available_fields),
            )

    for record in data:
        row = dict(record)

        # Renames are applied simultaneously: every source value is taken
        # out first, then written under its new name. Renaming one pair at
        # a time would clobber data whenever a new name is also a source
        # (e.g. swapping two fields with {"a": "b", "b": "a"}).
        renamed = {
            new_name: row.pop(old_name)
            for old_name, new_name in rename_map.items()
            if old_name in row
        }
        row.update(renamed)

        for field_name, target_type in convert_map.items():
            if field_name in row:
                try:
                    row[field_name] = convert_type(row[field_name], target_type)
                except (ValueError, TypeError) as e:
                    # Best effort: a value that cannot be converted is kept
                    # as-is rather than failing the whole batch.
                    logger.warning(
                        "Failed to convert field %r to %s: %s. Keeping original.",
                        field_name,
                        target_type,
                        e,
                    )

        for field_name, default_value in defaults_map.items():
            if field_name not in row or row[field_name] is None:
                row[field_name] = default_value

        for field_name, expression in add_map.items():
            try:
                row[field_name] = evaluate_expression(expression, row)
            except Exception as e:
                # Best effort: a failing expression leaves the field unset
                # for this record and processing continues.
                logger.warning(
                    "Failed to compute field %r: %s. Skipping.", field_name, e
                )

        if select_fields:
            row = {k: v for k, v in row.items() if k in select_fields}
        if drop_fields:
            # drop runs after select, so a field listed in both is removed.
            row = {k: v for k, v in row.items() if k not in drop_fields}
        result.append(row)

    return result
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def convert_type(value: Any, target_type: str) -> Any:
    """Convert a value to the given type (string, integer, float, boolean, datetime, date).

    Args:
        value: The value to convert. ``None`` is returned unchanged for
            every target type.
        target_type: Type name, matched case-insensitively after stripping
            whitespace. Accepted names: ``str``/``string``,
            ``int``/``integer``, ``float``/``number``/``numeric``,
            ``bool``/``boolean``, ``datetime`` and ``date``.

    Returns:
        The converted value. For ``datetime`` and ``date``, non-string
        inputs are returned as-is, except that a ``datetime`` converted to
        ``date`` is reduced to its date part.

    Raises:
        ValueError: If the target type is not supported, a string cannot be
            parsed, or a numeric value is out of range (e.g. infinity to
            integer).
        TypeError: If the underlying ``int``/``float`` constructor rejects
            the value's type.
    """
    if value is None:
        return None
    t = target_type.lower().strip()
    if t in ("str", "string"):
        return str(value)
    if t in ("int", "integer"):
        try:
            if isinstance(value, str):
                try:
                    # Parse integer text exactly; going through float first
                    # would silently round anything beyond 2**53.
                    return int(value)
                except ValueError:
                    # Not plain integer text ("3.7", "1e3"): parse as a
                    # float and truncate toward zero.
                    return int(float(value))
            return int(value)
        except OverflowError as exc:
            # int(inf) raises OverflowError; report it as ValueError so
            # callers that handle conversion failures see one error family.
            raise ValueError(f"Cannot convert {value!r} to integer") from exc
    if t in ("float", "number", "numeric"):
        try:
            return float(value)
        except OverflowError as exc:
            # An int too large for a float raises OverflowError.
            raise ValueError(f"Cannot convert {value!r} to float") from exc
    if t in ("bool", "boolean"):
        if isinstance(value, str):
            # Any string outside this list, including "", maps to False.
            return value.lower() in ("true", "1", "yes", "on")
        return bool(value)
    if t == "datetime":
        if isinstance(value, str):
            # The "Z" in these formats is matched as a literal character,
            # so the parsed datetimes carry no tzinfo.
            for fmt in (
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%dT%H:%M:%S.%f",
                "%Y-%m-%dT%H:%M:%SZ",
                "%Y-%m-%dT%H:%M:%S.%fZ",
                "%Y-%m-%d %H:%M:%S",
                "%Y-%m-%d %H:%M:%S.%f",
            ):
                try:
                    return datetime.strptime(value, fmt)
                except ValueError:
                    continue
            raise ValueError(f"Cannot parse datetime: {value}")
        return value
    if t == "date":
        if isinstance(value, str):
            for fmt in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"]:
                try:
                    return datetime.strptime(value, fmt).date()
                except ValueError:
                    continue
            raise ValueError(f"Cannot parse date: {value}")
        if isinstance(value, datetime):
            return value.date()
        return value
    raise ValueError(f"Unsupported target type: {target_type}")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# evaluate_expression is now imported from pycharter.etl_generator.expression
|
|
131
|
+
# It remains importable from this module for backwards compatibility.
|
pycharter/quality/__init__.py
CHANGED
|
@@ -3,6 +3,9 @@ Data Quality Assurance Module
|
|
|
3
3
|
|
|
4
4
|
Provides quality checking, metrics calculation, violation tracking, and reporting
|
|
5
5
|
for data contracts.
|
|
6
|
+
|
|
7
|
+
Submodules:
|
|
8
|
+
- tracking: Time-series metrics collection and analysis
|
|
6
9
|
"""
|
|
7
10
|
|
|
8
11
|
from pycharter.quality.check import QualityCheck
|
|
@@ -16,7 +19,20 @@ from pycharter.quality.models import (
|
|
|
16
19
|
from pycharter.quality.profiling import DataProfiler
|
|
17
20
|
from pycharter.quality.violations import ViolationRecord, ViolationTracker
|
|
18
21
|
|
|
22
|
+
# Tracking submodule exports
|
|
23
|
+
from pycharter.quality import tracking
|
|
24
|
+
from pycharter.quality.tracking import (
|
|
25
|
+
MetricsCollector,
|
|
26
|
+
ValidationMetric,
|
|
27
|
+
MetricsSummary,
|
|
28
|
+
MetricsFilter,
|
|
29
|
+
MetricsStore,
|
|
30
|
+
InMemoryMetricsStore,
|
|
31
|
+
SQLiteMetricsStore,
|
|
32
|
+
)
|
|
33
|
+
|
|
19
34
|
__all__ = [
|
|
35
|
+
# Quality checking
|
|
20
36
|
"QualityCheck",
|
|
21
37
|
"QualityMetrics",
|
|
22
38
|
"QualityScore",
|
|
@@ -27,5 +43,14 @@ __all__ = [
|
|
|
27
43
|
"ViolationTracker",
|
|
28
44
|
"ViolationRecord",
|
|
29
45
|
"DataProfiler",
|
|
46
|
+
# Tracking submodule
|
|
47
|
+
"tracking",
|
|
48
|
+
"MetricsCollector",
|
|
49
|
+
"ValidationMetric",
|
|
50
|
+
"MetricsSummary",
|
|
51
|
+
"MetricsFilter",
|
|
52
|
+
"MetricsStore",
|
|
53
|
+
"InMemoryMetricsStore",
|
|
54
|
+
"SQLiteMetricsStore",
|
|
30
55
|
]
|
|
31
56
|
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Quality Tracking - Time-series metrics collection and analysis.
|
|
3
|
+
|
|
4
|
+
This submodule provides tools for tracking validation metrics over time,
|
|
5
|
+
enabling trend analysis and quality monitoring.
|
|
6
|
+
|
|
7
|
+
Primary Interface:
|
|
8
|
+
- MetricsCollector: Collect and query validation metrics
|
|
9
|
+
- ValidationMetric: Single validation run metric
|
|
10
|
+
- MetricsSummary: Aggregated metrics summary
|
|
11
|
+
|
|
12
|
+
Stores:
|
|
13
|
+
- MetricsStore: Protocol for metrics storage backends
|
|
14
|
+
- InMemoryMetricsStore: In-memory store for testing/development
|
|
15
|
+
- SQLiteMetricsStore: SQLite-based persistent storage
|
|
16
|
+
|
|
17
|
+
Exporters:
|
|
18
|
+
- export_json: Export metrics as JSON
|
|
19
|
+
- export_prometheus: Export metrics in Prometheus format
|
|
20
|
+
|
|
21
|
+
Example:
|
|
22
|
+
>>> from pycharter.quality.tracking import MetricsCollector, InMemoryMetricsStore
|
|
23
|
+
>>>
|
|
24
|
+
>>> # Create collector with in-memory store
|
|
25
|
+
>>> store = InMemoryMetricsStore()
|
|
26
|
+
>>> collector = MetricsCollector(store)
|
|
27
|
+
>>>
|
|
28
|
+
>>> # Record validation results
|
|
29
|
+
>>> result = validator.validate(data)
|
|
30
|
+
>>> collector.record(result, schema_name="users", version="1.0.0")
|
|
31
|
+
>>>
|
|
32
|
+
>>> # Query metrics
|
|
33
|
+
>>> metrics = collector.query(schema_name="users", limit=10)
|
|
34
|
+
>>> summary = collector.get_summary("users")
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from pycharter.quality.tracking.collector import MetricsCollector
|
|
38
|
+
from pycharter.quality.tracking.models import (
|
|
39
|
+
MetricsFilter,
|
|
40
|
+
MetricsSummary,
|
|
41
|
+
ValidationMetric,
|
|
42
|
+
)
|
|
43
|
+
from pycharter.quality.tracking.store import (
|
|
44
|
+
InMemoryMetricsStore,
|
|
45
|
+
MetricsStore,
|
|
46
|
+
SQLiteMetricsStore,
|
|
47
|
+
)
|
|
48
|
+
from pycharter.quality.tracking.exporters import export_json, export_prometheus
|
|
49
|
+
|
|
50
|
+
# Public API of the tracking subpackage. Every name listed here is imported
# above from the collector, models, store, or exporters modules.
__all__ = [
    # Primary interface: the collector used to record and query metrics
    "MetricsCollector",
    # Models: metric record, aggregated summary, and query filter
    "ValidationMetric",
    "MetricsSummary",
    "MetricsFilter",
    # Stores: storage backend interface and the bundled implementations
    "MetricsStore",
    "InMemoryMetricsStore",
    "SQLiteMetricsStore",
    # Exporters: serialization helpers for recorded metrics
    "export_json",
    "export_prometheus",
]
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Quality Tracking Collector - Metrics collection and querying.
|
|
3
|
+
|
|
4
|
+
Provides the main interface for recording validation metrics
|
|
5
|
+
and querying historical data.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
from pycharter.quality.tracking.models import MetricsFilter, MetricsSummary, ValidationMetric
|
|
14
|
+
from pycharter.quality.tracking.store import MetricsStore, InMemoryMetricsStore
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from pycharter.runtime_validator.validator_core import ValidationResult, QualityMetrics
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class MetricsCollector:
    """
    Collect and query validation metrics over time.

    The MetricsCollector is the primary interface for tracking validation
    quality metrics. It converts validation results into ``ValidationMetric``
    records, hands them to a storage backend, and forwards queries and
    summary requests to that backend.

    Example:
        >>> from pycharter.quality.tracking import MetricsCollector, InMemoryMetricsStore
        >>> from pycharter.runtime_validator import Validator
        >>>
        >>> # Create collector
        >>> store = InMemoryMetricsStore()
        >>> collector = MetricsCollector(store)
        >>>
        >>> # Record validation results
        >>> validator = Validator(contract_dir="contracts/users")
        >>> result = validator.validate(data)
        >>> collector.record(result, schema_name="users", version="1.0.0")
        >>>
        >>> # Query metrics
        >>> recent = collector.query(schema_name="users", limit=10)
        >>> summary = collector.get_summary("users", window_hours=24)
    """

    # Ordered keyword table used by ``_extract_error_type``. The first entry
    # with any keyword contained in the lower-cased message wins, so the
    # order of the entries is significant.
    _ERROR_KEYWORDS = (
        ("missing_required", ("required", "missing")),
        ("type_error", ("type",)),
        ("pattern_mismatch", ("pattern", "regex")),
        ("range_error", ("min", "max", "range")),
        ("enum_error", ("enum", "allowed")),
        ("format_error", ("format",)),
        ("null_error", ("null", "none")),
        ("uniqueness_error", ("unique", "duplicate")),
    )

    # Matches a tuple-style field location such as "('field_name',)".
    _FIELD_LOCATION_RE = re.compile(r"\('?(\w+)'?,?\)")

    def __init__(self, store: "Optional[MetricsStore]" = None):
        """
        Initialize the metrics collector.

        Args:
            store: Storage backend for metrics. Defaults to a new
                InMemoryMetricsStore when omitted.
        """
        # Compare against None explicitly: a store object may be falsy (for
        # example when it defines __len__ and is still empty), and such a
        # store must not be silently replaced by a fresh in-memory one.
        self._store = store if store is not None else InMemoryMetricsStore()

    @property
    def store(self) -> "MetricsStore":
        """Get the underlying store."""
        return self._store

    def record(
        self,
        result: "ValidationResult",
        schema_name: str,
        version: str,
        duration_ms: float = 0.0,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> "ValidationMetric":
        """
        Record metrics from a single validation result.

        When ``result.quality`` is present its counters and completeness
        figures are used as-is. Otherwise the result is treated as one record
        that is either fully valid or fully invalid, with completeness 1.0.

        Args:
            result: ValidationResult from a validation operation
            schema_name: Name of the schema validated against
            version: Version of the schema
            duration_ms: Validation duration in milliseconds
            metadata: Additional custom metadata to store

        Returns:
            The recorded ValidationMetric
        """
        quality = result.quality
        if quality is not None:
            completeness = quality.completeness
            field_completeness = quality.field_completeness
            record_count = quality.record_count
            valid_count = quality.valid_count
            error_count = quality.error_count
            validity_rate = quality.validity_rate
        else:
            # No quality metrics attached: derive the basics from is_valid.
            passed = bool(result.is_valid)
            completeness = 1.0
            field_completeness = {}
            record_count = 1
            valid_count = 1 if passed else 0
            error_count = 0 if passed else 1
            validity_rate = 1.0 if passed else 0.0

        return self._store_metric(
            schema_name=schema_name,
            version=version,
            record_count=record_count,
            valid_count=valid_count,
            error_count=error_count,
            validity_rate=validity_rate,
            completeness=completeness,
            field_completeness=field_completeness,
            duration_ms=duration_ms,
            errors_by_type=self._categorize_errors(result.errors),
            metadata=metadata,
        )

    def record_batch(
        self,
        results: List["ValidationResult"],
        schema_name: str,
        version: str,
        duration_ms: float = 0.0,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> "ValidationMetric":
        """
        Record aggregated metrics from a batch of validation results.

        Combines metrics from multiple validation results into a single
        metric record representing the batch. Counts are per result; the
        completeness figures are plain averages over the results that carry
        quality metrics.

        An empty (or missing) batch is recorded with zero records, zero
        valid and error counts, and a validity rate and completeness of 1.0,
        so that it does not show up as a fabricated valid record.

        Args:
            results: List of ValidationResults from batch validation
            schema_name: Name of the schema validated against
            version: Version of the schema
            duration_ms: Total validation duration in milliseconds
            metadata: Additional custom metadata to store

        Returns:
            The recorded ValidationMetric for the batch
        """
        batch = list(results) if results else []

        record_count = len(batch)
        valid_count = sum(1 for item in batch if item.is_valid)
        error_count = record_count - valid_count
        validity_rate = valid_count / record_count if record_count else 1.0

        # Collect every error message and every available quality figure.
        all_errors: List[str] = []
        completeness_values: List[float] = []
        per_field: Dict[str, List[float]] = defaultdict(list)
        for item in batch:
            all_errors.extend(item.errors)
            quality = item.quality
            if quality is not None:
                completeness_values.append(quality.completeness)
                for field, value in quality.field_completeness.items():
                    per_field[field].append(value)

        if completeness_values:
            completeness = sum(completeness_values) / len(completeness_values)
        else:
            completeness = 1.0
        field_completeness = {
            field: sum(values) / len(values)
            for field, values in per_field.items()
        }

        return self._store_metric(
            schema_name=schema_name,
            version=version,
            record_count=record_count,
            valid_count=valid_count,
            error_count=error_count,
            validity_rate=validity_rate,
            completeness=completeness,
            field_completeness=field_completeness,
            duration_ms=duration_ms,
            errors_by_type=self._categorize_errors(all_errors),
            metadata=metadata,
        )

    def query(
        self,
        schema_name: Optional[str] = None,
        version: Optional[str] = None,
        since: Optional[datetime] = None,
        until: Optional[datetime] = None,
        min_validity_rate: Optional[float] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> "List[ValidationMetric]":
        """
        Query stored metrics with filters.

        The arguments are packed into a MetricsFilter and passed to the
        store, which performs the actual filtering and ordering.

        Args:
            schema_name: Filter by schema name
            version: Filter by schema version
            since: Filter metrics after this time
            until: Filter metrics before this time
            min_validity_rate: Filter by minimum validity rate
            limit: Maximum number of results
            offset: Offset for pagination

        Returns:
            List of matching ValidationMetrics as returned by the store
            (expected to be ordered by timestamp descending)
        """
        filters = MetricsFilter(
            schema_name=schema_name,
            version=version,
            since=since,
            until=until,
            min_validity_rate=min_validity_rate,
            limit=limit,
            offset=offset,
        )
        return self._store.query(filters)

    def get_summary(self, schema_name: str, window_hours: int = 24) -> "MetricsSummary":
        """
        Get aggregated summary for a schema within a time window.

        Args:
            schema_name: Name of the schema to summarize
            window_hours: Number of hours to look back

        Returns:
            MetricsSummary with aggregated statistics, computed by the store
        """
        return self._store.get_summary(schema_name, window_hours)

    def get_all_schemas(self) -> List[str]:
        """
        Get list of all schema names with recorded metrics.

        The names are taken from a single store query limited to 10000
        metrics, so schemas that only appear beyond that limit are not
        reported.

        Returns:
            Sorted list of unique schema names
        """
        metrics = self._store.query(MetricsFilter(limit=10000))
        # Sort so that the result order is stable between calls.
        return sorted({metric.schema_name for metric in metrics})

    def _store_metric(
        self,
        *,
        schema_name: str,
        version: str,
        record_count: int,
        valid_count: int,
        error_count: int,
        validity_rate: float,
        completeness: float,
        field_completeness: Dict[str, float],
        duration_ms: float,
        errors_by_type: Dict[str, int],
        metadata: Optional[Dict[str, Any]],
    ) -> "ValidationMetric":
        """Build a ValidationMetric stamped with the current time, store it, and return it."""
        metric = ValidationMetric(
            schema_name=schema_name,
            version=version,
            # NOTE(review): naive UTC timestamp. datetime.utcnow() is
            # deprecated since Python 3.12, but switching to timezone-aware
            # values presumably has to be coordinated with the stores that
            # compare these timestamps - confirm before changing.
            timestamp=datetime.utcnow(),
            record_count=record_count,
            valid_count=valid_count,
            error_count=error_count,
            validity_rate=validity_rate,
            completeness=completeness,
            field_completeness=field_completeness,
            duration_ms=duration_ms,
            errors_by_type=errors_by_type,
            metadata=metadata or {},
        )
        self._store.store(metric)
        return metric

    def _categorize_errors(self, errors: List[str]) -> Dict[str, int]:
        """
        Categorize errors by type.

        Args:
            errors: Error messages to classify

        Returns:
            Mapping of error type (see ``_extract_error_type``) to the
            number of messages of that type
        """
        error_counts: Dict[str, int] = defaultdict(int)
        for error in errors:
            error_counts[self._extract_error_type(error)] += 1
        return dict(error_counts)

    def _extract_error_type(self, error: str) -> str:
        """
        Extract error type from error message.

        The message is matched case-insensitively against the keyword table
        in ``_ERROR_KEYWORDS``; the first matching category is returned.
        Without a keyword match, a tuple-style field location such as
        ``('field_name',)`` yields ``validation_error_<field_name>``, and
        anything else yields ``validation_error``.

        Args:
            error: Error message (non-string values are converted with str())

        Returns:
            The error category name
        """
        message = str(error)
        lowered = message.lower()

        for category, keywords in self._ERROR_KEYWORDS:
            if any(keyword in lowered for keyword in keywords):
                return category

        field_match = self._FIELD_LOCATION_RE.search(message)
        if field_match:
            return f"validation_error_{field_match.group(1)}"

        return "validation_error"
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class _EmptyResult:
|
|
314
|
+
"""Placeholder for empty batch results."""
|
|
315
|
+
|
|
316
|
+
is_valid = True
|
|
317
|
+
errors: List[str] = []
|
|
318
|
+
quality = None
|