adri 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
adri-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Think Evolve Solve
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
adri-0.1.0/MANIFEST.in ADDED
@@ -0,0 +1,6 @@
1
+ <![CDATA[
2
+ include LICENSE
3
+ include README.md
4
+ include pyproject.toml
5
+ recursive-include adri/templates *.html
6
+ ]]>
adri-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,95 @@
1
+ Metadata-Version: 2.4
2
+ Name: adri
3
+ Version: 0.1.0
4
+ Summary: Agent Data Readiness Index - A framework for evaluating data quality for agentic AI systems
5
+ Author-email: Verodat <info@verodat.ai>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/verodat/agent-data-readiness-index
8
+ Project-URL: Bug Tracker, https://github.com/verodat/agent-data-readiness-index/issues
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.8
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: pandas>=1.0.0
22
+ Requires-Dist: matplotlib>=3.3.0
23
+ Requires-Dist: jinja2>=3.0.0
24
+ Requires-Dist: pyyaml>=6.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
27
+ Requires-Dist: pytest-cov>=2.12.0; extra == "dev"
28
+ Requires-Dist: black>=22.0.0; extra == "dev"
29
+ Requires-Dist: isort>=5.10.0; extra == "dev"
30
+ Requires-Dist: flake8>=4.0.0; extra == "dev"
31
+ Requires-Dist: mypy>=0.9.0; extra == "dev"
32
+ Provides-Extra: database
33
+ Requires-Dist: sqlalchemy>=1.4.0; extra == "database"
34
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "database"
35
+ Provides-Extra: api
36
+ Requires-Dist: requests>=2.25.0; extra == "api"
37
+ Provides-Extra: full
38
+ Requires-Dist: adri[api,database,dev]; extra == "full"
39
+ Dynamic: license-file
40
+
41
+ <![CDATA[
42
+ # Agent Data Readiness Index (ADRI)
43
+
44
+ ADRI is the industry’s first open standard for evaluating data quality for agentic AI systems. It provides a comprehensive, five-dimensional assessment of data sources by measuring Validity, Completeness, Freshness, Consistency, and Plausibility.
45
+
46
+ ## Installation
47
+
48
+ Install ADRI from PyPI:
49
+
50
+ ```bash
51
+ pip install adri
52
+ ```
53
+
54
+ > Note: If "adri" is already taken on PyPI, consider using an alternative package name (e.g., `agent-data-readiness-index`).
55
+
56
+ ## Quick Start
57
+
58
+ Run an assessment on your data source with:
59
+
60
+ ```bash
61
+ adri assess --source your_data.csv --output report
62
+ ```
63
+
64
+ Then, view the generated report with:
65
+
66
+ ```bash
67
+ adri report view report.json
68
+ ```
69
+
70
+ ## Features
71
+
72
+ - **Five-Dimensional Assessment:** Evaluates data sources across Validity, Completeness, Freshness, Consistency, and Plausibility.
73
+ - **Agent-Centric Evaluation:** Clearly communicates data quality attributes to AI agents.
74
+ - **Benchmarking:** Compare your scores against industry assessments.
75
+ - **Rich Reporting:** Generates outputs in both JSON and HTML formats.
76
+ - **Extensibility:** Designed to integrate with multiple data sources and environments.
77
+
78
+ ## Documentation
79
+
80
+ For detailed information, please refer to our [GitHub Wiki](https://github.com/verodat/agent-data-readiness-index/wiki) and additional documentation in the repository.
81
+
82
+ ## Contributing
83
+
84
+ We welcome contributions! To contribute:
85
+ 1. Fork the repository on GitHub.
86
+ 2. Run your assessments using ADRI.
87
+ 3. Submit your HTML report to the `docs/reports/` directory.
88
+ 4. Update the repository index and create a pull request.
89
+
90
+ For more details, see the [Contributing Guide](https://github.com/verodat/agent-data-readiness-index#contributing-to-the-benchmark).
91
+
92
+ ## License
93
+
94
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
95
+ ]]>
adri-0.1.0/README.md ADDED
@@ -0,0 +1,55 @@
1
+ <![CDATA[
2
+ # Agent Data Readiness Index (ADRI)
3
+
4
+ ADRI is the industry’s first open standard for evaluating data quality for agentic AI systems. It provides a comprehensive, five-dimensional assessment of data sources by measuring Validity, Completeness, Freshness, Consistency, and Plausibility.
5
+
6
+ ## Installation
7
+
8
+ Install ADRI from PyPI:
9
+
10
+ ```bash
11
+ pip install adri
12
+ ```
13
+
14
+ > Note: If "adri" is already taken on PyPI, consider using an alternative package name (e.g., `agent-data-readiness-index`).
15
+
16
+ ## Quick Start
17
+
18
+ Run an assessment on your data source with:
19
+
20
+ ```bash
21
+ adri assess --source your_data.csv --output report
22
+ ```
23
+
24
+ Then, view the generated report with:
25
+
26
+ ```bash
27
+ adri report view report.json
28
+ ```
29
+
30
+ ## Features
31
+
32
+ - **Five-Dimensional Assessment:** Evaluates data sources across Validity, Completeness, Freshness, Consistency, and Plausibility.
33
+ - **Agent-Centric Evaluation:** Clearly communicates data quality attributes to AI agents.
34
+ - **Benchmarking:** Compare your scores against industry assessments.
35
+ - **Rich Reporting:** Generates outputs in both JSON and HTML formats.
36
+ - **Extensibility:** Designed to integrate with multiple data sources and environments.
37
+
38
+ ## Documentation
39
+
40
+ For detailed information, please refer to our [GitHub Wiki](https://github.com/verodat/agent-data-readiness-index/wiki) and additional documentation in the repository.
41
+
42
+ ## Contributing
43
+
44
+ We welcome contributions! To contribute:
45
+ 1. Fork the repository on GitHub.
46
+ 2. Run your assessments using ADRI.
47
+ 3. Submit your HTML report to the `docs/reports/` directory.
48
+ 4. Update the repository index and create a pull request.
49
+
50
+ For more details, see the [Contributing Guide](https://github.com/verodat/agent-data-readiness-index#contributing-to-the-benchmark).
51
+
52
+ ## License
53
+
54
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
55
+ ]]>
@@ -0,0 +1,21 @@
1
"""
Agent Data Readiness Index (ADRI)

A framework for evaluating how well data sources communicate their quality to AI agents.
"""

import logging

# Re-export the package's primary entry points so callers can simply
# do ``from adri import DataSourceAssessor, AssessmentReport``.
from .assessor import DataSourceAssessor
from .report import AssessmentReport

# Package version — presumably kept in sync with pyproject.toml / PKG-INFO
# (0.1.0 there as well); TODO confirm there is a single source of truth.
__version__ = "0.1.0"
__author__ = "Verodat"

# Set up a null handler to avoid "No handler found" warnings
# when the library is imported by an application that has not
# configured logging itself (standard practice for libraries).
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Explicit public API of the package.
__all__ = [
    "DataSourceAssessor",
    "AssessmentReport",
]
@@ -0,0 +1,188 @@
1
+ """
2
+ Core assessment logic for the Agent Data Readiness Index.
3
+
4
+ This module provides the main DataSourceAssessor class that coordinates
5
+ the assessment of data sources across all dimensions.
6
+ """
7
+
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import Dict, List, Optional, Union, Any
11
+
12
+ from .dimensions import (
13
+ ValidityAssessor,
14
+ CompletenessAssessor,
15
+ FreshnessAssessor,
16
+ ConsistencyAssessor,
17
+ PlausibilityAssessor,
18
+ )
19
+ from .connectors import (
20
+ BaseConnector,
21
+ FileConnector,
22
+ DatabaseConnector,
23
+ APIConnector,
24
+ )
25
+ from .report import AssessmentReport
26
+ from .utils.validators import validate_config
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class DataSourceAssessor:
    """
    Main assessor class for evaluating data sources against the
    Agent Data Readiness Index criteria.

    Coordinates the five dimension assessors (validity, completeness,
    freshness, consistency, plausibility) and aggregates their results
    into an :class:`AssessmentReport`.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the assessor with optional custom configuration.

        Args:
            config: Optional configuration dictionary that can customize
                dimension weights, thresholds, etc. Per-dimension settings
                are looked up under the dimension's name (e.g. "validity").
        """
        self.config = config or {}
        validate_config(self.config)

        # Each dimension gets its own assessor, configured from the matching
        # sub-section of the config (empty dict when absent).
        self.dimensions = {
            "validity": ValidityAssessor(self.config.get("validity", {})),
            "completeness": CompletenessAssessor(self.config.get("completeness", {})),
            "freshness": FreshnessAssessor(self.config.get("freshness", {})),
            "consistency": ConsistencyAssessor(self.config.get("consistency", {})),
            "plausibility": PlausibilityAssessor(self.config.get("plausibility", {})),
        }

    def assess_file(
        self, file_path: Union[str, Path], file_type: Optional[str] = None
    ) -> AssessmentReport:
        """
        Assess a file-based data source.

        Args:
            file_path: Path to the file to assess
            file_type: Optional file type override (csv, json, etc.)

        Returns:
            AssessmentReport: The assessment results
        """
        connector = FileConnector(file_path, file_type)
        return self.assess_source(connector)

    def assess_database(
        self, connection_string: str, table_name: str
    ) -> AssessmentReport:
        """
        Assess a database table.

        Args:
            connection_string: Database connection string
            table_name: Name of the table to assess

        Returns:
            AssessmentReport: The assessment results
        """
        connector = DatabaseConnector(connection_string, table_name)
        return self.assess_source(connector)

    def assess_api(self, endpoint: str, auth: Optional[Dict[str, Any]] = None) -> AssessmentReport:
        """
        Assess an API endpoint.

        Args:
            endpoint: API endpoint URL
            auth: Optional authentication details

        Returns:
            AssessmentReport: The assessment results
        """
        connector = APIConnector(endpoint, auth)
        return self.assess_source(connector)

    def assess_source(self, connector: BaseConnector) -> AssessmentReport:
        """
        Assess any data source using a connector.

        Args:
            connector: Data source connector instance

        Returns:
            AssessmentReport: The assessment results
        """
        # Lazy %-style args so formatting cost is only paid when the
        # record is actually emitted.
        logger.info("Starting assessment of %s", connector)

        # Initialize report with the connector's self-description.
        report = AssessmentReport(
            source_name=connector.get_name(),
            source_type=connector.get_type(),
            source_metadata=connector.get_metadata(),
        )

        # Assess each dimension independently and collect its score,
        # findings and recommendations.
        dimension_results = {}
        for dim_name, assessor in self.dimensions.items():
            logger.debug("Assessing %s dimension", dim_name)
            score, findings, recommendations = assessor.assess(connector)
            dimension_results[dim_name] = {
                "score": score,
                "findings": findings,
                "recommendations": recommendations,
            }
            logger.debug("%s score: %s", dim_name, score)

        # Calculate overall score and populate report
        report.populate_from_dimension_results(dimension_results)

        logger.info("Assessment complete. Overall score: %s", report.overall_score)
        return report

    def assess_from_config(self, config_path: Union[str, Path]) -> Dict[str, AssessmentReport]:
        """
        Assess multiple data sources specified in a configuration file.

        Sources that fail to assess are logged (with traceback) and
        skipped; they do not appear in the returned dictionary.

        Args:
            config_path: Path to the YAML configuration file with a
                top-level ``sources`` list.

        Returns:
            Dict[str, AssessmentReport]: Dictionary of assessment reports
                keyed by source name
        """
        import yaml

        with open(config_path, 'r') as f:
            # safe_load returns None for an empty file; normalize to a
            # dict so the .get() below cannot raise AttributeError.
            config = yaml.safe_load(f) or {}

        reports = {}
        for source_config in config.get('sources', []):
            source_name = source_config.get('name', 'Unknown')
            source_type = source_config.get('type')

            logger.info("Assessing %s (%s)", source_name, source_type)

            try:
                if source_type == 'file':
                    report = self.assess_file(
                        source_config['path'],
                        source_config.get('file_type')
                    )
                elif source_type == 'database':
                    report = self.assess_database(
                        source_config['connection'],
                        source_config['table']
                    )
                elif source_type == 'api':
                    report = self.assess_api(
                        source_config['endpoint'],
                        source_config.get('auth')
                    )
                else:
                    logger.error("Unknown source type: %s", source_type)
                    continue

                reports[source_name] = report

            except Exception:
                # Best-effort batch mode: record the failure (with
                # traceback for debugging) and continue with the rest.
                logger.exception("Error assessing %s", source_name)

        return reports
adri-0.1.0/adri/cli.py ADDED
@@ -0,0 +1,179 @@
1
+ """
2
+ Command-line interface for the Agent Data Readiness Index.
3
+
4
+ This module provides a command-line interface for running ADRI assessments
5
+ and generating reports.
6
+ """
7
+
8
+ import argparse
9
+ import logging
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import List, Optional
13
+
14
+ from .assessor import DataSourceAssessor
15
+ from .report import AssessmentReport
16
+
17
+
18
def setup_logging(verbose: bool = False):
    """Configure root logging: DEBUG when *verbose* is true, INFO otherwise."""
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=chosen_level,
        handlers=[logging.StreamHandler()],
    )
26
+
27
+
28
def parse_args(args: Optional[List[str]] = None):
    """Build the CLI argument parser and parse *args* (or sys.argv)."""
    root = argparse.ArgumentParser(
        description="Agent Data Readiness Index - Evaluate data sources for agent readiness"
    )

    commands = root.add_subparsers(dest="command", help="Command to run")

    # "assess" sub-command: exactly one of --source / --config is required.
    assess = commands.add_parser("assess", help="Assess a data source")
    source_choice = assess.add_mutually_exclusive_group(required=True)
    source_choice.add_argument("--source", help="Path to data source or connection string")
    source_choice.add_argument("--config", help="Path to configuration file for multiple sources")
    assess.add_argument("--output", required=True, help="Output path for the report")
    assess.add_argument(
        "--format",
        choices=["json", "html", "both"],
        default="both",
        help="Output format(s) for the report",
    )
    assess.add_argument(
        "--source-type",
        choices=["file", "database", "api"],
        help="Type of the data source (auto-detected if not specified)",
    )
    assess.add_argument("--table", help="Table name for database sources")
    assess.add_argument("--custom-config", help="Path to custom assessment configuration")

    # "report" sub-command, with its own nested sub-commands.
    report = commands.add_parser("report", help="Work with assessment reports")
    report_commands = report.add_subparsers(dest="report_command", help="Report command")

    viewer = report_commands.add_parser("view", help="View an assessment report")
    viewer.add_argument("report_path", help="Path to the report file")

    # Flags shared by every command.
    root.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")

    return root.parse_args(args)
68
+
69
+
70
def run_assessment(args):
    """Run an assessment based on command-line arguments.

    Handles both modes of the ``assess`` command: a multi-source config
    file (``--config``) and a single source (``--source``). Reports are
    saved in the format(s) requested via ``--format`` and a summary is
    printed to the console.

    Raises:
        ValueError: If a database source is given without ``--table``,
            or the source type cannot be determined.
    """
    assessor = DataSourceAssessor()

    if args.config:
        # Assess multiple sources from config file.
        reports = assessor.assess_from_config(args.config)

        # Hoisted out of the loop: the output directory is the same for
        # every report (previously recomputed per iteration).
        base_path = Path(args.output)
        output_dir = base_path if base_path.is_dir() else base_path.parent

        for source_name, report in reports.items():
            file_prefix = f"{source_name.replace(' ', '_').lower()}_"

            if args.format in ("json", "both"):
                report.save_json(output_dir / f"{file_prefix}report.json")

            if args.format in ("html", "both"):
                report.save_html(output_dir / f"{file_prefix}report.html")

            # Print summary to console
            report.print_summary()

    else:
        # Assess a single source.
        source = args.source

        if args.source_type == "file" or (not args.source_type and Path(source).is_file()):
            report = assessor.assess_file(source)
        elif args.source_type == "database" or (not args.source_type and "://" in source):
            if not args.table:
                raise ValueError("Table name is required for database sources")
            report = assessor.assess_database(source, args.table)
        elif args.source_type == "api":
            report = assessor.assess_api(source)
        else:
            raise ValueError(f"Could not determine source type for: {source}")

        # Fix: strip a caller-supplied report extension so that e.g.
        # "--output report.json --format both" yields report.json and
        # report.html instead of report.json and report.json.html.
        base = Path(args.output)
        if base.suffix in (".json", ".html"):
            base = base.with_suffix("")

        if args.format in ("json", "both"):
            report.save_json(f"{base}.json")

        if args.format in ("html", "both"):
            report.save_html(f"{base}.html")

        # Print summary to console
        report.print_summary()
117
+
118
+
119
def view_report(args):
    """Load the saved report at ``args.report_path`` and print its summary."""
    loaded = AssessmentReport.load_json(args.report_path)
    loaded.print_summary()
123
+
124
def submit_benchmark(args):
    """Submit a report to the benchmark.

    Loads the report at ``args.report_path``, optionally anonymizes it
    (``args.anonymize`` / ``args.industry``), and saves it into the
    repository's ``benchmark/data`` directory under a unique filename.

    NOTE(review): this command is not registered in ``parse_args``/``main``
    yet, so it is currently unreachable from the CLI — confirm intent.
    """
    from datetime import datetime
    import uuid

    report = AssessmentReport.load_json(args.report_path)

    # This would typically upload to a benchmark service.
    # For the GitHub-based solution, we save to the benchmark directory.
    benchmark_dir = Path(__file__).parent.parent / "benchmark" / "data"
    benchmark_dir.mkdir(parents=True, exist_ok=True)

    # Anonymize if requested. getattr covers namespaces produced by a
    # parser that never defined the flag (the old hasattr-and-truthy check).
    if getattr(args, "anonymize", False):
        # Keep only necessary data for benchmarking
        report.source_name = f"Anonymous {args.industry} Source"
        report.source_metadata = {
            "industry": args.industry,
            "anonymized": True,
            "submission_date": datetime.now().isoformat(),
        }

    # Generate a unique ID for the submission
    benchmark_id = str(uuid.uuid4())[:8]
    benchmark_file = benchmark_dir / f"{args.industry.lower().replace(' ', '_')}_{benchmark_id}.json"
    report.save_json(benchmark_file)

    print(f"Report submitted to benchmark as {benchmark_file.name}")
    print("The benchmark will be updated automatically within 24 hours.")
    # NOTE(review): placeholder hostname — should point at the project's real Pages site.
    print("You can view the updated benchmark at https://username.github.io/agent-data-readiness-index/")
153
+
154
def main(args=None):
    """Main entry point for the CLI.

    Args:
        args: Optional argument list (defaults to ``sys.argv[1:]``).

    Returns:
        int: Process exit code — 0 on success, 1 on error or missing command.
    """
    parsed_args = parse_args(args)
    setup_logging(parsed_args.verbose)

    try:
        if parsed_args.command == "assess":
            run_assessment(parsed_args)
        elif parsed_args.command == "report":
            if parsed_args.report_command == "view":
                view_report(parsed_args)
            else:
                # Fix: "adri report" without a sub-command previously did
                # nothing and still exited 0; now report the problem.
                print("No report command specified. Use --help for usage information.")
                return 1
        else:
            print("No command specified. Use --help for usage information.")
            return 1
    except Exception as e:
        logging.error(f"Error: {e}")
        if parsed_args.verbose:
            import traceback
            traceback.print_exc()
        return 1

    return 0
176
+
177
+
178
# Allow the module to be executed directly; exit status comes from main().
if __name__ == "__main__":
    sys.exit(main())