PyPI - modelaudit - Versions diffs - 0.1.0__tar.gz - Mend

modelaudit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

modelaudit-0.1.0/PKG-INFO +128 -0
modelaudit-0.1.0/README.md +92 -0
modelaudit-0.1.0/modelaudit/__init__.py +1 -0
modelaudit-0.1.0/modelaudit/cli.py +284 -0
modelaudit-0.1.0/modelaudit/core.py +274 -0
modelaudit-0.1.0/modelaudit/name_policies/__init__.py +0 -0
modelaudit-0.1.0/modelaudit/name_policies/blacklist.py +26 -0
modelaudit-0.1.0/modelaudit/scanners/__init__.py +43 -0
modelaudit-0.1.0/modelaudit/scanners/base.py +215 -0
modelaudit-0.1.0/modelaudit/scanners/keras_h5_scanner.py +232 -0
modelaudit-0.1.0/modelaudit/scanners/manifest_scanner.py +241 -0
modelaudit-0.1.0/modelaudit/scanners/pickle_scanner.py +348 -0
modelaudit-0.1.0/modelaudit/scanners/pytorch_zip_scanner.py +129 -0
modelaudit-0.1.0/modelaudit/scanners/tf_savedmodel_scanner.py +189 -0
modelaudit-0.1.0/modelaudit/utils/__init__.py +0 -0
modelaudit-0.1.0/modelaudit/utils/filetype.py +75 -0
modelaudit-0.1.0/pyproject.toml +68 -0

modelaudit-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,128 @@
+Metadata-Version: 2.1
+Name: modelaudit
+Version: 0.1.0
+Summary: Model file scanning library for detecting malicious code in ML model files
+Home-page: https://github.com/promptfoo/modelaudit
+License: MIT
+Keywords: ai,ml,security,model-scanning,pickle,tensorflow,pytorch
+Author: Ian Webster
+Author-email: ian@promptfoo.dev
+Requires-Python: >=3.9,<4.0
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Security
+Provides-Extra: all
+Provides-Extra: h5
+Provides-Extra: pytorch
+Provides-Extra: tensorflow
+Provides-Extra: yaml
+Requires-Dist: click (>=8.1.3,<9.0.0)
+Requires-Dist: h5py (>=3.1) ; extra == "h5" or extra == "all"
+Requires-Dist: pyyaml (>=6.0,<7.0) ; extra == "yaml" or extra == "all"
+Requires-Dist: tensorflow (>=2.6) ; extra == "tensorflow" or extra == "all"
+Requires-Dist: torch (>=1.6) ; extra == "pytorch" or extra == "all"
+Requires-Dist: yaspin (>=2.3.0,<3.0.0)
+Project-URL: Repository, https://github.com/promptfoo/modelaudit
+Description-Content-Type: text/markdown
+# ModelAudit
+A security scanner for machine learning models. Quickly check your AI/ML models for potential security risks before deployment.
+## 🔍 What It Does
+ModelAudit scans ML model files for:
+- Malicious code (e.g., `os.system` calls in pickled models)
+- Suspicious TensorFlow operations
+- Potentially unsafe Keras Lambda layers
+- Models with blacklisted names
+- Dangerous pickle opcodes and serialization patterns
+- Suspicious string patterns that might indicate encoded payloads
+- Risky configurations in model architectures
+- Suspicious patterns in model manifests and configuration files
+## 🚀 Quick Start
+### Installation
+```bash
+# Using pip
+pip install modelaudit
+# Or with optional dependencies for specific model formats
+pip install modelaudit[tensorflow,h5,pytorch]
+# For YAML manifest scanning support
+pip install modelaudit[yaml]
+# For all dependencies
+pip install modelaudit[all]
+```
+### Basic Usage
+```bash
+# Scan one or more models or directories
+modelaudit scan model.pkl model2.h5 models_directory
+# Export results to JSON
+modelaudit scan model.pkl --format json --output results.json
+# Set maximum file size to scan
+modelaudit scan model.pkl --max-file-size 1073741824  # 1GB limit
+# Add custom blacklist patterns
+modelaudit scan model.pkl --blacklist "unsafe_model" --blacklist "malicious_net"
+```
+## ✨ Features
+- **Multiple Format Support**: Scans PyTorch, TensorFlow, Keras, and pickle models
+- **Automatic Format Detection**: Identifies model formats automatically
+- **Comprehensive Scanning**: Checks for various security issues with severity levels
+- **Batch Processing**: Scan multiple files and directories at once
+- **Configurable Timeouts**: Set scan timeouts for large models
+- **Detailed Reporting**: Get information about scan duration, files scanned, and bytes processed
+- **Structured Output**: Export results as JSON for integration with other tools
+- **Name Blacklisting**: Block models with names matching suspicious patterns
+- **Manifest Scanning**: Detect suspicious patterns in model configuration files
+## 🛡️ Scanners
+ModelAudit includes specialized scanners for different model formats:
+- **Pickle Scanner**: Detects malicious code and encoded payloads in pickle files
+- **TensorFlow Scanner**: Identifies suspicious operations in SavedModel format
+- **Keras Scanner**: Checks for unsafe Lambda layers and risky configurations
+- **PyTorch Scanner**: Examines PyTorch models for security issues
+- **Manifest Scanner**: Analyzes model manifests and configuration files for suspicious patterns and blacklisted names
+## 🛠️ Development
+### Using Poetry
+```bash
+# Clone the repository
+git clone https://github.com/promptfoo/modelaudit.git
+cd modelaudit
+# Install dependencies
+poetry install
+# Install with extras
+poetry install --extras "all"
+```
+## 📝 License
+This project is licensed under the MIT License.

modelaudit-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,92 @@
+# ModelAudit
+A security scanner for machine learning models. Quickly check your AI/ML models for potential security risks before deployment.
+## 🔍 What It Does
+ModelAudit scans ML model files for:
+- Malicious code (e.g., `os.system` calls in pickled models)
+- Suspicious TensorFlow operations
+- Potentially unsafe Keras Lambda layers
+- Models with blacklisted names
+- Dangerous pickle opcodes and serialization patterns
+- Suspicious string patterns that might indicate encoded payloads
+- Risky configurations in model architectures
+- Suspicious patterns in model manifests and configuration files
+## 🚀 Quick Start
+### Installation
+```bash
+# Using pip
+pip install modelaudit
+# Or with optional dependencies for specific model formats
+pip install modelaudit[tensorflow,h5,pytorch]
+# For YAML manifest scanning support
+pip install modelaudit[yaml]
+# For all dependencies
+pip install modelaudit[all]
+```
+### Basic Usage
+```bash
+# Scan one or more models or directories
+modelaudit scan model.pkl model2.h5 models_directory
+# Export results to JSON
+modelaudit scan model.pkl --format json --output results.json
+# Set maximum file size to scan
+modelaudit scan model.pkl --max-file-size 1073741824  # 1GB limit
+# Add custom blacklist patterns
+modelaudit scan model.pkl --blacklist "unsafe_model" --blacklist "malicious_net"
+```
+## ✨ Features
+- **Multiple Format Support**: Scans PyTorch, TensorFlow, Keras, and pickle models
+- **Automatic Format Detection**: Identifies model formats automatically
+- **Comprehensive Scanning**: Checks for various security issues with severity levels
+- **Batch Processing**: Scan multiple files and directories at once
+- **Configurable Timeouts**: Set scan timeouts for large models
+- **Detailed Reporting**: Get information about scan duration, files scanned, and bytes processed
+- **Structured Output**: Export results as JSON for integration with other tools
+- **Name Blacklisting**: Block models with names matching suspicious patterns
+- **Manifest Scanning**: Detect suspicious patterns in model configuration files
+## 🛡️ Scanners
+ModelAudit includes specialized scanners for different model formats:
+- **Pickle Scanner**: Detects malicious code and encoded payloads in pickle files
+- **TensorFlow Scanner**: Identifies suspicious operations in SavedModel format
+- **Keras Scanner**: Checks for unsafe Lambda layers and risky configurations
+- **PyTorch Scanner**: Examines PyTorch models for security issues
+- **Manifest Scanner**: Analyzes model manifests and configuration files for suspicious patterns and blacklisted names
+## 🛠️ Development
+### Using Poetry
+```bash
+# Clone the repository
+git clone https://github.com/promptfoo/modelaudit.git
+cd modelaudit
+# Install dependencies
+poetry install
+# Install with extras
+poetry install --extras "all"
+```
+## 📝 License
+This project is licensed under the MIT License.

modelaudit-0.1.0/modelaudit/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.1.0"

modelaudit-0.1.0/modelaudit/cli.py ADDED Viewed

@@ -0,0 +1,284 @@
+import click
+import sys
+import os
+import json
+import logging
+import time
+from yaspin import yaspin
+from yaspin.spinners import Spinners
+from .core import scan_model_directory_or_file
+from .scanners import ScanResult, IssueSeverity
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger("modelaudit")
+@click.group()
+def cli():
+    """My Model Scanner CLI."""
+    pass
+@cli.command("scan")
+@click.argument("paths", nargs=-1, type=click.Path(exists=True), required=True)
+@click.option("--blacklist", "-b", multiple=True, help="Additional blacklist patterns to check against model names")
+@click.option("--format", "-f", type=click.Choice(["text", "json"]), default="text", help="Output format [default: text]")
+@click.option("--output", "-o", type=click.Path(), help="Output file path (prints to stdout if not specified)")
+@click.option("--timeout", "-t", type=int, default=300, help="Scan timeout in seconds [default: 300]")
+@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
+@click.option("--max-file-size", type=int, default=0,
+              help="Maximum file size to scan in bytes [default: unlimited]")
+def scan_command(paths, blacklist, format, output, timeout, verbose, max_file_size):
+    """
+    Scan one or more model files or directories for malicious content or suspicious references.
+    Usage: modelaudit scan /path/to/model1 /path/to/model2 ...
+    You can specify additional blacklist patterns with --blacklist or -b option:
+    modelaudit scan /path/to/model1 /path/to/model2 -b llama -b alpaca
+    Advanced options:
+      --format, -f       Output format (text or json)
+      --output, -o       Write results to a file instead of stdout
+      --timeout, -t      Set scan timeout in seconds
+      --verbose, -v      Show detailed information during scanning
+      --max-file-size    Maximum file size to scan in bytes
+    """
+    # Print a nice header if not in JSON mode and not writing to a file
+    if format == "text" and not output:
+        header = [
+            "─" * 80,
+            click.style("ModelAudit Security Scanner", fg="blue", bold=True),
+            click.style("Scanning for potential security issues in ML model files", fg="cyan"),
+            "─" * 80,
+        ]
+        click.echo("\n".join(header))
+        click.echo(f"Paths to scan: {click.style(', '.join(paths), fg='green')}")
+        if blacklist:
+            click.echo(f"Additional blacklist patterns: {click.style(', '.join(blacklist), fg='yellow')}")
+        click.echo("─" * 80)
+        click.echo("")
+    # Set logging level based on verbosity
+    if verbose:
+        logger.setLevel(logging.DEBUG)
+    # Aggregated results
+    aggregated_results = {
+        "scanner_names": [],  # Track all scanner names used
+        "start_time": time.time(),
+        "bytes_scanned": 0,
+        "issues": [],
+        "has_errors": False,
+        "files_scanned": 0
+    }
+    # Scan each path
+    for path in paths:
+        # Early exit for common non-model file extensions
+        if os.path.isfile(path):
+            _, ext = os.path.splitext(path)
+            ext = ext.lower()
+            if ext in ('.md', '.txt', '.py', '.js', '.html', '.css', '.json', '.yaml', '.yml'):
+                if verbose:
+                    logger.info(f"Skipping non-model file: {path}")
+                click.echo(f"Skipping non-model file: {path}")
+                continue
+        # Show progress indicator if in text mode and not writing to a file
+        spinner = None
+        if format == "text" and not output:
+            spinner_text = f"Scanning {click.style(path, fg='cyan')}"
+            spinner = yaspin(Spinners.dots, text=spinner_text)
+            spinner.start()
+        # Perform the scan with the specified options
+        try:
+            # Define progress callback if using spinner
+            progress_callback = None
+            if spinner:
+                def update_progress(message, percentage):
+                    spinner.text = f"{message} ({percentage:.1f}%)"
+                progress_callback = update_progress
+            # Run the scan with progress reporting
+            results = scan_model_directory_or_file(
+                path,
+                blacklist_patterns=list(blacklist) if blacklist else None,
+                timeout=timeout,
+                max_file_size=max_file_size,
+                progress_callback=progress_callback
+            )
+            # Aggregate results
+            aggregated_results["bytes_scanned"] += results.get("bytes_scanned", 0)
+            aggregated_results["issues"].extend(results.get("issues", []))
+            aggregated_results["files_scanned"] += results.get("files_scanned", 1)  # Count each file scanned
+            if results.get("has_errors", False):
+                aggregated_results["has_errors"] = True
+            # Track scanner names
+            for scanner in results.get("scanners", []):
+                if scanner and scanner not in aggregated_results["scanner_names"] and scanner != "unknown":
+                    aggregated_results["scanner_names"].append(scanner)
+            # Show completion status if in text mode and not writing to a file
+            if spinner:
+                if results.get("issues", []):
+                    # Filter out DEBUG severity issues when not in verbose mode
+                    visible_issues = [issue for issue in results.get("issues", [])
+                                     if verbose or not isinstance(issue, dict)
+                                     or issue.get("severity") != "debug"]
+                    issue_count = len(visible_issues)
+                    spinner.text = f"Scanned {click.style(path, fg='cyan')}"
+                    if issue_count > 0:
+                        spinner.ok(click.style(f"✓ Found {issue_count} issues!", fg="yellow", bold=True))
+                    else:
+                        spinner.ok(click.style("✓", fg="green", bold=True))
+                else:
+                    spinner.text = f"Scanned {click.style(path, fg='cyan')}"
+                    spinner.ok(click.style("✓", fg="green", bold=True))
+        except Exception as e:
+            # Show error if in text mode and not writing to a file
+            if spinner:
+                spinner.text = f"Error scanning {click.style(path, fg='cyan')}"
+                spinner.fail(click.style("✗", fg="red", bold=True))
+            logger.error(f"Error during scan of {path}: {str(e)}", exc_info=verbose)
+            click.echo(f"Error scanning {path}: {str(e)}", err=True)
+            aggregated_results["has_errors"] = True
+    # Calculate total duration
+    aggregated_results["duration"] = time.time() - aggregated_results["start_time"]
+    # Format the output
+    if format == "json":
+        output_data = aggregated_results
+        output_text = json.dumps(output_data, indent=2)
+    else:
+        # Text format
+        output_text = format_text_output(aggregated_results, verbose)
+    # Send output to the specified destination
+    if output:
+        with open(output, "w") as f:
+            f.write(output_text)
+    else:
+        # Add a separator line between debug output and scan results
+        if format == "text":
+            click.echo("\n" + "─" * 80)
+        click.echo(output_text)
+    # Exit with appropriate error code
+    if aggregated_results.get("has_errors", False) or aggregated_results.get("issues", []):
+        sys.exit(1)
+    else:
+        sys.exit(0)
+def format_text_output(results, verbose=False):
+    """Format scan results as human-readable text with colors"""
+    output_lines = []
+    # Add summary information with styling
+    if "scanner_names" in results and results["scanner_names"]:
+        scanner_names = results["scanner_names"]
+        if len(scanner_names) == 1:
+            output_lines.append(click.style(f"Active Scanner: {scanner_names[0]}", fg="blue", bold=True))
+        else:
+            output_lines.append(click.style(f"Active Scanners: {', '.join(scanner_names)}", fg="blue", bold=True))
+    if "duration" in results:
+        output_lines.append(click.style(f"Scan completed in {results['duration']:.2f} seconds", fg="cyan"))
+    if "files_scanned" in results:
+        output_lines.append(click.style(f"Files scanned: {results['files_scanned']}", fg="cyan"))
+    if "bytes_scanned" in results:
+        # Format bytes in a more readable way
+        bytes_scanned = results['bytes_scanned']
+        if bytes_scanned >= 1024 * 1024 * 1024:
+            size_str = f"{bytes_scanned / (1024 * 1024 * 1024):.2f} GB"
+        elif bytes_scanned >= 1024 * 1024:
+            size_str = f"{bytes_scanned / (1024 * 1024):.2f} MB"
+        elif bytes_scanned >= 1024:
+            size_str = f"{bytes_scanned / 1024:.2f} KB"
+        else:
+            size_str = f"{bytes_scanned} bytes"
+        output_lines.append(click.style(f"Scanned {size_str}", fg="cyan"))
+    # Add issue details with color-coded severity
+    issues = results.get("issues", [])
+    if issues:
+        # Filter out DEBUG severity issues when not in verbose mode
+        visible_issues = [issue for issue in issues
+                         if verbose or not isinstance(issue, dict)
+                         or issue.get("severity") != "debug"]
+        # Count issues by severity (excluding DEBUG when not in verbose mode)
+        error_count = sum(1 for issue in visible_issues if isinstance(issue, dict) and issue.get("severity") == "error")
+        warning_count = sum(1 for issue in visible_issues if isinstance(issue, dict) and issue.get("severity") == "warning")
+        info_count = sum(1 for issue in visible_issues if isinstance(issue, dict) and issue.get("severity") == "info")
+        debug_count = sum(1 for issue in issues if isinstance(issue, dict) and issue.get("severity") == "debug")
+        # Only show debug count in verbose mode
+        issue_summary = []
+        if error_count:
+            issue_summary.append(click.style(f"{error_count} errors", fg="red", bold=True))
+        if warning_count:
+            issue_summary.append(click.style(f"{warning_count} warnings", fg="yellow"))
+        if info_count:
+            issue_summary.append(click.style(f"{info_count} info", fg="blue"))
+        if verbose and debug_count:
+            issue_summary.append(click.style(f"{debug_count} debug", fg="cyan"))
+        if issue_summary:
+            output_lines.append(click.style("Issues found: ", fg="white") + ", ".join(issue_summary))
+        # Only display visible issues
+        for i, issue in enumerate(visible_issues, 1):
+            severity = issue.get("severity", "warning").lower()
+            # Skip debug issues if verbose is not enabled
+            if severity == "debug" and not verbose:
+                continue
+            message = issue.get("message", "Unknown issue")
+            location = issue.get("location", "")
+            # Color-code based on severity
+            if severity == "error":
+                severity_style = click.style(f"[ERROR]", fg="red", bold=True)
+            elif severity == "warning":
+                severity_style = click.style(f"[WARNING]", fg="yellow")
+            elif severity == "info":
+                severity_style = click.style(f"[INFO]", fg="blue")
+            elif severity == "debug":
+                severity_style = click.style(f"[DEBUG]", fg="gray")
+            # Format the issue line
+            issue_num = click.style(f"{i}.", fg="white", bold=True)
+            if location:
+                location_str = click.style(f"{location}", fg="cyan", bold=True)
+                output_lines.append(f"{issue_num} {location_str}: {severity_style} {message}")
+            else:
+                output_lines.append(f"{issue_num} {severity_style} {message}")
+            # Add a small separator between issues for readability
+            if i < len(issues):
+                output_lines.append("")
+    else:
+        output_lines.append("\n" + click.style("✓ No issues found", fg="green", bold=True))
+    # Add a footer
+    output_lines.append("─" * 80)
+    if issues:
+        if any(isinstance(issue, dict) and issue.get("severity") == "error" for issue in issues):
+            status = click.style("✗ Scan completed with errors", fg="red", bold=True)
+        else:
+            status = click.style("⚠ Scan completed with warnings", fg="yellow", bold=True)
+    else:
+        status = click.style("✓ Scan completed successfully", fg="green", bold=True)
+    output_lines.append(status)
+    return "\n".join(output_lines)
+def main():
+    cli()