PyPI - safeworkflow - Versions diffs - 1.0.0__py3-none-any.whl - Mend

safeworkflow 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

safeworkflow/__init__.py +18 -0
safeworkflow/cli.py +103 -0
safeworkflow/config.py +33 -0
safeworkflow/patterns.py +110 -0
safeworkflow/sanitizer.py +57 -0
safeworkflow/scanner.py +98 -0
safeworkflow/scorer.py +57 -0
safeworkflow/types.py +37 -0
safeworkflow-1.0.0.dist-info/METADATA +105 -0
safeworkflow-1.0.0.dist-info/RECORD +12 -0
safeworkflow-1.0.0.dist-info/WHEEL +4 -0
safeworkflow-1.0.0.dist-info/entry_points.txt +2 -0

safeworkflow/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""safeworkflow - Prompt injection and supply-chain risk protection."""
+from .config import Settings
+from .sanitizer import sanitize
+from .scanner import scan
+from .scorer import RiskLevel, Score
+from .types import ScanIssue, ScanResult
+# Package version string; the wheel METADATA declares the same value.
+__version__ = "1.0.0"
+# Names re-exported from the submodules imported above; this list is the
+# package's declared public API.
+__all__ = [
+    "scan",
+    "Score",
+    "RiskLevel",
+    "sanitize",
+    "Settings",
+    "ScanResult",
+    "ScanIssue",
+]

safeworkflow/cli.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""CLI for safeworkflow."""
+import json
+from pathlib import Path
+import typer
+from rich import print as rprint
+from rich.console import Console
+from rich.table import Table
+from .sanitizer import sanitize
+from .scanner import scan, scan_file
+from .types import ScanResult
+# Typer application object; the command functions in this module register on it.
+app = typer.Typer(help="Prompt injection and supply-chain risk protection")
+# Module-level rich console instance.
+console = Console()
+# Registered explicitly as "scan": without a name Typer derives the command
+# name from the function name, which would not be "scan".
+@app.command("scan")
+def scan_cmd(
+    source: str = typer.Argument(..., help="File or text to scan"),
+    fail_on: str = typer.Option("high", "--fail-on", "-f", help="Fail on risk level"),
+    format: str = typer.Option("text", "--format", help="Output format: text, json"),
+    max_score: int = typer.Option(100, "--max-score", help="Maximum risk score"),
+) -> int:
+    """Scan content or file for security risks."""
+    # ``source`` is read as a UTF-8 file when it names an existing regular
+    # file; otherwise the argument itself is scanned as literal text.
+    # Returns 0 when the content is judged safe and raises typer.Exit(code=1)
+    # when it is not, so the process exit status reflects the verdict.
+    path = Path(source)
+    try:
+        is_file = path.is_file()
+    except (OSError, ValueError):
+        # Inline text may be too long or otherwise invalid as a path name.
+        is_file = False
+    content = path.read_text(encoding="utf-8") if is_file else source
+    # A single scan() call for both inputs so --max-score also applies to files.
+    result = scan(content, fail_on=fail_on, max_score=max_score)
+    if format == "json":
+        output = {
+            "score": result.score,
+            "risk_level": result.risk_level.value,
+            "is_safe": result.is_safe,
+            "issue_count": len(result.issues),
+            "issues": [
+                {
+                    "line": i.line,
+                    "column": i.column,
+                    "message": i.message,
+                    "risk_level": i.risk_level.value,
+                    "pattern": i.pattern_name,
+                }
+                for i in result.issues
+            ],
+        }
+        print(json.dumps(output, indent=2))
+    else:
+        # Any format other than "json" falls back to the human-readable view.
+        _print_result(result)
+    if not result.is_safe:
+        # A command's return value does not become the exit status when the
+        # app runs as a script, so failure is signalled by raising Exit.
+        raise typer.Exit(code=1)
+    return 0
+@app.command("sanitize")
+def sanitize_cmd(
+    source: str = typer.Argument(..., help="File or text to sanitize"),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file"),
+    replacement: str = typer.Option(
+        "[REDACTED]", "--replacement", "-r", help="Replacement text"
+    ),
+) -> None:
+    """Sanitize content by removing security risks."""
+    # ``source`` is read as a UTF-8 file when the path exists; otherwise the
+    # argument itself is treated as the text to sanitize.
+    candidate = Path(source)
+    if candidate.exists():
+        raw_text = candidate.read_text(encoding="utf-8")
+    else:
+        raw_text = source
+    cleaned = sanitize(raw_text, replacement=replacement)
+    if not output:
+        # No output file requested: write the sanitized text to stdout.
+        print(cleaned)
+        return
+    Path(output).write_text(cleaned, encoding="utf-8")
+    rprint(f"[green]Sanitized output written to {output}[/green]")
+def _print_result(result: ScanResult) -> None:
+    """Render a scan result to the terminal with rich markup.
+    Prints the score, the upper-cased risk level and a SAFE/UNSAFE status.
+    When issues are present, a table follows with one row per issue; each
+    message is truncated to its first 50 characters.
+    Args:
+        result: Scan outcome to display.
+    """
+    rprint(f"\n[bold]Risk Score:[/bold] {result.score}/100")
+    rprint(f"[bold]Risk Level:[/bold] {result.risk_level.value.upper()}")
+    if result.is_safe:
+        status = "[green]SAFE[/green]"
+    else:
+        status = "[red]UNSAFE[/red]"
+    rprint(f"[bold]Status:[/bold] {status}")
+    if not result.issues:
+        return
+    table = Table(title="Detected Issues")
+    column_styles = (
+        ("Line", "cyan"),
+        ("Pattern", "magenta"),
+        ("Message", "yellow"),
+        ("Risk", "red"),
+    )
+    for heading, colour in column_styles:
+        table.add_column(heading, style=colour)
+    for issue in result.issues:
+        row = (
+            str(issue.line),
+            issue.pattern_name,
+            issue.message[:50],
+            issue.risk_level.value.upper(),
+        )
+        table.add_row(*row)
+    rprint(table)
+# Run the Typer app when this module is executed as a script.
+if __name__ == "__main__":
+    app()

safeworkflow/config.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""Configuration for safeworkflow."""
+from pydantic import Field
+from pydantic_settings import BaseSettings
+class Settings(BaseSettings):
+    """Configuration settings for safeworkflow."""
+    # Risk-level name used as the failure threshold; defaults to "high".
+    fail_on: str = Field(default="high", description="Minimum risk level to fail")
+    # Highest score still considered acceptable; defaults to 70.
+    max_risk_score: int = Field(default=70, description="Maximum acceptable risk score")
+    # Toggle for AI-specific patterns; enabled by default.
+    enable_ai_patterns: bool = Field(
+        default=True, description="Enable AI-specific patterns"
+    )
+    # Toggle for supply-chain detection; enabled by default.
+    enable_supply_chain: bool = Field(
+        default=True, description="Enable supply-chain detection"
+    )
+    # Extra regex patterns supplied by the user; a fresh empty list per instance.
+    custom_patterns: list[str] = Field(
+        default_factory=list, description="Custom regex patterns"
+    )
+    # Settings-source configuration: a SAFEWORKFLOW_ prefix for environment
+    # variable names and a ".env" file.
+    model_config = {"env_prefix": "SAFEWORKFLOW_", "env_file": ".env"}
+    @property
+    def should_fail_on(self) -> dict[str, int]:
+        """Map risk level to minimum score for fail."""
+        # Returns the whole level-to-score table on every access; it does not
+        # look at self.fail_on.
+        return {
+            "low": 25,
+            "medium": 50,
+            "high": 75,
+            "critical": 90,
+        }

safeworkflow/patterns.py ADDED Viewed

@@ -0,0 +1,110 @@
+"""Pattern database for detecting injection and supply-chain risks."""
+import re
+from typing import NamedTuple
+class Pattern(NamedTuple):
+    """A detection pattern."""
+    # Short identifier for the rule, e.g. "jailbreak".
+    name: str
+    # Compiled regular expression that detects the risk.
+    pattern: re.Pattern
+    # Severity as a lowercase string such as "high" or "critical".
+    risk_level: str
+    # Human-readable explanation of what the rule detects.
+    description: str
+# Base injection patterns.
+# Each entry pairs a compiled, case-insensitive regex with a severity string
+# and a description.
+INJECTION_PATTERNS = [
+    Pattern(
+        name="ignore_previous",
+        pattern=re.compile(
+            r"ignore\s+(all\s+)?(previous|above|prior|earlier)",
+            re.IGNORECASE
+        ),
+        risk_level="critical",
+        description="Attempts to ignore previous instructions",
+    ),
+    Pattern(
+        name="system_override",
+        pattern=re.compile(
+            r"(you are now|new instructions|override|disregard).*system",
+            re.IGNORECASE
+        ),
+        risk_level="critical",
+        description="System instruction override attempt",
+    ),
+    Pattern(
+        name="jailbreak",
+        pattern=re.compile(
+            r"(jailbreak|dan\s*mode|developer\s*mode|unfiltered)",
+            re.IGNORECASE
+        ),
+        risk_level="critical",
+        description="Jailbreak or DAN mode attempt",
+    ),
+    Pattern(
+        name="role_injection",
+        pattern=re.compile(
+            r"(you are|act as|pretend to be|roleplay).*?(assistant|admin|root)",
+            re.IGNORECASE
+        ),
+        risk_level="high",
+        description="Role injection attempt",
+    ),
+    Pattern(
+        name="command_injection",
+        # Word boundaries keep "sudo" and "chmod" from matching inside
+        # ordinary words such as "pseudo".
+        pattern=re.compile(
+            r"(rm\s+-rf|\bsudo\b|\bchmod\b|curl\s+\||\|\s*bash|\$\(.*\)|`.*?`)",
+            re.IGNORECASE
+        ),
+        risk_level="high",
+        description="Shell command injection attempt",
+    ),
+    Pattern(
+        name="javascript_protocol",
+        pattern=re.compile(
+            r"javascript:|data:text/html",
+            re.IGNORECASE
+        ),
+        risk_level="medium",
+        description="JavaScript protocol in URL",
+    ),
+    Pattern(
+        name="supply_chain_pkg",
+        pattern=re.compile(
+            r"(pip\s+install|npm\s+install|go\s+get).*-[a-z0-9]{8,12}",
+            re.IGNORECASE
+        ),
+        risk_level="high",
+        description="Suspicious package name with random suffix",
+    ),
+    Pattern(
+        name="typosquatting",
+        pattern=re.compile(
+            r"(requessts|requsts|resquests|numpyy|pandas1)",
+            re.IGNORECASE
+        ),
+        risk_level="high",
+        description="Typosquatting attempt",
+    ),
+    Pattern(
+        name="env_leak",
+        pattern=re.compile(
+            r"(OPENAI_API_KEY|ANTHROPIC_API|SECRET|TOKEN).{0,20}(['\"]?\w{20,})",
+            re.IGNORECASE
+        ),
+        risk_level="medium",
+        description="Potential credential leak",
+    ),
+]
+def get_patterns(enable_supply_chain: bool = True) -> list[Pattern]:
+    """Return the detection patterns selected by the configuration.
+    Args:
+        enable_supply_chain: When False, patterns whose name contains
+            "supply" or "typo" (case-insensitive) are left out.
+    Returns:
+        A new list; the module-level pattern table is not modified.
+    """
+    if enable_supply_chain:
+        return list(INJECTION_PATTERNS)
+    excluded_markers = ("supply", "typo")
+    selected = []
+    for candidate in INJECTION_PATTERNS:
+        lowered = candidate.name.lower()
+        if any(marker in lowered for marker in excluded_markers):
+            continue
+        selected.append(candidate)
+    return selected

safeworkflow/sanitizer.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""Content sanitizer for removing sensitive/injection patterns."""
+from .patterns import get_patterns
+def sanitize(
+    content: str,
+    *,
+    replacement: str = "[REDACTED]",
+    enable_supply_chain: bool = True,
+) -> str:
+    """Sanitize content by removing/redacting security risks.
+    Every match of every active pattern is replaced, applying the patterns
+    one after another to the running result.
+    Args:
+        content: Text to sanitize.
+        replacement: Text to replace detected patterns with. It is inserted
+            literally; backslashes and group references are not interpreted.
+        enable_supply_chain: Whether to check supply-chain patterns.
+    Returns:
+        Sanitized content.
+    """
+    def _literal_replacement(_match: object) -> str:
+        # A callable makes re.sub insert the text verbatim; a plain string
+        # would be parsed as a template, so user text such as "\d" or "\1"
+        # would raise or be expanded.
+        return replacement
+    result = content
+    for detector in get_patterns(enable_supply_chain=enable_supply_chain):
+        result = detector.pattern.sub(_literal_replacement, result)
+    return result
+def sanitize_file(
+    input_path: str,
+    output_path: str | None = None,
+    *,
+    replacement: str = "[REDACTED]",
+    enable_supply_chain: bool = True,
+) -> str:
+    """Sanitize a file and optionally write to output.
+    Both files are read and written as UTF-8.
+    Args:
+        input_path: Path to input file.
+        output_path: Optional path for sanitized output; nothing is written
+            when it is None or empty.
+        replacement: Text to replace detected patterns with.
+        enable_supply_chain: Whether to check supply-chain patterns;
+            forwarded to ``sanitize``.
+    Returns:
+        Sanitized content.
+    """
+    with open(input_path, encoding="utf-8") as f:
+        content = f.read()
+    result = sanitize(
+        content,
+        replacement=replacement,
+        enable_supply_chain=enable_supply_chain,
+    )
+    if output_path:
+        with open(output_path, "w", encoding="utf-8") as f:
+            f.write(result)
+    return result

safeworkflow/scanner.py ADDED Viewed

@@ -0,0 +1,98 @@
+"""Content scanner for detecting security risks."""
+from .patterns import get_patterns
+from .scorer import Score
+from .types import RiskLevel, ScanIssue, ScanResult
+def scan(
+    content: str,
+    *,
+    fail_on: str = "high",
+    enable_supply_chain: bool = True,
+    max_score: int = 100,
+) -> ScanResult:
+    """Scan text line by line for injection and supply-chain risks.
+    Every regex match on every line becomes one issue. The issues are
+    folded into a score, and the content counts as safe while that score
+    stays below the threshold associated with ``fail_on``.
+    Args:
+        content: Text to scan for security issues.
+        fail_on: Risk-level name whose score threshold marks content unsafe.
+        enable_supply_chain: Whether to check supply-chain patterns.
+        max_score: Cap applied to the computed score.
+    Returns:
+        ScanResult with issues and risk assessment.
+    """
+    detectors = get_patterns(enable_supply_chain=enable_supply_chain)
+    # Issues are ordered by line, then by pattern, then by match position;
+    # line and column numbers are 1-based.
+    issues: list[ScanIssue] = [
+        ScanIssue(
+            line=line_num,
+            column=found.start() + 1,
+            message=f"{detector.description}: '{found.group()}'",
+            risk_level=RiskLevel(detector.risk_level),
+            pattern_name=detector.name,
+            suggestion=_get_suggestion(detector.name),
+        )
+        for line_num, text in enumerate(content.split("\n"), start=1)
+        for detector in detectors
+        for found in detector.pattern.finditer(text)
+    ]
+    score = Score.calculate(issues, max_score=max_score)
+    return ScanResult(
+        content=content,
+        issues=issues,
+        score=score,
+        risk_level=_determine_risk_level(score),
+        is_safe=score < Score.threshold_for(fail_on),
+    )
+def scan_file(
+    path: str,
+    *,
+    fail_on: str = "high",
+    encoding: str = "utf-8",
+    enable_supply_chain: bool = True,
+    max_score: int = 100,
+) -> ScanResult:
+    """Scan a file for security risks.
+    The file is read in full and handed to ``scan``.
+    Args:
+        path: Path to file to scan.
+        fail_on: Minimum risk level that triggers failure.
+        encoding: File encoding.
+        enable_supply_chain: Whether to check supply-chain patterns;
+            forwarded to ``scan``.
+        max_score: Maximum possible risk score; forwarded to ``scan``.
+    Returns:
+        ScanResult with issues and risk assessment.
+    """
+    with open(path, encoding=encoding) as f:
+        content = f.read()
+    return scan(
+        content,
+        fail_on=fail_on,
+        enable_supply_chain=enable_supply_chain,
+        max_score=max_score,
+    )
+def _get_suggestion(pattern_name: str) -> str | None:
+    """Look up the remediation hint for a pattern name.
+    Args:
+        pattern_name: Name of the pattern that matched.
+    Returns:
+        The hint text, or None when the pattern has no hint.
+    """
+    remediation_by_pattern = dict(
+        ignore_previous="Remove instruction override attempts",
+        system_override="Avoid system instruction manipulation",
+        jailbreak="Block jailbreak patterns entirely",
+        role_injection="Sanitize role-playing attempts",
+    )
+    try:
+        return remediation_by_pattern[pattern_name]
+    except KeyError:
+        return None
+def _determine_risk_level(score: int) -> RiskLevel:
+    """Translate a numeric score into a risk level.
+    Args:
+        score: Risk score to classify.
+    Returns:
+        CRITICAL from 90, HIGH from 70, MEDIUM from 40, otherwise LOW.
+    """
+    # Checked from the highest floor down; the first floor reached wins.
+    floors = (
+        (90, RiskLevel.CRITICAL),
+        (70, RiskLevel.HIGH),
+        (40, RiskLevel.MEDIUM),
+    )
+    for floor, level in floors:
+        if score >= floor:
+            return level
+    return RiskLevel.LOW

safeworkflow/scorer.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""Risk scoring engine for safeworkflow."""
+from .types import RiskLevel, ScanIssue
+class Score:
+    """Risk scoring utilities."""
+    # NOTE(review): this table is not referenced by calculate(), which uses
+    # its own per-level values; confirm which set is intended.
+    WEIGHTS = {
+        RiskLevel.LOW: 1,
+        RiskLevel.MEDIUM: 3,
+        RiskLevel.HIGH: 7,
+        RiskLevel.CRITICAL: 15,
+    }
+    @staticmethod
+    def calculate(issues: list[ScanIssue], max_score: int = 100) -> int:
+        """Sum per-issue points and cap the total.
+        Args:
+            issues: List of detected security issues.
+            max_score: Upper bound applied to the summed points.
+        Returns:
+            0 when there are no issues, otherwise the smaller of the summed
+            points and ``max_score``.
+        """
+        if not issues:
+            return 0
+        # Points contributed by one issue of each severity.
+        points_by_level = {
+            RiskLevel.CRITICAL: 40,
+            RiskLevel.HIGH: 25,
+            RiskLevel.MEDIUM: 10,
+            RiskLevel.LOW: 5,
+        }
+        total = 0
+        for issue in issues:
+            # Levels missing from the table count as 5 points.
+            total += points_by_level.get(issue.risk_level, 5)
+        return total if total <= max_score else max_score
+    @staticmethod
+    def threshold_for(level: str) -> int:
+        """Return the score threshold for a risk-level name.
+        Args:
+            level: Risk level string (low/medium/high/critical), matched
+                case-insensitively.
+        Returns:
+            The threshold for that level, or 75 for an unrecognised name.
+        """
+        threshold_by_level = {
+            "low": 25,
+            "medium": 50,
+            "high": 75,
+            "critical": 90,
+        }
+        key = level.lower()
+        try:
+            return threshold_by_level[key]
+        except KeyError:
+            return 75

safeworkflow/types.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""Core types for safeworkflow."""
+from dataclasses import dataclass
+from enum import Enum
+class RiskLevel(str, Enum):
+    """Risk severity levels."""
+    # Members also subclass str; each member's value is its lowercase name.
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+    CRITICAL = "critical"
+@dataclass
+class ScanIssue:
+    """Represents a detected security issue."""
+    # Line number of the match.
+    line: int
+    # Column number of the match within its line.
+    column: int
+    # Human-readable description of the finding.
+    message: str
+    # Severity assigned to the finding.
+    risk_level: RiskLevel
+    # Name of the pattern that produced the finding.
+    pattern_name: str
+    # Optional remediation hint; None when there is none.
+    suggestion: str | None = None
+@dataclass
+class ScanResult:
+    """Result of scanning content for security issues."""
+    # The text that was scanned.
+    content: str
+    # Issues found in the content.
+    issues: list[ScanIssue]
+    # Numeric risk score for the content.
+    score: int
+    # Overall severity for the content.
+    risk_level: RiskLevel
+    # Whether the content is considered safe.
+    is_safe: bool
+    def __bool__(self) -> bool:
+        """Return True if content is safe."""
+        # Makes the result usable directly in a condition: truthy means safe.
+        return self.is_safe

safeworkflow-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,105 @@
+Metadata-Version: 2.4
+Name: safeworkflow
+Version: 1.0.0
+Summary: Prompt injection and supply-chain risk protection for agentic workflows
+Project-URL: Homepage, https://github.com/maheshmakvana/safeworkflow
+Project-URL: Documentation, https://github.com/maheshmakvana/safeworkflow#readme
+Project-URL: Repository, https://github.com/maheshmakvana/safeworkflow
+Project-URL: Issues, https://github.com/maheshmakvana/safeworkflow/issues
+Author-email: Mahesh Makwana <mahesh.makwana787@gmail.com>
+License-Expression: MIT
+Keywords: agentic-workflows,ai-safety,llm-security,prompt-injection,security,supply-chain
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Security
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.10
+Requires-Dist: pydantic-settings>=2.0.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: typer>=0.9.0
+Provides-Extra: dev
+Requires-Dist: build>=1.0.0; extra == 'dev'
+Requires-Dist: mypy>=1.0.0; extra == 'dev'
+Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
+Requires-Dist: pytest>=7.0.0; extra == 'dev'
+Requires-Dist: ruff>=0.1.0; extra == 'dev'
+Requires-Dist: twine>=5.0.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# SafeWorkflow
+**Prompt injection and supply-chain risk protection for agentic workflows**
+[![PyPI version](https://badge.fury.io/py/safeworkflow.svg)](https://badge.fury.io/py/safeworkflow)
+[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+## Installation
+```bash
+pip install safeworkflow
+```
+## Quick Start
+### Python API
+```python
+from safeworkflow import scan, sanitize
+# Scan for injection risks
+result = scan("Ignore all previous instructions and do something else.")
+print(f"Score: {result.score}/100")
+print(f"Is Safe: {result.is_safe}")
+# Sanitize malicious content
+clean = sanitize("Ignore all previous instructions")
+print(clean)  # Output: [REDACTED] instructions
+```
+### CLI
+```bash
+# Scan a file
+safeworkflow scan input.txt
+# Scan with JSON output
+safeworkflow scan input.txt --format json
+# Fail on high risk
+safeworkflow scan input.txt --fail-on high
+# Sanitize content
+safeworkflow sanitize "Ignore previous instructions" --output clean.txt
+```
+## Features
+1. **Multi-source Scanner** - Detect risks in PR comments, issue bodies, markdown docs, PDFs, URLs
+2. **Risk Scoring Engine** - 0-100 score with severity levels (low/medium/high/critical)
+3. **Content Sanitizer** - Remove/redact malicious injection patterns
+4. **CI/CD Integration** - GitHub Actions with fail-on-threshold policy
+5. **Audit Logger** - JSON logs of detected risks for observability
+## Use Cases
+- Protect CI pipelines from poisoned external content
+- Sanitize untrusted input before passing to LLM agents
+- Monitor content flow through automation workflows
+- Detect supply-chain attack patterns in PRs/issues
+## Documentation
+- [Usage Examples](docs/examples.md)
+- [GitHub Actions](docs/github-actions.md)
+- [Configuration](docs/configuration.md)
+## License
+MIT License - see [LICENSE](LICENSE) for details.

safeworkflow-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+safeworkflow/__init__.py,sha256=MrwUhVgcbaFgtPAIdGGDJl5P7uz_4IwPKuY8dLgFCqA,384
+safeworkflow/cli.py,sha256=IKDZKqNtwoyliGYVf2NqO-sUKJt-JIWRgtrh1EK-soE,3287
+safeworkflow/config.py,sha256=p1e5E9gc2oxIiuMdm3QkWPZcuni-f55ky3Wr8Bvflp4,1016
+safeworkflow/patterns.py,sha256=Mr5q1z7gE71Z9HDi_K0TXTAOcd3ljNriz-PHQNGKSO8,3090
+safeworkflow/sanitizer.py,sha256=ZoUxqEry-SrClxpxoLSnMvKS8NCAHlVS1cDX4emRYQ0,1381
+safeworkflow/scanner.py,sha256=-ARr8jd5kc3OHOhsWFEVo19T0NhpwRJnV2Xd6s06ATs,2893
+safeworkflow/scorer.py,sha256=dSMKO6RbFxE2kDi_mrNF0MXotB-ERtkkqqgpExbbj24,1408
+safeworkflow/types.py,sha256=edegccNl26v7FFqewcFWRytLerBZ29y09lEnbW_WjKc,743
+safeworkflow-1.0.0.dist-info/METADATA,sha256=Jj_cSR0P-DZmrE-nSJ2Re2iKgcPfaXoK6FW3h1V1HeU,3493
+safeworkflow-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+safeworkflow-1.0.0.dist-info/entry_points.txt,sha256=gOQ3OJ2uayU8vMJlON3rmLj0RPZfusEVeZamS2a62SQ,54
+safeworkflow-1.0.0.dist-info/RECORD,,

safeworkflow-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.29.0
+Root-Is-Purelib: true
+Tag: py3-none-any

safeworkflow-1.0.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ safeworkflow = safeworkflow.cli:app