PyPI - agents-shipgate - Versions diffs - 0.2.0__py3-none-any.whl - Mend

agents-shipgate 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

agents_shipgate/__init__.py +3 -0
agents_shipgate/__main__.py +5 -0
agents_shipgate/checks/__init__.py +2 -0
agents_shipgate/checks/api.py +400 -0
agents_shipgate/checks/auth.py +104 -0
agents_shipgate/checks/base.py +71 -0
agents_shipgate/checks/documentation.py +113 -0
agents_shipgate/checks/inventory.py +70 -0
agents_shipgate/checks/manifest_consistency.py +166 -0
agents_shipgate/checks/manifest_scope.py +170 -0
agents_shipgate/checks/policy.py +65 -0
agents_shipgate/checks/registry.py +210 -0
agents_shipgate/checks/schema.py +129 -0
agents_shipgate/checks/side_effects.py +49 -0
agents_shipgate/ci/__init__.py +2 -0
agents_shipgate/ci/exit_policy.py +35 -0
agents_shipgate/ci/github_summary.py +27 -0
agents_shipgate/cli/__init__.py +2 -0
agents_shipgate/cli/discovery.py +205 -0
agents_shipgate/cli/main.py +477 -0
agents_shipgate/cli/scan.py +366 -0
agents_shipgate/config/__init__.py +2 -0
agents_shipgate/config/loader.py +120 -0
agents_shipgate/config/schema.py +312 -0
agents_shipgate/core/__init__.py +2 -0
agents_shipgate/core/baseline.py +113 -0
agents_shipgate/core/context.py +16 -0
agents_shipgate/core/errors.py +11 -0
agents_shipgate/core/findings.py +249 -0
agents_shipgate/core/logging.py +38 -0
agents_shipgate/core/models.py +272 -0
agents_shipgate/core/risk_hints.py +173 -0
agents_shipgate/inputs/__init__.py +2 -0
agents_shipgate/inputs/common.py +141 -0
agents_shipgate/inputs/mcp.py +114 -0
agents_shipgate/inputs/openai_api.py +355 -0
agents_shipgate/inputs/openai_sdk_static.py +162 -0
agents_shipgate/inputs/openapi.py +324 -0
agents_shipgate/py.typed +1 -0
agents_shipgate/report/__init__.py +2 -0
agents_shipgate/report/json_report.py +10 -0
agents_shipgate/report/markdown.py +248 -0
agents_shipgate-0.2.0.dist-info/METADATA +217 -0
agents_shipgate-0.2.0.dist-info/RECORD +47 -0
agents_shipgate-0.2.0.dist-info/WHEEL +4 -0
agents_shipgate-0.2.0.dist-info/entry_points.txt +12 -0
agents_shipgate-0.2.0.dist-info/licenses/LICENSE +185 -0

agents_shipgate/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Agents Shipgate package."""
+# Package version string; matches the 0.2.0 wheel this listing was generated from.
+__version__ = "0.2.0"

agents_shipgate/__main__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from agents_shipgate.cli.main import app
+# Invoke the CLI app imported from agents_shipgate.cli.main only when this
+# module is executed as a script, never as a side effect of importing it.
+if __name__ == "__main__":
+    app()

agents_shipgate/checks/__init__.py ADDED Viewed

@@ -0,0 +1,2 @@
+"""Deterministic Agents Shipgate checks."""
+

agents_shipgate/checks/api.py ADDED Viewed

@@ -0,0 +1,400 @@
+from __future__ import annotations
+from typing import Any
+from agents_shipgate.checks.base import agent_finding, tool_finding
+from agents_shipgate.core.context import ScanContext
+from agents_shipgate.core.models import Tool, ToolParameter
+from agents_shipgate.core.risk_hints import (
+    has_risk_tag,
+    is_high_risk_tool,
+    is_write_tool,
+    risk_tags,
+)
+# Parameter names that `_broad_free_text` treats as open-ended free-text inputs
+# (compared against the lowercased parameter name).
+BROAD_TEXT_NAMES = {
+    "action",
+    "body",
+    "command",
+    "content",
+    "instructions",
+    "message",
+    "prompt",
+    "update",
+    "updates",
+}
+# Name fragments that mark a numeric parameter as risky when it declares neither
+# an upper bound nor an enum (see `_risky_field_without_bounds_or_enum`).
+RISKY_NUMERIC_NAMES = {"amount", "amt", "count", "qty", "quantity", "limit", "cap", "size"}
+# Phrases searched for in the lowercased prompt to detect a read-only or
+# advise-only claim (see `_prompt_tool_scope_mismatch`).
+READ_ONLY_PROMPT_TERMS = (
+    "advise only",
+    "advice only",
+    "only advise",
+    "read-only",
+    "read only",
+    "do not take action",
+)
+# Phrases searched for in the lowercased prompt as evidence of approval language.
+APPROVAL_PROMPT_TERMS = ("approval", "approved", "human review", "requires review")
+# Phrases searched for in the lowercased prompt as evidence of confirmation language.
+CONFIRMATION_PROMPT_TERMS = ("confirm", "confirmation", "explicit consent", "ask before")
+def run(context: ScanContext):
+    if context.api_artifacts is None:
+        return []
+    findings = []
+    findings.extend(_function_schema_strictness(context))
+    findings.extend(_structured_output_readiness(context))
+    findings.extend(_prompt_tool_scope_mismatch(context))
+    findings.extend(_operational_readiness(context))
+    return findings
+def _function_schema_strictness(context: ScanContext):
+    findings = []
+    for tool in _api_tools(context):
+        issues = _function_schema_issues(tool)
+        if not issues:
+            continue
+        high_risk = is_write_tool(tool) or is_high_risk_tool(tool)
+        findings.append(
+            tool_finding(
+                tool=tool,
+                check_id="SHIP-API-FUNCTION-SCHEMA-STRICTNESS",
+                title=f"{tool.name} function schema is not strict enough",
+                severity="high" if high_risk else "medium",
+                category="api",
+                evidence={"issues": issues, "risk_tags": risk_tags(tool, min_confidence="medium")},
+                confidence="high",
+                recommendation=(
+                    f"Make {tool.name} a strict function schema: object parameters, "
+                    "additionalProperties=false, complete required list, and bounded risky fields."
+                ),
+                context=context,
+            )
+        )
+    return findings
+def _structured_output_readiness(context: ScanContext):
+    artifacts = context.api_artifacts
+    if artifacts is None:
+        return []
+    high_risk_tools = [tool.name for tool in _api_tools(context) if is_high_risk_tool(tool)]
+    if not artifacts.response_formats:
+        return [
+            agent_finding(
+                check_id="SHIP-API-STRUCTURED-OUTPUT-READINESS",
+                title="OpenAI API response format is not declared",
+                severity="high" if high_risk_tools else "medium",
+                category="api",
+                evidence={"high_risk_tools": high_risk_tools},
+                confidence="high",
+                recommendation=(
+                    "Declare a structured response format with decision/status, error/refusal, "
+                    "and needs_review fields where downstream behavior depends on the output."
+                ),
+                context=context,
+            )
+        ]
+    findings = []
+    for response_format in artifacts.response_formats:
+        issues = _response_schema_issues(
+            response_format.json_schema,
+            response_format.downstream_critical_fields,
+        )
+        if not issues:
+            continue
+        findings.append(
+            agent_finding(
+                check_id="SHIP-API-STRUCTURED-OUTPUT-READINESS",
+                title=f"Response format {response_format.path} is under-specified",
+                severity="medium",
+                category="api",
+                evidence={
+                    "path": response_format.path,
+                    "issues": issues,
+                    "downstream_critical_fields": response_format.downstream_critical_fields,
+                },
+                confidence="medium",
+                recommendation=(
+                    "Tighten the structured output schema with enums, "
+                    "needs_review/refusal/error modeling, and declared critical fields."
+                ),
+                context=context,
+            )
+        )
+    return findings
+def _prompt_tool_scope_mismatch(context: ScanContext):
+    artifacts = context.api_artifacts
+    if artifacts is None or not artifacts.prompt_text:
+        return []
+    prompt = artifacts.prompt_text.lower()
+    api_tools = _api_tools(context)
+    write_or_high_risk = [
+        tool for tool in api_tools if is_write_tool(tool) or is_high_risk_tool(tool)
+    ]
+    findings = []
+    if write_or_high_risk and any(term in prompt for term in READ_ONLY_PROMPT_TERMS):
+        findings.append(
+            agent_finding(
+                check_id="SHIP-API-PROMPT-TOOL-SCOPE-MISMATCH",
+                title=(
+                    "Prompt says read-only or advise-only while write/high-risk "
+                    "tools are enabled"
+                ),
+                severity="high",
+                category="api",
+                evidence={"tools": [tool.name for tool in write_or_high_risk]},
+                confidence="high",
+                recommendation=(
+                    "Align prompt scope with enabled tools or remove write/high-risk tools."
+                ),
+                context=context,
+            )
+        )
+    needs_confirmation = [
+        tool
+        for tool in api_tools
+        if has_risk_tag(
+            tool,
+            {"destructive", "external_write", "customer_communication", "financial_action"},
+            min_confidence="medium",
+        )
+    ]
+    if needs_confirmation and not (
+        any(term in prompt for term in CONFIRMATION_PROMPT_TERMS)
+        and any(term in prompt for term in APPROVAL_PROMPT_TERMS)
+    ):
+        findings.append(
+            agent_finding(
+                check_id="SHIP-API-PROMPT-TOOL-SCOPE-MISMATCH",
+                title="Prompt lacks approval/confirmation language for high-risk tools",
+                severity="medium",
+                category="api",
+                evidence={"tools": [tool.name for tool in needs_confirmation]},
+                confidence="medium",
+                recommendation=(
+                    "Add prompt instructions requiring human approval and explicit confirmation "
+                    "before financial, destructive, or external customer actions."
+                ),
+                context=context,
+            )
+        )
+    return findings
+def _operational_readiness(context: ScanContext):
+    artifacts = context.api_artifacts
+    if artifacts is None:
+        return []
+    findings = []
+    api_tools = _api_tools(context)
+    high_risk_tools = [tool for tool in api_tools if is_high_risk_tool(tool)]
+    retry_policy = artifacts.retry_policy()
+    timeouts = artifacts.timeouts()
+    output_schemas = artifacts.tool_output_schemas()
+    if high_risk_tools and not retry_policy:
+        findings.append(
+            agent_finding(
+                check_id="SHIP-API-OPERATIONAL-READINESS",
+                title="OpenAI API flow lacks retry policy metadata",
+                severity="medium",
+                category="api",
+                evidence={"high_risk_tools": [tool.name for tool in high_risk_tools]},
+                confidence="medium",
+                recommendation="Declare retry_policy in openai_api.policy_rules or model_config.",
+                context=context,
+            )
+        )
+    if high_risk_tools and not timeouts:
+        findings.append(
+            agent_finding(
+                check_id="SHIP-API-OPERATIONAL-READINESS",
+                title="OpenAI API flow lacks timeout metadata",
+                severity="medium",
+                category="api",
+                evidence={"high_risk_tools": [tool.name for tool in high_risk_tools]},
+                confidence="medium",
+                recommendation="Declare tool-call timeout metadata for high-risk OpenAI API flows.",
+                context=context,
+            )
+        )
+    if high_risk_tools and not artifacts.test_cases:
+        findings.append(
+            agent_finding(
+                check_id="SHIP-API-OPERATIONAL-READINESS",
+                title="OpenAI API flow lacks test case metadata for high-risk tools",
+                severity="medium",
+                category="api",
+                evidence={"high_risk_tools": [tool.name for tool in high_risk_tools]},
+                confidence="medium",
+                recommendation="Add simple OpenAI API test cases for high-risk tool-call flows.",
+                context=context,
+            )
+        )
+    for tool in high_risk_tools:
+        if tool.name not in output_schemas:
+            findings.append(
+                tool_finding(
+                    tool=tool,
+                    check_id="SHIP-API-OPERATIONAL-READINESS",
+                    title=f"{tool.name} lacks success/failure output modeling",
+                    severity="medium",
+                    category="api",
+                    evidence={"tool_output_schemas": sorted(output_schemas)},
+                    confidence="medium",
+                    recommendation=(
+                        f"Declare success_fields and failure_fields for {tool.name} "
+                        "in openai_api policy rules."
+                    ),
+                    context=context,
+                )
+            )
+        if retry_policy and _needs_idempotency(tool, artifacts):
+            findings.append(
+                tool_finding(
+                    tool=tool,
+                    check_id="SHIP-API-OPERATIONAL-READINESS",
+                    title=f"{tool.name} may be retried without idempotency evidence",
+                    severity="high",
+                    category="api",
+                    evidence={
+                        "retry_policy": retry_policy,
+                        "risk_tags": risk_tags(tool, min_confidence="medium"),
+                    },
+                    confidence="high",
+                    recommendation=(
+                        f"Add idempotency evidence for {tool.name} or avoid retrying "
+                        "this side effect."
+                    ),
+                    context=context,
+                )
+            )
+    _append_trace_findings(findings, context)
+    return findings
+def _append_trace_findings(findings: list, context: ScanContext) -> None:
+    artifacts = context.api_artifacts
+    if artifacts is None:
+        return
+    approval_tools = context.manifest.policies.approval_tools() | artifacts.approval_tools()
+    confirmation_tools = (
+        context.manifest.policies.confirmation_tools() | artifacts.confirmation_tools()
+    )
+    for event in artifacts.trace_samples:
+        tool_name = event.get("tool_name")
+        if not isinstance(tool_name, str):
+            continue
+        if tool_name in approval_tools and event.get("approved") is False:
+            findings.append(
+                agent_finding(
+                    check_id="SHIP-API-OPERATIONAL-READINESS",
+                    title=f"Trace sample shows {tool_name} without approval",
+                    severity="medium",
+                    category="api",
+                    evidence={"tool_name": tool_name, "approved": event.get("approved")},
+                    confidence="medium",
+                    recommendation=f"Require approval before calling {tool_name}.",
+                    context=context,
+                )
+            )
+        if tool_name in confirmation_tools and event.get("confirmed") is False:
+            findings.append(
+                agent_finding(
+                    check_id="SHIP-API-OPERATIONAL-READINESS",
+                    title=f"Trace sample shows {tool_name} without confirmation",
+                    severity="medium",
+                    category="api",
+                    evidence={"tool_name": tool_name, "confirmed": event.get("confirmed")},
+                    confidence="medium",
+                    recommendation=f"Require explicit confirmation before calling {tool_name}.",
+                    context=context,
+                )
+            )
+def _function_schema_issues(tool: Tool) -> list[str]:
+    issues: list[str] = []
+    schema = tool.input_schema
+    if not schema:
+        return ["missing_parameters_schema"]
+    if tool.annotations.get("openaiStrict") is not True:
+        issues.append("missing_strict_true")
+    if schema.get("type") != "object":
+        issues.append("parameters_schema_not_object")
+    if schema.get("additionalProperties") is not False:
+        issues.append("additional_properties_not_false")
+    properties = schema.get("properties")
+    if isinstance(properties, dict):
+        required = set(schema.get("required") or [])
+        missing_required = sorted(set(properties) - required)
+        if missing_required:
+            issues.append(f"properties_missing_from_required:{','.join(missing_required)}")
+    for parameter in tool.parameters:
+        if _risky_field_without_bounds_or_enum(parameter):
+            issues.append(f"risky_field_unbounded:{parameter.name}")
+        if _broad_free_text(parameter):
+            issues.append(f"broad_free_text:{parameter.name}")
+    return issues
+def _response_schema_issues(schema: dict[str, Any], critical_fields: list[str]) -> list[str]:
+    issues: list[str] = []
+    if schema.get("type") != "object":
+        issues.append("response_schema_not_object")
+    if schema.get("additionalProperties") is not False:
+        issues.append("additional_properties_not_false")
+    properties = schema.get("properties") if isinstance(schema.get("properties"), dict) else {}
+    if not critical_fields:
+        issues.append("missing_downstream_critical_fields")
+    else:
+        missing_critical = sorted(set(critical_fields) - set(properties))
+        if missing_critical:
+            issues.append(f"critical_fields_missing_from_schema:{','.join(missing_critical)}")
+    if not any(field in properties for field in ("refusal", "needs_review", "error")):
+        issues.append("missing_refusal_needs_review_or_error_field")
+    for field in ("decision", "status"):
+        value = properties.get(field)
+        if isinstance(value, dict) and not value.get("enum"):
+            issues.append(f"missing_enum:{field}")
+    return issues
+def _needs_idempotency(tool: Tool, artifacts) -> bool:
+    if tool.name in artifacts.idempotency_tools():
+        return False
+    if tool.annotations.get("idempotentHint") is True:
+        return False
+    if any(parameter.name == "idempotency_key" for parameter in tool.parameters):
+        return False
+    return is_write_tool(tool) and has_risk_tag(
+        tool,
+        {"financial_action", "destructive", "external_write"},
+        min_confidence="medium",
+    )
+def _risky_field_without_bounds_or_enum(parameter: ToolParameter) -> bool:
+    name = parameter.name.lower()
+    risky_name = any(token in name for token in RISKY_NUMERIC_NAMES)
+    return (
+        risky_name
+        and parameter.type in {"number", "integer"}
+        and parameter.maximum is None
+        and not parameter.enum
+    )
+def _broad_free_text(parameter: ToolParameter) -> bool:
+    return (
+        parameter.name.lower() in BROAD_TEXT_NAMES
+        and parameter.type in {None, "string", "object"}
+        and not parameter.enum
+    )
+def _api_tools(context: ScanContext) -> list[Tool]:
+    return [tool for tool in context.tools if tool.source_type == "openai_api"]

agents_shipgate/checks/auth.py ADDED Viewed

@@ -0,0 +1,104 @@
+from __future__ import annotations
+from agents_shipgate.checks.base import agent_finding, tool_finding
+from agents_shipgate.core.context import ScanContext
+from agents_shipgate.core.risk_hints import has_risk_tag, is_write_tool
+def run(context: ScanContext):
+    findings = []
+    broad_global_scopes = [scope for scope in context.manifest.permissions.scopes if _is_broad_scope(scope)]
+    if broad_global_scopes:
+        findings.append(
+            agent_finding(
+                check_id="SHIP-AUTH-MANIFEST-BROAD-SCOPE",
+                title="Manifest declares broad permission scopes",
+                severity="high",
+                category="auth",
+                evidence={"scopes": broad_global_scopes},
+                confidence="high",
+                recommendation="Replace broad manifest permission scopes with the narrowest scopes needed for this release.",
+                context=context,
+            )
+        )
+    for tool in context.tools:
+        if _tool_requires_scope(tool) and not tool.auth.scopes:
+            findings.append(
+                tool_finding(
+                    tool=tool,
+                    check_id="SHIP-AUTH-MISSING-SCOPE",
+                    title=f"{tool.name} lacks declared auth scopes",
+                    severity="high",
+                    category="auth",
+                    evidence={"risk_tags": [hint.tag for hint in tool.risk_hints if hint.confidence in {"medium", "high"}]},
+                    confidence="medium",
+                    recommendation=f"Declare auth scopes for {tool.name} in OpenAPI, MCP metadata, or the manifest before release review.",
+                    context=context,
+                )
+            )
+        missing_scopes = [
+            scope
+            for scope in tool.auth.scopes
+            if not _scope_covered(scope, context.manifest.permissions.scopes)
+        ]
+        if missing_scopes:
+            findings.append(
+                tool_finding(
+                    tool=tool,
+                    check_id="SHIP-AUTH-SCOPE-COVERAGE-MISSING",
+                    title=f"{tool.name} requires scopes not declared in the manifest",
+                    severity="high",
+                    category="auth",
+                    evidence={
+                        "tool_scopes": tool.auth.scopes,
+                        "manifest_scopes": context.manifest.permissions.scopes,
+                        "missing_scopes": missing_scopes,
+                    },
+                    confidence="high",
+                    recommendation=(
+                        f"Add the required scopes for {tool.name} to permissions.scopes "
+                        "or narrow the tool's declared auth requirements."
+                    ),
+                    context=context,
+                )
+            )
+        broad_scopes = [scope for scope in tool.auth.scopes if _is_broad_scope(scope)]
+        if broad_scopes:
+            findings.append(
+                tool_finding(
+                    tool=tool,
+                    check_id="SHIP-AUTH-TOOL-BROAD-SCOPE",
+                    title=f"{tool.name} uses broad auth scopes",
+                    severity="high",
+                    category="auth",
+                    evidence={"scopes": broad_scopes},
+                    confidence="high",
+                    recommendation=f"Replace broad scopes for {tool.name} with narrower operation-specific scopes.",
+                    context=context,
+                )
+            )
+    return findings
+def _is_broad_scope(scope: str) -> bool:
+    lowered = scope.lower()
+    return lowered in {"*", "admin"} or lowered.endswith(":*") or "write-all" in lowered or "admin" in lowered
+def _tool_requires_scope(tool) -> bool:
+    return is_write_tool(tool) or has_risk_tag(
+        tool,
+        {"sensitive_data_access"},
+        min_confidence="medium",
+    )
+def _scope_covered(required_scope: str, manifest_scopes: list[str]) -> bool:
+    required = required_scope.lower()
+    for declared_scope in manifest_scopes:
+        declared = declared_scope.lower()
+        if declared in {"*", required}:
+            return True
+        if declared.endswith(":*") and required.startswith(declared[:-1]):
+            return True
+    return False

agents_shipgate/checks/base.py ADDED Viewed

@@ -0,0 +1,71 @@
+from __future__ import annotations
+from pathlib import Path
+from agents_shipgate.core.context import ScanContext
+from agents_shipgate.core.models import (
+    Finding,
+    SourceReference,
+    Tool,
+    parse_confidence,
+    parse_severity,
+)
+def tool_finding(
+    *,
+    tool: Tool,
+    check_id: str,
+    title: str,
+    severity: str,
+    category: str,
+    evidence: dict[str, object],
+    confidence: str,
+    recommendation: str,
+    context: ScanContext,
+) -> Finding:
+    return Finding(
+        check_id=check_id,
+        title=title,
+        severity=parse_severity(severity),
+        category=category,
+        tool_id=tool.id,
+        tool_name=tool.name,
+        agent_id=context.agent.id,
+        evidence=evidence,
+        confidence=parse_confidence(confidence),
+        source=SourceReference(
+            type=tool.source_type,
+            ref=tool.source_ref,
+            location=tool.source_location,
+        ),
+        recommendation=recommendation,
+    )
+def agent_finding(
+    *,
+    check_id: str,
+    title: str,
+    severity: str,
+    category: str,
+    evidence: dict[str, object],
+    confidence: str,
+    recommendation: str,
+    context: ScanContext,
+) -> Finding:
+    return Finding(
+        check_id=check_id,
+        title=title,
+        severity=parse_severity(severity),
+        category=category,
+        agent_id=context.agent.id,
+        evidence=evidence,
+        confidence=parse_confidence(confidence),
+        source=SourceReference(type="manifest", ref=_manifest_ref(context.config_path)),
+        recommendation=recommendation,
+    )
+def _manifest_ref(config_path: Path) -> str:
+    return config_path.name