PyPI - apisec-code-bolt - Versions diffs - 0.1.0__py3-none-any.whl - Mend

apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

apisec_code_bolt/__init__.py +42 -0
apisec_code_bolt/__main__.py +11 -0
apisec_code_bolt/analysis/__init__.py +96 -0
apisec_code_bolt/analysis/analyzer.py +2309 -0
apisec_code_bolt/analysis/binding_tracker.py +341 -0
apisec_code_bolt/analysis/call_graph.py +1197 -0
apisec_code_bolt/analysis/call_graph_types.py +332 -0
apisec_code_bolt/analysis/call_resolver.py +988 -0
apisec_code_bolt/analysis/capability_tagger.py +322 -0
apisec_code_bolt/analysis/config_scanner.py +197 -0
apisec_code_bolt/analysis/data_flow.py +1883 -0
apisec_code_bolt/analysis/dependency_extractor.py +959 -0
apisec_code_bolt/analysis/flow_analysis.py +1406 -0
apisec_code_bolt/analysis/hof_catalog.py +61 -0
apisec_code_bolt/analysis/integration_detector.py +1399 -0
apisec_code_bolt/analysis/literal_scanner.py +300 -0
apisec_code_bolt/analysis/path_normalizer.py +55 -0
apisec_code_bolt/analysis/read_site_detector.py +310 -0
apisec_code_bolt/analysis/request_patterns.py +162 -0
apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
apisec_code_bolt/analysis/sink_evidence.py +333 -0
apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
apisec_code_bolt/cli/__init__.py +5 -0
apisec_code_bolt/cli/exit_codes.py +17 -0
apisec_code_bolt/cli/main.py +1069 -0
apisec_code_bolt/cloud/__init__.py +1 -0
apisec_code_bolt/cloud/apisec_client.py +118 -0
apisec_code_bolt/cloud/client.py +255 -0
apisec_code_bolt/core/__init__.py +75 -0
apisec_code_bolt/core/config.py +528 -0
apisec_code_bolt/core/credentials.py +65 -0
apisec_code_bolt/core/discovery.py +433 -0
apisec_code_bolt/core/log_format.py +115 -0
apisec_code_bolt/core/manifest.py +1009 -0
apisec_code_bolt/core/repo.py +280 -0
apisec_code_bolt/core/state.py +59 -0
apisec_code_bolt/core/telemetry.py +451 -0
apisec_code_bolt/core/types.py +587 -0
apisec_code_bolt/fingerprinting/__init__.py +1 -0
apisec_code_bolt/frameworks/__init__.py +29 -0
apisec_code_bolt/frameworks/_jwt_common.py +50 -0
apisec_code_bolt/frameworks/auth_helpers.py +437 -0
apisec_code_bolt/frameworks/base.py +608 -0
apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
apisec_code_bolt/frameworks/java/__init__.py +6 -0
apisec_code_bolt/frameworks/java/_annotations.py +167 -0
apisec_code_bolt/frameworks/java/_constraints.py +128 -0
apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
apisec_code_bolt/frameworks/js/__init__.py +8 -0
apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
apisec_code_bolt/frameworks/python/__init__.py +19 -0
apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
apisec_code_bolt/parsing/__init__.py +62 -0
apisec_code_bolt/parsing/base.py +554 -0
apisec_code_bolt/parsing/csharp/__init__.py +5 -0
apisec_code_bolt/parsing/csharp/language_services.py +203 -0
apisec_code_bolt/parsing/csharp/literals.py +72 -0
apisec_code_bolt/parsing/csharp/parser.py +1158 -0
apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
apisec_code_bolt/parsing/js/__init__.py +5 -0
apisec_code_bolt/parsing/js/language_services.py +118 -0
apisec_code_bolt/parsing/js/parser.py +622 -0
apisec_code_bolt/parsing/jvm/__init__.py +7 -0
apisec_code_bolt/parsing/jvm/language_services.py +270 -0
apisec_code_bolt/parsing/jvm/parser.py +774 -0
apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
apisec_code_bolt/parsing/python/__init__.py +150 -0
apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
apisec_code_bolt/parsing/python/expression_utils.py +221 -0
apisec_code_bolt/parsing/python/extraction_types.py +271 -0
apisec_code_bolt/parsing/python/language_services.py +487 -0
apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
apisec_code_bolt/parsing/python/parser.py +719 -0
apisec_code_bolt/parsing/python/path_resolver.py +576 -0
apisec_code_bolt/parsing/python/router_registry.py +806 -0
apisec_code_bolt/parsing/python/type_resolver.py +730 -0
apisec_code_bolt/parsing/python/visitors.py +1544 -0
apisec_code_bolt/parsing/services.py +544 -0
apisec_code_bolt/query/__init__.py +1 -0
apisec_code_bolt/query/ast_cache.py +182 -0
apisec_code_bolt/query/executor.py +283 -0
apisec_code_bolt/query/handlers.py +832 -0
apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0

apisec_code_bolt/core/manifest.py ADDED Viewed

@@ -0,0 +1,1009 @@
+"""
+Manifest schema definitions for apisec-code-bolt.
+The manifest is the primary output of the Probe - a structured JSON document
+containing all extracted facts about the codebase. This is sent to the Cloud
+for vulnerability analysis.
+Key principle: The manifest contains FACTS, not security judgments.
+The Cloud applies vulnerability rules to interpret these facts.
+"""
+from __future__ import annotations
+import hashlib
+from datetime import datetime
+from typing import Any, Literal
+from pydantic import BaseModel, Field
+from .types import (
+    CodeLocation,
+)
+# =============================================================================
+# Manifest Version
+# =============================================================================
+# Version identifier for the manifest schema defined in this module.
+# 1.3: PackageDependencyModel gains an explicit `ecosystem` field so the
+# reasoning engine no longer re-infers it (fragilely) from `source_file`. (F-6)
+MANIFEST_VERSION = "1.3"
+# =============================================================================
+# Content-Addressable ID Helper
+# =============================================================================
+def stable_id(prefix: str, *parts: str) -> str:
+    """Build a reproducible identifier for an entity from its identity parts.
+    The parts are joined with ``|`` (a falsy part such as ``None`` or ``""``
+    contributes an empty string), the UTF-8 bytes of the joined text are
+    hashed with SHA-256, and the first 12 hex characters (48 bits) of the
+    digest are appended to ``prefix`` after a hyphen.
+    Args:
+        prefix: Label placed before the hyphen.
+        *parts: Identity components; their order is significant.
+    Returns:
+        ``"{prefix}-{first 12 hex chars of the SHA-256 digest}"``.
+    """
+    normalized = [part if part else "" for part in parts]
+    hasher = hashlib.sha256()
+    hasher.update("|".join(normalized).encode("utf-8"))
+    short_hash = hasher.hexdigest()[:12]
+    return "{}-{}".format(prefix, short_hash)
+# =============================================================================
+# Code Location (Pydantic version for serialization)
+# =============================================================================
+class LocationModel(BaseModel):
+    """Code location for JSON serialization."""
+    # `file` and `line` are required; the span fields and `function` are optional.
+    file: str
+    line: int
+    column: int | None = None
+    end_line: int | None = None
+    end_column: int | None = None
+    function: str | None = None
+    @classmethod
+    def from_code_location(cls, loc: CodeLocation) -> LocationModel:
+        """Convert a ``CodeLocation`` into its serializable counterpart.
+        ``loc.file`` is stringified; line and column data are copied as-is.
+        ``function`` is not set here, so it keeps its ``None`` default.
+        """
+        # NOTE(review): `function` is never populated by this constructor -
+        # confirm whether CodeLocation carries a function name to copy.
+        span = {
+            "file": str(loc.file),
+            "line": loc.line,
+            "column": loc.column,
+            "end_line": loc.end_line,
+            "end_column": loc.end_column,
+        }
+        return cls(**span)
+# =============================================================================
+# Project Metadata
+# =============================================================================
+class ProjectMetadata(BaseModel):
+    """Metadata about the analyzed project."""
+    # Only `root` is required; every other field has a default (None, an
+    # empty list, or 0), so ProjectMetadata(root=...) alone is valid.
+    root: str = Field(description="Project root directory")
+    name: str | None = Field(
+        default=None, description="Project name (from pyproject.toml, pom.xml, etc.)"
+    )
+    languages_detected: list[str] = Field(
+        default_factory=list, description="Programming languages found"
+    )
+    frameworks_detected: list[str] = Field(default_factory=list, description="Frameworks detected")
+    # Counters and timing are plain ints defaulting to 0.
+    files_analyzed: int = Field(default=0, description="Number of files successfully analyzed")
+    files_skipped: int = Field(default=0, description="Number of files skipped")
+    analysis_duration_ms: int = Field(default=0, description="Analysis duration in milliseconds")
+# =============================================================================
+# Function and Class Definitions
+# =============================================================================
+class ParameterModel(BaseModel):
+    """Function/method parameter."""
+    # Only `name` is required. The annotation and default are carried as
+    # strings (presumably source text - confirm against the producer).
+    name: str
+    type_annotation: str | None = None
+    default_value: str | None = None
+    is_variadic: bool = False  # Python *args, Java varargs, JS rest params
+    is_keyword_variadic: bool = False  # Python **kwargs, JS destructured options
+class FunctionModel(BaseModel):
+    """Function or method definition — language-agnostic."""
+    # Required fields: id, qualified_name, name, location.
+    id: str = Field(description="Unique identifier")
+    qualified_name: str = Field(description="Fully qualified name")
+    name: str = Field(description="Simple name")
+    location: LocationModel
+    # `binding` and `visibility` are Literal-typed, so only the listed
+    # values pass validation (`visibility` may additionally be None).
+    binding: Literal["instance", "static", "free"] = Field(
+        default="free",
+        description="How the function is bound: instance method, static/class method, or free (module-level)",
+    )
+    visibility: Literal["public", "private", "protected", "internal"] | None = Field(
+        default=None,
+        description="Access control level. None when the language uses convention rather than keywords (e.g. Python)",
+    )
+    is_async: bool = False
+    # A plain string, not a ClassModel reference (presumably the type's
+    # name - confirm against the producer).
+    owner_type: str | None = Field(
+        default=None,
+        description="Owning type (class, struct, impl, interface) — None for free functions",
+    )
+    parameters: list[ParameterModel] = Field(default_factory=list)
+    return_type: str | None = None
+    annotations: list[str] = Field(
+        default_factory=list,
+        description="Decorators (Python), annotations (Java/Kotlin), attributes (C#)",
+    )
+    docstring: str | None = None
+    # Change-detection fingerprint only; `id` remains the identity key.
+    content_hash: str | None = Field(
+        default=None,
+        description="SHA-256 of parameter signatures + return type + annotations + normalized body source. "
+        "Body is normalized (comments/blank lines stripped, whitespace collapsed) before hashing. "
+        "Used for change detection across analyses, not for identity.",
+    )
+class ClassModel(BaseModel):
+    """Class definition."""
+    # Required fields: id, qualified_name, name, location.
+    id: str
+    qualified_name: str
+    name: str
+    location: LocationModel
+    base_classes: list[str] = Field(default_factory=list)
+    annotations: list[str] = Field(
+        default_factory=list, description="Decorators / annotations / attributes"
+    )
+    # Methods are referenced by FunctionModel ID rather than embedded.
+    methods: list[str] = Field(
+        default_factory=list, description="FunctionModel IDs of methods belonging to this class"
+    )
+    docstring: str | None = None
+# =============================================================================
+# HTTP Parameters
+# =============================================================================
+class HttpParameterModel(BaseModel):
+    """HTTP request parameter."""
+    name: str
+    # Here `location` is a plain string naming the part of the request, not
+    # a LocationModel. The values listed in the description are not
+    # enforced by the `str` type.
+    location: str = Field(description="path, query, header, cookie, body, form")
+    type_annotation: str | None = None
+    # Parameters are treated as required unless stated otherwise.
+    required: bool = True
+    default_value: str | None = None
+    # Framework-specific metadata
+    alias: str | None = None  # Alternative name in request
+    constraints: dict[str, Any] = Field(default_factory=dict)  # Validation constraints
+class BodyFieldModel(BaseModel):
+    """A field within a request body model."""
+    # Only `name` is required; fields default to required=True.
+    name: str
+    type_annotation: str | None = None
+    required: bool = True
+    # `is_complex` and `nested_model` are meant to be read together: the
+    # nested schema name is expected when `is_complex` is True. The model
+    # itself does not enforce that pairing.
+    is_complex: bool = Field(
+        default=False,
+        description="True when the type is another model/schema rather than a primitive",
+    )
+    nested_model: str | None = Field(
+        default=None,
+        description="Name of the nested schema (present when is_complex=True)",
+    )
+    constraints: dict[str, Any] = Field(default_factory=dict)
+class HttpBodyModel(BaseModel):
+    """HTTP request body definition."""
+    # Every field has a default, so an empty HttpBodyModel() is valid and
+    # describes a required body with unknown content type and model.
+    content_type: str | None = None  # application/json, multipart/form-data, etc.
+    model_name: str | None = None  # Pydantic model, Java DTO class
+    model_fields: list[BodyFieldModel] = Field(default_factory=list)
+    required: bool = True
+class SchemaFieldModel(BaseModel):
+    """A field within a schema definition."""
+    # Carries the same fields as BodyFieldModel plus `default_value` and
+    # `alias`. The two classes are declared independently (no inheritance).
+    name: str
+    type_annotation: str | None = None
+    required: bool = True
+    default_value: str | None = None
+    is_complex: bool = False
+    nested_model: str | None = None
+    alias: str | None = None
+    constraints: dict[str, Any] = Field(default_factory=dict)
+class SchemaModel(BaseModel):
+    """A data model / schema referenced by route bodies or other schemas."""
+    # Only `name` is required.
+    name: str
+    qualified_name: str | None = None
+    fields: list[SchemaFieldModel] = Field(default_factory=list)
+    base_classes: list[str] = Field(default_factory=list)
+    is_enum: bool = False
+    # A list, so one schema can carry several usage labels. The labels in
+    # the description are not enforced by the `list[str]` type.
+    usage: list[str] = Field(
+        default_factory=list,
+        description="How this schema is used: 'request_body', 'response', 'orm', 'domain', 'config'",
+    )
+# =============================================================================
+# Entry Points (Routes)
+# =============================================================================
+class RouteModel(BaseModel):
+    """Entry point definition (HTTP route, CLI command, task, message consumer, etc.)."""
+    # Required fields: id, handler_function, handler_location, framework.
+    # NOTE(review): the `id` description shows sequential examples
+    # ("ep-001"), whereas stable_id() in this module yields
+    # "{prefix}-{12 hex chars}". Confirm which format producers emit.
+    id: str = Field(description="Unique identifier (ep-001, ep-002, etc.)")
+    # `kind`, `method` and `path` are free-form strings; the values named in
+    # their descriptions are conventions, not validated enumerations.
+    kind: str = Field(
+        default="http",
+        description=(
+            "Transport category for this entry point. Values are language-agnostic: "
+            "http, cli, task, consumer, webhook, event, lifecycle, scheduled, grpc, websocket. "
+            "'lifecycle' covers FastAPI/Starlette @on_event('startup'|'shutdown') and similar "
+            "application-lifetime hooks that cannot receive external requests."
+        ),
+    )
+    method: str = Field(
+        default="",
+        description="HTTP method (GET, POST, …) for http kind; empty or trigger type for non-http kinds",
+    )
+    path: str = Field(
+        default="",
+        description=(
+            "URL path pattern (http), command path (cli), task name (task), queue/topic (consumer). "
+            "GraphQL operations use the internal convention /graphql:<OperationType>.<fieldName> "
+            "(e.g. /graphql:Query.users, /graphql:Mutation.createUser). This is NOT a real HTTP path — "
+            "downstream consumers (reasoning engine, DAST) must translate it to a GraphQL request: "
+            'POST /graphql with body {"query": "{ users { ... } }"}. '
+            "Query → GET semantics, Mutation/Subscription → POST semantics."
+        ),
+    )
+    # Request parameters are split into one list per request part; the body
+    # is modelled separately and may be absent.
+    path_params: list[HttpParameterModel] = Field(default_factory=list)
+    query_params: list[HttpParameterModel] = Field(default_factory=list)
+    header_params: list[HttpParameterModel] = Field(default_factory=list)
+    cookie_params: list[HttpParameterModel] = Field(default_factory=list)
+    body: HttpBodyModel | None = None
+    # Handler
+    handler_function: str = Field(description="Qualified name of handler function")
+    handler_location: LocationModel
+    # Framework metadata
+    framework: str
+    router_name: str | None = None
+    tags: list[str] = Field(default_factory=list)
+    operation_id: str | None = None
+    summary: str | None = None
+    deprecated: bool = False
+    # Dependencies (FastAPI Depends, Spring @Autowired, etc.)
+    dependencies: list[str] = Field(
+        default_factory=list,
+        description="IDs of dependency definitions used by this route",
+    )
+    # Response
+    response_model: str | None = None
+    # Defaults to 200 when no explicit status code is supplied.
+    response_status_code: int = 200
+    # Auth reference (populated after auth analysis)
+    auth_config_ref: str | None = None
+# =============================================================================
+# Function Calls
+# =============================================================================
+class ArgumentModel(BaseModel):
+    """Argument passed to a function call."""
+    # `position` and `name` are both optional, so an argument can be
+    # described positionally, by keyword, or both.
+    position: int | None = None
+    name: str | None = None
+    # The only required field. It is a free-form string; the listed kinds
+    # are not enforced by the type.
+    value_type: str = Field(description="Type of value: literal, variable, expression, call_result")
+    # Presumably at most one of the next three is set, matching
+    # `value_type` - the model does not enforce this; verify in the producer.
+    literal_value: str | None = None
+    variable_name: str | None = None
+    expression: str | None = None
+    # Data flow reference (if this argument is tracked)
+    origin_flow_refs: list[str] = Field(
+        default_factory=list,
+        description="IDs of data flows that reach this argument",
+    )
+class CallContextModel(BaseModel):
+    """Structural context about where a call occurs."""
+    # Three independent flags, all False by default; CallContextModel() with
+    # no arguments is used as the default context by other models here.
+    in_try_block: bool = False
+    in_conditional: bool = False
+    in_loop: bool = False
+class FunctionCallModel(BaseModel):
+    """A function call site."""
+    # Required fields: id, caller, callee, location.
+    id: str = Field(description="Unique identifier")
+    caller: str = Field(description="Qualified name of calling function")
+    callee: str = Field(description="Qualified name of called function (or best guess)")
+    # Defaults to True: a call counts as resolved unless the producer
+    # explicitly marks it otherwise.
+    callee_resolved: bool = Field(
+        default=True,
+        description="Whether we could resolve the exact callee",
+    )
+    location: LocationModel
+    arguments: list[ArgumentModel] = Field(default_factory=list)
+    context: CallContextModel = Field(default_factory=CallContextModel)
+    # If callee couldn't be resolved, possible candidates
+    possible_callees: list[str] = Field(default_factory=list)
+    sequence_index: int | None = Field(
+        default=None,
+        description="Monotonic index of this call within the containing function (0-based)",
+    )
+# =============================================================================
+# Data Flows
+# =============================================================================
+class DataOriginModel(BaseModel):
+    """Where data originates."""
+    # Required fields: type, location. `type` is a free-form string.
+    type: str = Field(description="Origin type (HTTP_PATH_PARAM, etc.)")
+    name: str | None = Field(default=None, description="Parameter/variable name")
+    location: LocationModel
+    # Reference to entry point if this is HTTP data
+    entry_point_ref: str | None = None
+    # Additional context
+    metadata: dict[str, Any] = Field(default_factory=dict)
+class DataSinkModel(BaseModel):
+    """Where data ends up."""
+    # Required fields: function, location.
+    function: str = Field(description="Qualified name of function receiving data")
+    # The receiving argument can be identified by index, by name, or left
+    # unspecified (both default to None).
+    argument_index: int | None = None
+    argument_name: str | None = None
+    location: LocationModel
+    # Reference to call site
+    call_ref: str | None = None
+class FlowStepModel(BaseModel):
+    """A step in a data flow path."""
+    # Required fields: depth, caller, callee, location.
+    depth: int
+    caller: str
+    callee: str
+    # String-to-string mapping; defaults to an empty dict.
+    argument_mapping: dict[str, str] = Field(
+        default_factory=dict,
+        description="How arguments map: {'input_param': 'callee_param'}",
+    )
+    location: LocationModel
+class TransformationModel(BaseModel):
+    """A transformation applied to data along a flow path."""
+    # Required fields: depth, location, type.
+    depth: int
+    location: LocationModel
+    type: str = Field(
+        description="Transformation type: function_call, method_call, string_format, type_constructor, etc."
+    )
+    description: str | None = None
+    function: str | None = Field(
+        default=None,
+        description="Qualified name of the function/method that performed the transformation",
+    )
+    # Forward reference: CallSiteEvidenceModel is declared further down in
+    # this module. The annotation is legal here only because of
+    # `from __future__ import annotations`.
+    # NOTE(review): confirm the forward reference is resolved before first
+    # use (e.g. by an explicit model rebuild) for the supported Pydantic version.
+    call_evidence: CallSiteEvidenceModel | None = Field(
+        default=None,
+        description="Structural evidence about the transformation call site (co-arguments, keyword args)",
+    )
+# =============================================================================
+# Call-Site Evidence (shared across transformations and sinks)
+# =============================================================================
+class ArgumentConstruction(BaseModel):
+    """How a specific argument value is constructed."""
+    # `method` is required and has no default. It is a free-form string; the
+    # vocabulary in the description is not enforced by the type.
+    method: str = Field(
+        description=(
+            "Construction method (language-neutral): direct, string_interpolation, "
+            "string_concatenation, format_call, call_wrapped, spread, collection"
+        ),
+    )
+    container_type: str | None = Field(
+        default=None,
+        description="If the argument is a container literal: list, tuple, dict, set, array, map",
+    )
+class ArgumentEvidenceModel(BaseModel):
+    """Structural evidence about a single argument at a call site."""
+    # Every field has a default, so ArgumentEvidenceModel() is valid.
+    position: int | None = Field(
+        default=None, description="Positional index (None for keyword-only)"
+    )
+    name: str | None = Field(default=None, description="Keyword argument name")
+    # Three flag/value pairs describe what the argument is: a literal, a
+    # variable, or the result of a call. The flags are independent booleans;
+    # the model does not require that exactly one be set.
+    is_literal: bool = False
+    literal_value: str | None = Field(
+        default=None, description="String repr of literal value (truncated if long)"
+    )
+    literal_type: str | None = Field(
+        default=None, description="Type of literal: str, int, float, bool, None, list, dict, tuple"
+    )
+    is_variable: bool = False
+    variable_name: str | None = None
+    is_call_result: bool = False
+    called_function: str | None = None
+    # `construction` and `container_type` are flat fields here, using the
+    # same vocabulary as ArgumentConstruction.method / .container_type.
+    # Unlike that class, `construction` defaults to "direct".
+    construction: str = Field(
+        default="direct",
+        description=(
+            "How the argument value is constructed (language-neutral): "
+            "direct, string_interpolation, string_concatenation, "
+            "format_call, call_wrapped, spread, collection"
+        ),
+    )
+    container_type: str | None = Field(
+        default=None,
+        description="If the argument is a container literal: list, tuple, dict, set, array, map",
+    )
+    source_variables: list[str] = Field(
+        default_factory=list,
+        description="All variable names referenced in this argument expression",
+    )
+    expression_text: str | None = Field(
+        default=None,
+        description="Source text of non-literal, non-variable expressions (truncated at 500 chars)",
+    )
+    # Starts False; per the description it is filled in by a later analysis
+    # stage rather than at extraction time.
+    is_tainted: bool = Field(
+        default=False,
+        description="Whether this argument carries tainted data (set during data flow analysis)",
+    )
+class StringPatternModel(BaseModel):
+    """A recognised string pattern in a literal argument."""
+    # Required fields: type, pattern. `type` is a free-form string; the
+    # categories in its description are not enforced.
+    type: str = Field(
+        description="Pattern type: sql_placeholder, sql_keyword, url_scheme, template_syntax, shell_metachar, html_tag, path_traversal"
+    )
+    pattern: str = Field(description="The matched substring or pattern")
+    argument_position: int | None = Field(
+        default=None, description="Which argument the pattern was found in"
+    )
+class CallSiteEvidenceModel(BaseModel):
+    """
+    Structural evidence about a function call site.
+    Reusable across:
+    - Sink evidence (how tainted data arrives at the destination)
+    - Transformation evidence (how a transformation call was configured)
+    """
+    # Every field has a default, so CallSiteEvidenceModel() is valid.
+    # The tainted argument can be identified by position and/or keyword name.
+    tainted_argument_position: int | None = Field(
+        default=None,
+        description="Position of the argument carrying tainted data",
+    )
+    tainted_argument_name: str | None = Field(
+        default=None,
+        description="Keyword name of the argument carrying tainted data",
+    )
+    # NOTE(review): this description omits "collection", which
+    # ArgumentEvidenceModel.construction lists. Confirm whether the
+    # omission is intentional.
+    tainted_argument_construction: str | None = Field(
+        default=None,
+        description=(
+            "How the tainted argument is constructed (language-neutral): direct, "
+            "string_interpolation, string_concatenation, format_call, call_wrapped, spread"
+        ),
+    )
+    all_arguments: list[ArgumentEvidenceModel] = Field(
+        default_factory=list,
+        description="Evidence for every argument at the call site",
+    )
+    string_patterns: list[StringPatternModel] = Field(
+        default_factory=list,
+        description="String patterns detected in literal arguments (SQL placeholders, URL schemes, etc.)",
+    )
+class DataFlowModel(BaseModel):
+    """
+    A tracked data flow from origin to sink.
+    This is the core unit of taint tracking. Each flow represents
+    data moving from an entry point (origin) to a function call (sink).
+    """
+    # Required fields: id, origin, sink, depth.
+    # NOTE(review): the `id` description shows a sequential example
+    # ("flow-001"), whereas stable_id() in this module yields
+    # "{prefix}-{12 hex chars}". Confirm which format producers emit.
+    id: str = Field(description="Unique identifier (flow-001, etc.)")
+    origin: DataOriginModel
+    sink: DataSinkModel
+    path: list[FlowStepModel] = Field(
+        default_factory=list,
+        description="Function call chain from origin to sink",
+    )
+    # `depth` has no default, and the model does not tie it to len(path).
+    depth: int = Field(description="Number of function boundaries crossed")
+    truncated: bool = Field(
+        default=False,
+        description="True if flow was cut off at max depth",
+    )
+    transformations: list[TransformationModel] = Field(
+        default_factory=list,
+        description="Transformations applied to data along the path",
+    )
+    sink_evidence: CallSiteEvidenceModel | None = Field(
+        default=None,
+        description="Structural evidence about how data is used at the sink call site",
+    )
+    unresolved_calls: list[str] = Field(
+        default_factory=list,
+        description="Calls in path where callee couldn't be resolved",
+    )
+    context: CallContextModel = Field(
+        default_factory=CallContextModel,
+        description="Execution context at the sink",
+    )
+    # Plain string defaulting to "HIGH"; the three levels in the
+    # description are not enforced by the type.
+    confidence: str = Field(
+        default="HIGH",
+        description="Confidence in this flow: HIGH, MEDIUM, LOW",
+    )
+# =============================================================================
+# Authentication
+# =============================================================================
+class AuthSchemeModel(BaseModel):
+    """An authentication scheme definition."""
+    # Required fields: id, type, framework, location.
+    id: str
+    type: str = Field(description="Auth scheme type (OAUTH2_PASSWORD, API_KEY_HEADER, etc.)")
+    framework: str
+    location: LocationModel
+    # Untyped bag of settings (dict[str, Any]); its keys are not specified
+    # by this model.
+    config: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Scheme-specific configuration",
+    )
+class AuthDependencyModel(BaseModel):
+    """An authentication dependency/guard."""
+    # Required fields: id, name, type, location. Every list field below
+    # defaults to empty. Links to other entities are held as ID strings.
+    id: str
+    name: str
+    type: str = Field(description="FUNCTION, CLASS, DECORATOR, ANNOTATION, etc.")
+    location: LocationModel
+    # What scheme(s) does this use?
+    uses_schemes: list[str] = Field(default_factory=list, description="Auth scheme IDs")
+    # What other dependencies does this depend on?
+    depends_on: list[str] = Field(default_factory=list, description="Other dependency IDs")
+    # What does this extract/validate?
+    extracts: list[str] = Field(default_factory=list, description="Fields extracted")
+    validates: list[str] = Field(default_factory=list, description="Validations performed")
+    # Role/permission requirements
+    requires_roles: list[str] = Field(default_factory=list)
+    requires_scopes: list[str] = Field(default_factory=list)
+    requires_permissions: list[str] = Field(default_factory=list)
+    # JWT-specific
+    jwt_operations: list[str] = Field(
+        default_factory=list, description="decode, validate_exp, etc."
+    )
+class RouteAuthModel(BaseModel):
+    """Authentication configuration for a route."""
+    # `route_id` and `auth_required` are both required. `auth_required` has
+    # no default, so the producer must state it explicitly for every route.
+    route_id: str
+    auth_required: bool
+    mechanisms: list[str] = Field(
+        default_factory=list,
+        description="Auth dependency IDs protecting this route",
+    )
+    scopes_required: list[str] = Field(default_factory=list)
+    roles_required: list[str] = Field(default_factory=list)
+    permissions_required: list[str] = Field(default_factory=list)
+    # Free-text notes; their contents are not specified by this model.
+    notes: list[str] = Field(default_factory=list)
+class JwtConfigModel(BaseModel):
+    """JWT configuration detected in the codebase."""
+    # Every field has a default. JwtConfigModel() therefore means "no JWT
+    # usage detected" (detected=False, empty lists, None values).
+    detected: bool = False
+    library: str | None = None
+    locations: list[LocationModel] = Field(default_factory=list)
+    algorithms: list[str] = Field(default_factory=list)
+    # Maps a validation name to whether it is performed. A missing key is
+    # distinct from an explicit False (presumably "unknown" - confirm
+    # against the consumer).
+    validations: dict[str, bool] = Field(
+        default_factory=dict,
+        description="What is validated: signature, expiry, issuer, audience",
+    )
+    secret_source: str | None = None  # environment_variable, config_file, hardcoded
+    secret_name: str | None = None
+class MultiAuthFlowModel(BaseModel):
+    """Multi-authentication flow (OR/AND combinations)."""
+    # id, routes, mechanisms and logic are all required. The two lists have
+    # no default_factory, unlike most list fields in this module.
+    id: str
+    routes: list[str] = Field(description="Route IDs this applies to")
+    mechanisms: list[str] = Field(description="Auth scheme/dependency IDs")
+    # Plain string; "OR"/"AND" is a convention, not a validated enumeration.
+    logic: str = Field(description="OR, AND")
+    description: str | None = None
+class SecurityConfigModel(BaseModel):
+    """Security configuration detected."""
+    # Each section is an untyped dict or None; all default to None. The
+    # keys inside each dict are not specified by this model.
+    cors: dict[str, Any] | None = None
+    csrf: dict[str, Any] | None = None
+    rate_limiting: dict[str, Any] | None = None
+class AuthModel(BaseModel):
+    """Complete authentication analysis."""
+    # Aggregates the authentication models declared in this module. Every
+    # field has a default, so AuthModel() is a valid "nothing detected" result.
+    schemes_detected: list[AuthSchemeModel] = Field(default_factory=list)
+    auth_dependencies: list[AuthDependencyModel] = Field(default_factory=list)
+    route_auth_mapping: list[RouteAuthModel] = Field(default_factory=list)
+    global_middleware_auth: bool = Field(
+        default=False,
+        description=(
+            "True when a globally-applied middleware (e.g. add_middleware(AuthMiddleware)) "
+            "performs authentication for every route. Lets the engine's missing_auth "
+            "rule avoid false positives on middleware-protected apps."
+        ),
+    )
+    jwt_config: JwtConfigModel = Field(default_factory=JwtConfigModel)
+    multi_auth_flows: list[MultiAuthFlowModel] = Field(default_factory=list)
+    # Untyped dicts, unlike the other entries, which have dedicated models.
+    role_definitions: list[dict[str, Any]] = Field(default_factory=list)
+    security_config: SecurityConfigModel = Field(default_factory=SecurityConfigModel)
+# =============================================================================
+# Dependencies and Imports
+# =============================================================================
+class PackageDependencyModel(BaseModel):
+    """External package dependency."""
+    # Required fields: name, source_file.
+    name: str
+    # `version` and `version_constraint` are separate optional strings
+    # (presumably resolved version vs. declared specifier - confirm against
+    # the producer).
+    version: str | None = None
+    version_constraint: str | None = None
+    # Optional; None when the producer did not supply an ecosystem.
+    ecosystem: str | None = Field(
+        default=None,
+        description="OSV ecosystem: PyPI, npm, Go, crates.io, NuGet, Maven",
+    )
+    is_dev: bool = False
+    source_file: str = Field(description="requirements.txt, pyproject.toml, pom.xml, etc.")
+class ImportModel(BaseModel):
+    """Import statement."""
+    # Required fields: module, location.
+    module: str
+    names: list[str] = Field(default_factory=list, description="Imported names")
+    # A single alias for the whole statement; per-name aliases are not
+    # representable in this shape.
+    alias: str | None = None
+    is_relative: bool = False
+    location: LocationModel
+class DependenciesModel(BaseModel):
+    """Dependencies analysis."""
+    # Two independent lists, both empty by default: declared external
+    # packages, and import statements recorded as internal.
+    packages: list[PackageDependencyModel] = Field(default_factory=list)
+    internal_imports: list[ImportModel] = Field(default_factory=list)
+# =============================================================================
+# Middleware and Interceptors
+# =============================================================================
+class MiddlewareModel(BaseModel):
+    """Middleware/interceptor definition."""
+    id: str
+    name: str
+    type: str = Field(description="middleware, filter, interceptor")
+    location: LocationModel
+    order: int | None = None
+    applies_to: list[str] = Field(
+        default_factory=list,
+        description="Route patterns or 'all'",
+    )
+    # What the middleware does (detected patterns)
+    operations: list[str] = Field(
+        default_factory=list,
+        description="auth, logging, cors, etc.",
+    )
+# =============================================================================
+# Integrations and Literals
+# =============================================================================
+class IntegrationTargetModel(BaseModel):
+    """
+    A concrete outbound call target extracted from an HTTP client call site.
+    Captures the destination URL/hostname that the application communicates
+    with at runtime.  Used downstream by the reasoning engine to:
+    - Build inter-service dependency graphs (cross-manifest correlation).
+    - Detect SSRF risk (user-controlled URL components).
+    - Scope DAST testing to known outbound surfaces.
+    """
+    base_url: str = Field(
+        description="Scheme + host (+ port if non-standard), e.g. 'https://api.stripe.com'."
+    )
+    path_pattern: str = Field(
+        default="",
+        description="Path portion, e.g. '/v1/charges'. Empty when only the host is known.",
+    )
+    http_method: str = Field(
+        default="",
+        description="HTTP verb inferred from the call (GET, POST, …). Empty when unknown.",
+    )
+    is_literal: bool = Field(
+        default=True,
+        description="True when the URL was a string literal; False for variable / f-string.",
+    )
+    called_from: LocationModel | None = Field(
+        default=None,
+        description="File, line, and function where this outbound call was made.",
+    )
+class IntegrationModel(BaseModel):
+    """External integration detected."""
+    id: str
+    type: str = Field(description="database, http_client, message_queue, cloud_service, etc.")
+    name: str = Field(description="PostgreSQL, Redis, AWS S3, Stripe, etc.")
+    locations: list[LocationModel] = Field(default_factory=list)
+    detection_method: str = Field(description="import, connection_string, sdk_usage")
+    confidence: str = "HIGH"
+    sensitivity_labels: list[str] = Field(
+        default_factory=list,
+        description="Sensitivity classifications: PII, FINANCIAL, AUTH_CREDENTIAL, INTERNAL, GENERAL",
+    )
+    targets: list[IntegrationTargetModel] = Field(
+        default_factory=list,
+        description="Concrete outbound call targets extracted from HTTP client call sites.",
+    )
+    metadata: dict[str, Any] = Field(default_factory=dict)
+class LiteralPatternModel(BaseModel):
+    """Interesting literal pattern found."""
+    type: str = Field(description="url, sql_pattern, secret_pattern, etc.")
+    value: str | None = Field(
+        default=None, description="The literal value (sanitized if sensitive)"
+    )
+    pattern: str | None = Field(default=None, description="Pattern description")
+    location: LocationModel
+    confidence: str = "HIGH"
+    notes: str | None = None
+class LiteralsModel(BaseModel):
+    """Literal patterns found."""
+    urls: list[LiteralPatternModel] = Field(default_factory=list)
+    sql_patterns: list[LiteralPatternModel] = Field(default_factory=list)
+    secret_patterns: list[LiteralPatternModel] = Field(default_factory=list)
+# =============================================================================
+# Capability Tagging
+# =============================================================================
+class CapabilityModel(BaseModel):
+    """Inferred business capability."""
+    tag: str = Field(description="PAYMENT, USER_AUTH, FILE_STORAGE, DATA_PERSISTENCE, etc.")
+    evidence: list[str] = Field(
+        default_factory=list,
+        description="Integration/route IDs that support this inference",
+    )
+    confidence: str = "MEDIUM"
+# =============================================================================
+# Configuration Detection
+# =============================================================================
+class EnvVarUsageModel(BaseModel):
+    """Environment variable usage."""
+    name: str
+    locations: list[LocationModel] = Field(default_factory=list)
+    default_value: str | None = None
+class ConfigFileModel(BaseModel):
+    """Configuration file detected."""
+    path: str
+    type: str = Field(description="yaml, json, properties, env, etc.")
+class ConfigurationModel(BaseModel):
+    """Configuration analysis."""
+    env_vars_used: list[EnvVarUsageModel] = Field(default_factory=list)
+    config_files: list[ConfigFileModel] = Field(default_factory=list)
+# =============================================================================
+# Analysis Metadata
+# =============================================================================
+class AnalysisMetadataModel(BaseModel):
+    """Metadata about the analysis process."""
+    data_flow_mode: str = "inter_procedural"
+    data_flow_depth: int = 10
+    truncated_flows: int = 0
+    unresolved_calls: int = 0
+    parse_errors: list[dict[str, Any]] = Field(default_factory=list)
+    warnings: list[str] = Field(default_factory=list)
+# =============================================================================
+# Complete Manifest
+# =============================================================================
+class Manifest(BaseModel):
+    """
+    Complete manifest output from the Probe.
+    This is the primary output - a comprehensive representation of
+    all extracted facts about the codebase.
+    """
+    manifest_version: str = MANIFEST_VERSION
+    generated_at: datetime = Field(default_factory=datetime.utcnow)
+    probe_version: str = Field(description="Version of the probe that generated this")
+    project: ProjectMetadata
+    entry_points: list[RouteModel] = Field(default_factory=list)
+    functions: list[FunctionModel] = Field(default_factory=list)
+    classes: list[ClassModel] = Field(default_factory=list)
+    calls: list[FunctionCallModel] = Field(default_factory=list)
+    data_flows: list[DataFlowModel] = Field(default_factory=list)
+    auth: AuthModel = Field(default_factory=AuthModel)
+    dependencies: DependenciesModel = Field(default_factory=DependenciesModel)
+    middleware: list[MiddlewareModel] = Field(default_factory=list)
+    integrations: list[IntegrationModel] = Field(default_factory=list)
+    capabilities: list[CapabilityModel] = Field(default_factory=list)
+    schemas: dict[str, SchemaModel] = Field(
+        default_factory=dict,
+        description="Data model definitions referenced by route bodies, keyed by model name",
+    )
+    literals: LiteralsModel = Field(default_factory=LiteralsModel)
+    configuration: ConfigurationModel = Field(default_factory=ConfigurationModel)
+    analysis_metadata: AnalysisMetadataModel = Field(default_factory=AnalysisMetadataModel)
+    def to_json(self, pretty: bool = False) -> str:
+        """Serialize manifest to JSON."""
+        return self.model_dump_json(indent=2 if pretty else None)
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize manifest to dictionary."""
+        return self.model_dump(mode="json")