airbyte-agent-greenhouse 0.17.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_agent_greenhouse/__init__.py +105 -0
- airbyte_agent_greenhouse/_vendored/__init__.py +1 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/__init__.py +82 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/auth_strategies.py +1120 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/auth_template.py +135 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/cloud_utils/__init__.py +5 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/cloud_utils/client.py +213 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/connector_model_loader.py +965 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/constants.py +78 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/exceptions.py +23 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/executor/__init__.py +31 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/executor/hosted_executor.py +196 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/executor/local_executor.py +1724 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/executor/models.py +190 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/extensions.py +693 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/http/__init__.py +37 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/http/adapters/__init__.py +9 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/http/adapters/httpx_adapter.py +251 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/http/config.py +98 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/http/exceptions.py +119 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/http/protocols.py +114 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/http/response.py +104 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/http_client.py +693 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/introspection.py +262 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/logging/__init__.py +11 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/logging/logger.py +273 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/logging/types.py +93 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/observability/__init__.py +11 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/observability/config.py +179 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/observability/models.py +19 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/observability/redactor.py +81 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/observability/session.py +103 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/performance/__init__.py +6 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/performance/instrumentation.py +57 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/performance/metrics.py +93 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/schema/__init__.py +75 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/schema/base.py +164 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/schema/components.py +239 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/schema/connector.py +120 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/schema/extensions.py +230 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/schema/operations.py +146 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/schema/security.py +223 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/secrets.py +182 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/telemetry/__init__.py +10 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/telemetry/config.py +32 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/telemetry/events.py +59 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/telemetry/tracker.py +155 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/types.py +245 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/utils.py +60 -0
- airbyte_agent_greenhouse/_vendored/connector_sdk/validation.py +828 -0
- airbyte_agent_greenhouse/connector.py +1391 -0
- airbyte_agent_greenhouse/connector_model.py +2356 -0
- airbyte_agent_greenhouse/models.py +281 -0
- airbyte_agent_greenhouse/types.py +136 -0
- airbyte_agent_greenhouse-0.17.48.dist-info/METADATA +116 -0
- airbyte_agent_greenhouse-0.17.48.dist-info/RECORD +57 -0
- airbyte_agent_greenhouse-0.17.48.dist-info/WHEEL +4 -0
+++ airbyte_agent_greenhouse/_vendored/connector_sdk/introspection.py
@@ -0,0 +1,262 @@
"""
Shared introspection utilities for connector metadata.

This module provides utilities for introspecting connector metadata,
generating descriptions, and formatting parameter signatures. These
functions are used by both the runtime decorators and the generated
connector code.

The module is designed to work with any object conforming to the
ConnectorModel and EndpointDefinition interfaces from connector_sdk.types.
"""

from __future__ import annotations

from typing import Any, Protocol

# Constants
MAX_EXAMPLE_QUESTIONS = 5  # Maximum number of example questions to include in description


class EndpointProtocol(Protocol):
    """Protocol defining the expected interface for endpoint parameters.

    This allows functions to work with any endpoint-like object
    that has these attributes, including EndpointDefinition and mock objects.
    """

    path_params: list[str]
    path_params_schema: dict[str, dict[str, Any]]
    query_params: list[str]
    query_params_schema: dict[str, dict[str, Any]]
    body_fields: list[str]
    request_schema: dict[str, Any] | None


class EntityProtocol(Protocol):
    """Protocol defining the expected interface for entity definitions."""

    name: str
    actions: list[Any]
    endpoints: dict[Any, EndpointProtocol]


class ConnectorModelProtocol(Protocol):
    """Protocol defining the expected interface for connector model parameters.

    This allows functions to work with any connector-like object
    that has these attributes, including ConnectorModel and mock objects.
    """

    @property
    def entities(self) -> list[EntityProtocol]: ...

    @property
    def openapi_spec(self) -> Any: ...


def format_param_signature(endpoint: EndpointProtocol) -> str:
    """Format parameter signature for an endpoint action.

    Returns a string like: (id*) or (limit?, starting_after?, email?)
    where * = required, ? = optional

    Args:
        endpoint: Object conforming to EndpointProtocol (e.g., EndpointDefinition)

    Returns:
        Formatted parameter signature string
    """
    params = []

    # Defensive: safely access attributes with defaults for malformed endpoints
    path_params = getattr(endpoint, "path_params", []) or []
    query_params = getattr(endpoint, "query_params", []) or []
    query_params_schema = getattr(endpoint, "query_params_schema", {}) or {}
    body_fields = getattr(endpoint, "body_fields", []) or []
    request_schema = getattr(endpoint, "request_schema", None)

    # Path params (always required)
    for name in path_params:
        params.append(f"{name}*")

    # Query params
    for name in query_params:
        schema = query_params_schema.get(name, {})
        required = schema.get("required", False)
        params.append(f"{name}{'*' if required else '?'}")

    # Body fields
    if request_schema:
        required_fields = set(request_schema.get("required", []))
        for name in body_fields:
            params.append(f"{name}{'*' if name in required_fields else '?'}")

    return f"({', '.join(params)})" if params else "()"
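Reviewer note: a minimal sketch of format_param_signature in action, using a hypothetical stand-in object (FakeEndpoint is not part of the package; any attribute bag satisfying EndpointProtocol works):

# Hypothetical stand-in endpoint, for illustration only.
class FakeEndpoint:
    path_params = ["id"]
    path_params_schema = {"id": {"type": "string"}}
    query_params = ["limit", "starting_after"]
    query_params_schema = {
        "limit": {"type": "integer", "required": False},
        "starting_after": {"type": "string", "required": False},
    }
    body_fields = []
    request_schema = None

print(format_param_signature(FakeEndpoint()))  # (id*, limit?, starting_after?)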
def describe_entities(model: ConnectorModelProtocol) -> list[dict[str, Any]]:
    """Generate entity descriptions from ConnectorModel.

    Returns a list of entity descriptions with detailed parameter information
    for each action. This is used by generated connectors' describe() method.

    Args:
        model: Object conforming to ConnectorModelProtocol (e.g., ConnectorModel)

    Returns:
        List of entity description dicts with keys:
        - entity_name: Name of the entity (e.g., "contacts", "deals")
        - description: Entity description from the first endpoint
        - available_actions: List of actions (e.g., ["list", "get", "create"])
        - parameters: Dict mapping action -> list of parameter dicts
    """
    entities = []
    for entity_def in model.entities:
        description = ""
        parameters: dict[str, list[dict[str, Any]]] = {}

        endpoints = getattr(entity_def, "endpoints", {}) or {}
        if endpoints:
            for action, endpoint in endpoints.items():
                # Get description from first endpoint that has one
                if not description:
                    endpoint_desc = getattr(endpoint, "description", None)
                    if endpoint_desc:
                        description = endpoint_desc

                action_params: list[dict[str, Any]] = []

                # Defensive: safely access endpoint attributes
                path_params = getattr(endpoint, "path_params", []) or []
                path_params_schema = getattr(endpoint, "path_params_schema", {}) or {}
                query_params = getattr(endpoint, "query_params", []) or []
                query_params_schema = getattr(endpoint, "query_params_schema", {}) or {}
                body_fields = getattr(endpoint, "body_fields", []) or []
                request_schema = getattr(endpoint, "request_schema", None)

                # Path params (always required)
                for param_name in path_params:
                    schema = path_params_schema.get(param_name, {})
                    action_params.append(
                        {
                            "name": param_name,
                            "in": "path",
                            "required": True,
                            "type": schema.get("type", "string"),
                            "description": schema.get("description", ""),
                        }
                    )

                # Query params
                for param_name in query_params:
                    schema = query_params_schema.get(param_name, {})
                    action_params.append(
                        {
                            "name": param_name,
                            "in": "query",
                            "required": schema.get("required", False),
                            "type": schema.get("type", "string"),
                            "description": schema.get("description", ""),
                        }
                    )

                # Body fields
                if request_schema:
                    required_fields = request_schema.get("required", [])
                    properties = request_schema.get("properties", {})
                    for param_name in body_fields:
                        prop = properties.get(param_name, {})
                        action_params.append(
                            {
                                "name": param_name,
                                "in": "body",
                                "required": param_name in required_fields,
                                "type": prop.get("type", "string"),
                                "description": prop.get("description", ""),
                            }
                        )

                if action_params:
                    # Action is an enum, use .value to get string
                    action_key = action.value if hasattr(action, "value") else str(action)
                    parameters[action_key] = action_params

        actions = getattr(entity_def, "actions", []) or []
        entities.append(
            {
                "entity_name": entity_def.name,
                "description": description,
                "available_actions": [a.value if hasattr(a, "value") else str(a) for a in actions],
                "parameters": parameters,
            }
        )

    return entities
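Reviewer note: for a hypothetical "candidates" entity exposing get and list, the function above returns a structure shaped like this (values are illustrative, not real Greenhouse metadata; note that actions whose endpoints declare no parameters get no entry under "parameters"):

[
    {
        "entity_name": "candidates",
        "description": "Operations on candidates.",
        "available_actions": ["get", "list"],
        "parameters": {
            "get": [
                {
                    "name": "id",
                    "in": "path",
                    "required": True,
                    "type": "string",
                    "description": "Candidate identifier.",
                },
            ],
        },
    },
]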
def generate_tool_description(model: ConnectorModelProtocol) -> str:
    """Generate AI tool description from connector metadata.

    Produces a detailed description that includes:
    - Per-entity/action parameter signatures with required (*) and optional (?) markers
    - Response structure documentation with pagination hints
    - Example questions if available in the OpenAPI spec

    This is used by the Connector.describe class method decorator to populate
    function docstrings for AI framework integration.

    Args:
        model: Object conforming to ConnectorModelProtocol (e.g., ConnectorModel)

    Returns:
        Formatted description string suitable for AI tool documentation
    """
    lines = []

    # Entity/action parameter details (including pagination params like limit, starting_after)
    lines.append("ENTITIES AND PARAMETERS:")
    for entity in model.entities:
        lines.append(f"  {entity.name}:")
        actions = getattr(entity, "actions", []) or []
        endpoints = getattr(entity, "endpoints", {}) or {}
        for action in actions:
            action_str = action.value if hasattr(action, "value") else str(action)
            endpoint = endpoints.get(action)
            if endpoint:
                param_sig = format_param_signature(endpoint)
                lines.append(f"    - {action_str}{param_sig}")
            else:
                lines.append(f"    - {action_str}()")

    # Response structure (brief, includes pagination hint)
    lines.append("")
    lines.append("RESPONSE STRUCTURE:")
    lines.append("  - list/api_search: {data: [...], meta: {has_more: bool}}")
    lines.append("  - get: Returns entity directly (no envelope)")
    lines.append("  To paginate: pass starting_after=<last_id> while has_more is true")

    # Add example questions if available in openapi_spec
    openapi_spec = getattr(model, "openapi_spec", None)
    if openapi_spec:
        info = getattr(openapi_spec, "info", None)
        if info:
            example_questions = getattr(info, "x_airbyte_example_questions", None)
            if example_questions:
                supported = getattr(example_questions, "supported", None)
                if supported:
                    lines.append("")
                    lines.append("EXAMPLE QUESTIONS:")
                    for q in supported[:MAX_EXAMPLE_QUESTIONS]:
                        lines.append(f"  - {q}")

    # Generic parameter description for function signature
    lines.append("")
    lines.append("FUNCTION PARAMETERS:")
    lines.append("  - entity: Entity name (string)")
    lines.append("  - action: Operation to perform (string)")
    lines.append("  - params: Operation parameters (dict) - see entity details above")
    lines.append("")
    lines.append("Parameter markers: * = required, ? = optional")

    return "\n".join(lines)
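Reviewer note: a hypothetical end-to-end check of the generator with plain stand-in objects (none of these Fake* classes exist in the package); string actions exercise the str(action) fallback:

class FakeEndpoint:
    path_params = ["id"]
    query_params = []
    query_params_schema = {}
    body_fields = []
    request_schema = None

class FakeEntity:
    name = "candidates"
    actions = ["get"]
    endpoints = {"get": FakeEndpoint()}

class FakeModel:
    entities = [FakeEntity()]
    openapi_spec = None  # no EXAMPLE QUESTIONS section is emitted

print(generate_tool_description(FakeModel()))
# ENTITIES AND PARAMETERS:
#   candidates:
#     - get(id*)
# ...followed by the RESPONSE STRUCTURE and FUNCTION PARAMETERS sections.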
+++ airbyte_agent_greenhouse/_vendored/connector_sdk/logging/logger.py
@@ -0,0 +1,273 @@
"""Request/response logging implementation."""

import base64
import json
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Set

from .types import LogSession, RequestLog

# Headers to redact for security
SENSITIVE_HEADERS: Set[str] = {
    "authorization",
    "bearer",
    "api-key",
    "x-api-key",
    "token",
    "secret",
    "password",
    "credential",
}


class RequestLogger:
    """Captures HTTP request/response interactions to a JSON file.

    Implements bounded logging with automatic rotation and flush-before-discard
    to prevent unbounded memory growth in long-running processes.
    """

    def __init__(
        self,
        log_file: str | None = None,
        connector_name: str | None = None,
        max_logs: int | None = 10000,
    ):
        """
        Initialize the request logger.

        Args:
            log_file: Path to write logs. If None, generates a timestamped filename.
            connector_name: Name of the connector being logged.
            max_logs: Maximum number of logs to keep in memory before rotation.
                Set to None for unlimited (not recommended for production).
                Defaults to 10000.
        """
        if log_file is None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            log_file = f".logs/session_{timestamp}.json"

        self.log_file = Path(log_file)
        self.log_file.parent.mkdir(parents=True, exist_ok=True)

        self.session = LogSession(
            session_id=str(uuid.uuid4()),
            connector_name=connector_name,
            max_logs=max_logs,
        )
        self._active_requests: Dict[str, Dict[str, Any]] = {}
        # Store rotated logs that have been flushed from the active buffer
        self._rotated_logs: list[RequestLog] = []

    def _redact_headers(self, headers: Dict[str, str]) -> Dict[str, str]:
        """Redact sensitive headers."""
        redacted = {}
        for key, value in headers.items():
            if any(sensitive in key.lower() for sensitive in SENSITIVE_HEADERS):
                redacted[key] = "[REDACTED]"
            else:
                redacted[key] = value
        return redacted

    def _rotate_logs_if_needed(self) -> None:
        """Rotate logs if the max_logs limit is reached.

        Moves the oldest logs to _rotated_logs before removing them from the
        active buffer. This ensures logs are preserved for the final save()
        without memory growth.
        """
        max_logs = self.session.max_logs
        if max_logs is None:
            # Unlimited logging, no rotation needed
            return

        current_count = len(self.session.logs)
        if current_count >= max_logs:
            # Calculate how many logs to rotate (keep buffer at ~90% to avoid thrashing)
            num_to_rotate = max(1, current_count - int(max_logs * 0.9))

            # Move oldest logs to rotated buffer
            rotated = self.session.logs[:num_to_rotate]
            self._rotated_logs.extend(rotated)

            # Remove rotated logs from active buffer
            self.session.logs = self.session.logs[num_to_rotate:]
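Reviewer note: the rotation arithmetic above, worked through with the default limit (a sketch assuming max_logs=10000):

max_logs = 10000
current_count = 10000                          # buffer has just hit the limit
num_to_rotate = max(1, current_count - int(max_logs * 0.9))
# num_to_rotate == 1000: the 1000 oldest entries move to _rotated_logs,
# leaving 9000 in self.session.logs, so rotation is not triggered on every append.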
    def log_request(
        self,
        method: str,
        url: str,
        path: str,
        headers: Dict[str, str] | None = None,
        params: Dict[str, Any] | None = None,
        body: Any | None = None,
    ) -> str:
        """
        Log the start of an HTTP request.

        Args:
            method: HTTP method (GET, POST, etc.)
            url: Full URL
            path: Request path
            headers: Request headers
            params: Query parameters
            body: Request body

        Returns:
            Request ID for correlating with the response
        """
        request_id = str(uuid.uuid4())
        self._active_requests[request_id] = {
            "start_time": time.time(),
            "method": method,
            "url": url,
            "path": path,
            "headers": self._redact_headers(headers or {}),
            "params": params,
            "body": body,
        }
        return request_id

    def log_response(
        self,
        request_id: str,
        status_code: int,
        response_body: Any | None = None,
        response_headers: Dict[str, str] | None = None,
    ) -> None:
        """
        Log a successful HTTP response.

        Args:
            request_id: ID returned from log_request
            status_code: HTTP status code
            response_body: Response body
            response_headers: Response headers
        """
        if request_id not in self._active_requests:
            return

        request_data = self._active_requests.pop(request_id)
        timing_ms = (time.time() - request_data["start_time"]) * 1000

        # Convert bytes to base64 for JSON serialization
        serializable_body = response_body
        if isinstance(response_body, bytes):
            serializable_body = {
                "_binary": True,
                "_base64": base64.b64encode(response_body).decode("utf-8"),
            }

        log_entry = RequestLog(
            method=request_data["method"],
            url=request_data["url"],
            path=request_data["path"],
            headers=request_data["headers"],
            params=request_data["params"],
            body=request_data["body"],
            response_status=status_code,
            response_body=serializable_body,
            response_headers=response_headers or {},
            timing_ms=timing_ms,
        )

        self.session.logs.append(log_entry)
        self._rotate_logs_if_needed()

    def log_error(
        self,
        request_id: str,
        error: str,
        status_code: int | None = None,
    ) -> None:
        """
        Log an HTTP request error.

        Args:
            request_id: ID returned from log_request
            error: Error message
            status_code: HTTP status code if available
        """
        if request_id not in self._active_requests:
            return

        request_data = self._active_requests.pop(request_id)
        timing_ms = (time.time() - request_data["start_time"]) * 1000

        log_entry = RequestLog(
            method=request_data["method"],
            url=request_data["url"],
            path=request_data["path"],
            headers=request_data["headers"],
            params=request_data["params"],
            body=request_data["body"],
            response_status=status_code,
            timing_ms=timing_ms,
            error=error,
        )

        self.session.logs.append(log_entry)
        self._rotate_logs_if_needed()

    def log_chunk_fetch(self, chunk: bytes) -> None:
        """Log a chunk from a streaming response.

        Args:
            chunk: Binary chunk data from a streaming response
        """
        self.session.chunk_logs.append(chunk)

    def save(self) -> None:
        """Write the current session to the log file.

        Includes both rotated logs and current active logs to ensure
        no data loss during bounded logging.
        """
        # Combine rotated logs with current logs for the complete session
        all_logs = self._rotated_logs + self.session.logs

        # Serialize the session with all logs included
        session_data = self.session.model_dump(mode="json")
        session_data["logs"] = [log.model_dump(mode="json") for log in all_logs]

        with open(self.log_file, "w") as f:
            json.dump(session_data, f, indent=2, default=str)

    def close(self) -> None:
        """Finalize and save the logging session."""
        self.save()
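Reviewer note: the intended lifecycle, sketched as manual calls (inside the SDK this wiring presumably lives in the HTTP client; the URL and header values here are made up):

logger = RequestLogger(log_file=".logs/demo.json", connector_name="greenhouse")

request_id = logger.log_request(
    method="GET",
    url="https://example.test/v1/candidates",
    path="/v1/candidates",
    headers={"Authorization": "Basic abc123"},  # stored as "[REDACTED]"
    params={"per_page": 100},
)
logger.log_response(request_id, status_code=200, response_body={"candidates": []})

logger.close()  # flushes rotated + active logs to .logs/demo.json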
class NullLogger:
    """No-op logger for when logging is disabled."""

    def log_request(self, *args, **kwargs) -> str:
        """No-op log_request."""
        return ""

    def log_response(
        self,
        request_id: str,
        status_code: int,
        response_body: Any | None = None,
        response_headers: Dict[str, str] | None = None,
    ) -> None:
        """No-op log_response."""
        pass

    def log_error(self, *args, **kwargs) -> None:
        """No-op log_error."""
        pass

    def log_chunk_fetch(self, chunk: bytes) -> None:
        """No-op chunk logging for production."""
        pass

    def save(self) -> None:
        """No-op save."""
        pass

    def close(self) -> None:
        """No-op close."""
        pass
+++ airbyte_agent_greenhouse/_vendored/connector_sdk/logging/types.py
@@ -0,0 +1,93 @@
"""Type definitions for request/response logging."""

import base64
from datetime import UTC, datetime
from typing import Any, Dict, List

from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator


def _utc_now() -> datetime:
    """Get current UTC datetime (timezone-aware)."""
    return datetime.now(UTC)


def _encode_bytes(v: bytes) -> dict:
    """Encode bytes as base64 for JSON serialization."""
    return {"_binary": True, "_base64": base64.b64encode(v).decode("utf-8")}


class RequestLog(BaseModel):
    """Captures a single HTTP request/response interaction."""

    model_config = ConfigDict()

    timestamp: datetime = Field(default_factory=_utc_now)
    method: str
    url: str
    path: str
    headers: Dict[str, str] = Field(default_factory=dict)
    params: Dict[str, Any] | None = None
    body: Any | None = None
    response_status: int | None = None
    response_body: Any | None = None
    response_headers: Dict[str, str] = Field(default_factory=dict)
    timing_ms: float | None = None
    error: str | None = None

    @field_serializer("timestamp")
    def serialize_datetime(self, value: datetime) -> str:
        return value.isoformat()


class LogSession(BaseModel):
    """Collection of request logs with session metadata.

    When max_logs is set, the session will maintain a bounded buffer of recent logs.
    Older logs should be flushed to disk before being discarded (handled by RequestLogger).
    """

    model_config = ConfigDict()

    session_id: str
    started_at: datetime = Field(default_factory=_utc_now)
    connector_name: str | None = None
    logs: List[RequestLog] = Field(default_factory=list)
    max_logs: int | None = Field(
        default=10000,
        description="Maximum number of logs to keep in memory. "
        "When limit is reached, oldest logs should be flushed before removal. "
        "Set to None for unlimited (not recommended for production).",
    )
    chunk_logs: List[bytes] = Field(
        default_factory=list,
        description="Captured chunks from streaming responses. Each chunk is logged when log_chunk_fetch() is called.",
    )

    @field_validator("chunk_logs", mode="before")
    @classmethod
    def decode_chunk_logs(cls, v: Any) -> List[bytes]:
        """Decode chunk_logs from JSON representation back to bytes."""
        if v is None or v == []:
            return []
        if isinstance(v, list):
            result = []
            for item in v:
                if isinstance(item, bytes):
                    result.append(item)
                elif isinstance(item, dict) and item.get("_binary"):
                    # Decode from {"_binary": True, "_base64": "..."} format
                    result.append(base64.b64decode(item["_base64"]))
                else:
                    result.append(item)
            return result
        return v

    @field_serializer("started_at")
    def serialize_datetime(self, value: datetime) -> str:
        return value.isoformat()

    @field_serializer("chunk_logs")
    def serialize_chunk_logs(self, value: List[bytes]) -> List[dict]:
        """Serialize bytes chunks as base64 for JSON."""
        return [_encode_bytes(chunk) for chunk in value]
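Reviewer note: a round-trip sketch of why the chunk_logs validator and serializers exist — bytes survive a JSON dump/load cycle (assumes pydantic v2, consistent with the imports above):

session = LogSession(session_id="demo", chunk_logs=[b"\x00\x01raw"])

payload = session.model_dump(mode="json")
# chunk_logs is now [{"_binary": True, "_base64": "AAFyYXc="}]

restored = LogSession.model_validate(payload)
assert restored.chunk_logs == [b"\x00\x01raw"]  # decoded back to bytes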
+++ airbyte_agent_greenhouse/_vendored/connector_sdk/observability/__init__.py
@@ -0,0 +1,11 @@
"""Shared observability components for logging and telemetry."""

from .models import OperationMetadata
from .redactor import DataRedactor
from .session import ObservabilitySession

__all__ = [
    "DataRedactor",
    "ObservabilitySession",
    "OperationMetadata",
]