PyPI - codeastra - Versions diffs - 1.0.0__tar.gz → 1.1.0__tar.gz - Mend

codeastra 1.0.0tar.gz → 1.1.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{codeastra-1.0.0 → codeastra-1.1.0}/PKG-INFO RENAMED Viewed

@@ -1,15 +1,14 @@
 Metadata-Version: 2.4
 Name: codeastra
-Version: 1.0.0
+Version: 1.1.0
 Summary: Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data.
-License: MIT
+License-Expression: MIT
 Project-URL: Homepage, https://codeastra.dev
 Project-URL: Documentation, https://docs.codeastra.dev
 Project-URL: Repository, https://github.com/codeastra/codeastra-python
 Keywords: ai,agents,langchain,crewai,privacy,hipaa,security,tokenization
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11

codeastra-1.1.0/codeastra/client.py ADDED Viewed

@@ -0,0 +1,414 @@
+"""
+CodeAstraClient — full-featured async/sync HTTP client for the Codeastra API.
+New in v1.1.0:
+  - mode="cloud"   — default, uses app.codeastra.dev
+  - mode="onprem"  — pulls deployment package, runs vault locally
+  - mode="hybrid"  — local vault + cloud LLM (best for enterprise)
+  - zero_log=True  — zero logging mode, max privacy
+  - Auto-register executor on init
+  - Auto-detect environment
+  - Auto-generate on-premise package
+  - HMAC verification of executor calls
+  - Tamper-proof audit verification
+  - Auto-signup on first use
+"""
+from __future__ import annotations
+import re
+import os
+import json
+import hmac
+import socket
+import hashlib
+import asyncio
+from pathlib import Path
+from typing import Any, Optional
+import httpx
+# Matches a vault token of the form [CVT:<UPPERCASE LETTERS>:<UPPERCASE HEX>].
+TOKEN_RE = re.compile(r'\[CVT:[A-Z]+:[A-F0-9]+\]')
+# Base URL returned by _get_base_url() for cloud mode; also used for signup.
+_DEFAULT_BASE   = "https://app.codeastra.dev"
+# Fallback for onprem/hybrid mode when CODEASTRA_ONPREM_URL is not set.
+_ONPREM_DEFAULT = "http://localhost:4000"
+def _detect_environment() -> str:
+    """
+    Pick the deployment mode when the caller did not choose one.
+    CODEASTRA_MODE wins when it holds "cloud", "onprem" or "hybrid"
+    (case-insensitive). Otherwise a TCP connection to localhost:4000 is
+    attempted with a one-second timeout: success means "onprem", any
+    failure means "cloud".
+    """
+    configured = os.environ.get("CODEASTRA_MODE", "").lower()
+    if configured in ("cloud", "onprem", "hybrid"):
+        return configured
+    try:
+        # Only reachability matters, so the probe socket is closed at once
+        socket.create_connection(("localhost", 4000), timeout=1).close()
+    except Exception:
+        return "cloud"
+    return "onprem"
+def _get_base_url(mode: str, base_url: Optional[str] = None) -> str:
+    """
+    Resolve the API base URL, always without a trailing slash.
+    Args:
+        mode: "cloud", "onprem" or "hybrid".
+        base_url: explicit override; takes precedence over everything else.
+    Returns:
+        The override if given; for onprem/hybrid the CODEASTRA_ONPREM_URL
+        environment variable or the local default; otherwise the cloud URL.
+    """
+    if base_url:
+        return base_url.rstrip("/")
+    if mode in ("onprem", "hybrid"):
+        # Request paths are appended as f"{base_url}{path}", so the env value
+        # needs the same slash-stripping as an explicit override. `or` also
+        # makes an empty variable fall back to the default.
+        return (os.environ.get("CODEASTRA_ONPREM_URL") or _ONPREM_DEFAULT).rstrip("/")
+    return _DEFAULT_BASE
+class CodeAstraClient:
+    """
+    Full-featured Codeastra client.
+    Modes:
+        auto    — default. Uses CODEASTRA_MODE or probes for a local vault.
+        cloud   — uses app.codeastra.dev
+        onprem  — local vault. Auto-generates deployment package on first use.
+        hybrid  — local vault + cloud LLM. Best for enterprise.
+    Usage:
+        # Cloud (zero config when no local vault is running)
+        client = CodeAstraClient(api_key="sk-guard-xxx")
+        # On-premise (auto-generates docker-compose + setup.sh)
+        client = CodeAstraClient(api_key="sk-guard-xxx", mode="onprem")
+        # Hybrid (local vault, cloud LLM)
+        client = CodeAstraClient(api_key="sk-guard-xxx", mode="hybrid")
+        # Zero logging
+        client = CodeAstraClient(api_key="sk-guard-xxx", zero_log=True)
+        # With executor auto-registered
+        client = CodeAstraClient(api_key="sk-guard-xxx",
+                                  executor_url="https://your-app.com/execute")
+        # No API key — auto-signup
+        client = CodeAstraClient()
+    """
+    def __init__(
+        self,
+        api_key:      str   = None,
+        base_url:     str   = None,
+        agent_id:     str   = "sdk-agent",
+        timeout:      float = 10.0,
+        executor_url: str   = None,
+        mode:         str   = "auto",
+        zero_log:     bool  = False,
+        onprem_dir:   str   = "./codeastra-onprem",
+        verbose:      bool  = False,
+    ):
+        """
+        Resolve credentials and mode, then run the optional one-time setup.
+        Args:
+            api_key: API key; falls back to CODEASTRA_API_KEY, then signup.
+            base_url: explicit API base URL, overriding the mode default.
+            agent_id: identifier sent with every request body.
+            timeout: per-request timeout handed to httpx.
+            executor_url: if set, registered via POST /agent/executor.
+            mode: "auto", "cloud", "onprem" or "hybrid".
+            zero_log: send the X-Zero-Log header with every request.
+            onprem_dir: directory for the generated on-premise package.
+            verbose: print progress and non-fatal warnings.
+        Raises:
+            ValueError: no API key could be found or created.
+        """
+        # Key lookup order: argument, environment, then _auto_signup()
+        if not api_key:
+            api_key = os.environ.get("CODEASTRA_API_KEY")
+        if not api_key:
+            api_key = self._auto_signup()
+        # Auto-detect mode
+        if mode == "auto":
+            mode = _detect_environment()
+        self.api_key     = api_key
+        self.agent_id    = agent_id
+        self.mode        = mode
+        self.zero_log    = zero_log
+        self._verbose    = verbose
+        self._timeout    = timeout
+        self._onprem_dir = Path(onprem_dir)
+        self.base_url    = _get_base_url(mode, base_url)
+        # Set unconditionally so the attribute exists without an executor
+        self._executor_url = executor_url
+        self._headers = {
+            "X-API-Key":    api_key,
+            "Content-Type": "application/json",
+        }
+        if zero_log:
+            self._headers["X-Zero-Log"] = "true"
+        # Both HTTP clients are created lazily on first use
+        self._sync_client:  Optional[httpx.Client]      = None
+        self._async_client: Optional[httpx.AsyncClient] = None
+        if verbose:
+            print(f"[CodeAstra] mode={mode} base={self.base_url} zero_log={zero_log}")
+        # On-premise: auto-generate deployment package
+        if mode in ("onprem", "hybrid"):
+            self._setup_onprem(mode)
+        # Auto-register executor if provided; failure is deliberately non-fatal
+        if executor_url:
+            try:
+                self._post("/agent/executor", {
+                    "execution_url": executor_url,
+                    "action_type":   "*",
+                    "agent_id":      agent_id,
+                    "description":   f"Auto-registered by SDK agent {agent_id} ({mode})",
+                })
+                if verbose:
+                    print(f"[CodeAstra] Executor auto-registered: {executor_url}")
+            except Exception as e:
+                if verbose:
+                    print(f"[CodeAstra] Executor registration skipped: {e}")
+    # ── Auto-signup ───────────────────────────────────────────────────────────
+    def _auto_signup(self) -> str:
+        """
+        Return a saved API key, or create an account and save its key.
+        Credentials live in ~/.codeastra/credentials as JSON. Email, password
+        and name come from CODEASTRA_EMAIL / CODEASTRA_PASSWORD /
+        CODEASTRA_NAME, with random values as fallback.
+        Raises:
+            ValueError: no saved key exists and signup did not yield one.
+        """
+        creds_path = Path.home() / ".codeastra" / "credentials"
+        if creds_path.exists():
+            try:
+                data = json.loads(creds_path.read_text())
+                key  = data.get("api_key")
+                if key:
+                    return key
+            except Exception:
+                # Unreadable or corrupt file: fall through to a fresh signup
+                pass
+        import uuid
+        email    = os.environ.get("CODEASTRA_EMAIL",    f"user-{uuid.uuid4().hex[:8]}@codeastra.local")
+        password = os.environ.get("CODEASTRA_PASSWORD", uuid.uuid4().hex)
+        name     = os.environ.get("CODEASTRA_NAME",     f"SDK User {uuid.uuid4().hex[:6]}")
+        signup_error = None
+        try:
+            r = httpx.post(f"{_DEFAULT_BASE}/auth/signup", json={
+                "name": name, "email": email, "password": password,
+            }, timeout=10)
+            if r.is_success:
+                data    = r.json()
+                api_key = data.get("api_key")
+                if api_key:
+                    creds_path.parent.mkdir(parents=True, exist_ok=True)
+                    try:
+                        # The file holds a key and a password, so restrict it
+                        # to the owner before any secret is written
+                        creds_path.touch(mode=0o600, exist_ok=True)
+                        creds_path.chmod(0o600)
+                    except OSError:
+                        # Best effort: some filesystems ignore permissions
+                        pass
+                    creds_path.write_text(json.dumps({
+                        "api_key": api_key, "email": email, "password": password,
+                    }))
+                    print(f"[CodeAstra] Account created. Key saved to {creds_path}")
+                    return api_key
+        except Exception as e:
+            # Kept so the final error can show why signup failed
+            signup_error = e
+        raise ValueError(
+            "No API key. Set CODEASTRA_API_KEY or pass api_key= "
+            "or sign up at https://app.codeastra.dev"
+        ) from signup_error
+    # ── On-premise setup ──────────────────────────────────────────────────────
+    def _setup_onprem(self, mode: str):
+        """
+        Generate the on-premise deployment package unless setup.sh exists.
+        Files returned by POST /onprem/generate are written under
+        self._onprem_dir. On any failure the client switches itself to cloud
+        mode and says so, because the caller asked for a local vault.
+        """
+        setup_sh = self._onprem_dir / "setup.sh"
+        if setup_sh.exists():
+            if self._verbose:
+                print(f"[CodeAstra] On-premise package at {self._onprem_dir}")
+            return
+        if self._verbose:
+            print("[CodeAstra] Generating on-premise package...")
+        try:
+            resp = self._post("/onprem/generate", {
+                "deployment_mode": "docker",
+                "llm_provider":    "ollama",
+                "llm_model":       "llama3",
+                "air_gapped":      mode != "hybrid",
+                "name":            f"codeastra-{self.agent_id}",
+            })
+            files = resp.get("files", {})
+            if files:
+                self._onprem_dir.mkdir(parents=True, exist_ok=True)
+                root = self._onprem_dir.resolve()
+                for filename, content in files.items():
+                    fpath = (root / filename).resolve()
+                    # File names come from the server response, so refuse any
+                    # that would resolve outside the package directory
+                    if root not in fpath.parents:
+                        print(f"[CodeAstra] Skipping unsafe package path: {filename!r}")
+                        continue
+                    fpath.parent.mkdir(parents=True, exist_ok=True)
+                    fpath.write_text(content)
+                if setup_sh.exists():
+                    setup_sh.chmod(0o755)
+                print(f"\n[CodeAstra] On-premise package ready: {self._onprem_dir}")
+                print(f"  Run: cd {self._onprem_dir} && bash setup.sh\n")
+        except Exception as e:
+            # Printed even when not verbose: a silent switch from a local
+            # vault to the cloud endpoint changes where data is sent
+            print(f"[CodeAstra] On-premise setup warning: {e} — falling back to cloud")
+            self.base_url = _DEFAULT_BASE
+            self.mode     = "cloud"
+    # ── HMAC verification ─────────────────────────────────────────────────────
+    @staticmethod
+    def verify_executor_call(payload: str, signature: str, secret: str) -> bool:
+        """
+        Verify an incoming executor call is genuinely from Codeastra.
+        Use in your executor endpoint to reject forged requests.
+        Args:
+            payload: raw request body, as str or bytes.
+            signature: header value, expected as "sha256=<hex digest>".
+            secret: shared secret used as the HMAC key.
+        Returns:
+            True only for a matching signature. A missing or malformed
+            signature gives False rather than an exception.
+        Usage:
+            @app.post("/execute")
+            def execute(request):
+                if not CodeAstraClient.verify_executor_call(
+                    request.body, request.headers["X-Codeastra-Signature"], YOUR_SECRET
+                ):
+                    raise HTTPException(401)
+        """
+        if not signature or not secret:
+            return False
+        expected = "sha256=" + hmac.new(
+            secret.encode(),
+            payload.encode() if isinstance(payload, str) else payload,
+            hashlib.sha256
+        ).hexdigest()
+        # Compare as bytes: compare_digest raises TypeError for non-ASCII str
+        # or mixed str/bytes, which a forged header could otherwise trigger
+        if isinstance(signature, str):
+            signature = signature.encode("utf-8", "surrogatepass")
+        return hmac.compare_digest(expected.encode(), bytes(signature))
+    # ── Audit verification ────────────────────────────────────────────────────
+    def verify_audit(self) -> dict:
+        """
+        Fetch /audit/secure/verify.
+        Returns the server response, or {"verified": False, "error": ...}
+        when the request fails.
+        """
+        try:
+            return self._get("/audit/secure/verify")
+        except Exception as e:
+            return {"verified": False, "error": str(e)}
+    def export_audit(self, output_path: str = "audit_report.json") -> str:
+        """
+        Fetch /audit/secure/export and write it to output_path as JSON.
+        Returns output_path on success.
+        NOTE(review): on failure the error text is returned instead of being
+        raised, so callers cannot tell it from a path without checking the
+        file. Left as is to avoid breaking existing callers.
+        """
+        try:
+            data = self._get("/audit/secure/export")
+            Path(output_path).write_text(json.dumps(data, indent=2))
+            return output_path
+        except Exception as e:
+            return str(e)
+    # ── Zero-log mode ─────────────────────────────────────────────────────────
+    def set_zero_log(self, enabled: bool = True):
+        """
+        Enable/disable zero-logging mode.
+        The X-Zero-Log header is updated for future clients and on any
+        client already open, so no open client is dropped unclosed.
+        """
+        self.zero_log = enabled
+        live_clients = [c for c in (self._sync_client, self._async_client) if c is not None]
+        for headers in [self._headers] + [c.headers for c in live_clients]:
+            if enabled:
+                headers["X-Zero-Log"] = "true"
+            else:
+                headers.pop("X-Zero-Log", None)
+    # ── sync helpers ──────────────────────────────────────────────────────────
+    def _get_sync(self) -> httpx.Client:
+        """Return the shared sync client, creating it if absent or closed."""
+        if self._sync_client is None or self._sync_client.is_closed:
+            self._sync_client = httpx.Client(
+                headers=self._headers, timeout=self._timeout)
+        return self._sync_client
+    def _post(self, path: str, body: dict) -> dict:
+        """POST body as JSON to base_url + path; raise on a non-2xx status."""
+        r = self._get_sync().post(f"{self.base_url}{path}", json=body)
+        r.raise_for_status()
+        return r.json()
+    def _get(self, path: str, params: dict = None) -> dict:
+        """GET base_url + path with query params; raise on a non-2xx status."""
+        r = self._get_sync().get(f"{self.base_url}{path}", params=params or {})
+        r.raise_for_status()
+        return r.json()
+    # ── async helpers ─────────────────────────────────────────────────────────
+    def _get_async(self) -> httpx.AsyncClient:
+        """Return the shared async client, creating it if absent or closed."""
+        if self._async_client is None or self._async_client.is_closed:
+            self._async_client = httpx.AsyncClient(
+                headers=self._headers, timeout=self._timeout)
+        return self._async_client
+    async def _apost(self, path: str, body: dict) -> dict:
+        """Async counterpart of _post."""
+        r = await self._get_async().post(f"{self.base_url}{path}", json=body)
+        r.raise_for_status()
+        return r.json()
+    async def _aget(self, path: str, params: dict = None) -> dict:
+        """Async counterpart of _get."""
+        r = await self._get_async().get(
+            f"{self.base_url}{path}", params=params or {})
+        r.raise_for_status()
+        return r.json()
+    # ── public sync API ───────────────────────────────────────────────────────
+    def tokenize(self, data: dict, classification: str = "pii", ttl_hours: int = 24) -> dict:
+        """Store data via /vault/store and return the response's "tokens" dict."""
+        resp = self._post("/vault/store", {
+            "data": data, "agent_id": self.agent_id,
+            "classification": classification, "ttl_hours": ttl_hours,
+        })
+        return resp.get("tokens", {})
+    def execute(self, action_type: str, params: dict, pipeline_id: str = None) -> dict:
+        """
+        Submit an action. With a pipeline_id it goes to /pipeline/action,
+        otherwise to /agent/action.
+        """
+        body = {"agent_id": self.agent_id, "action_type": action_type, "params": params}
+        if pipeline_id:
+            body["pipeline_id"] = pipeline_id
+            return self._post("/pipeline/action", body)
+        return self._post("/agent/action", body)
+    def grant(self, receiving_agent: str, tokens: list, allowed_actions: list = None,
+              pipeline_id: str = None, purpose: str = None) -> dict:
+        """
+        Grant another agent access to tokens via /vault/grant.
+        allowed_actions defaults to an empty list in the request body.
+        """
+        return self._post("/vault/grant", {
+            "granting_agent": self.agent_id, "receiving_agent": receiving_agent,
+            "tokens": tokens,
+            # None stands in for the old mutable [] default
+            "allowed_actions": [] if allowed_actions is None else allowed_actions,
+            "pipeline_id": pipeline_id, "purpose": purpose,
+        })
+    def audit(self, pipeline_id: str = None, token: str = None) -> list:
+        """Return the "audit" list from /pipeline/audit, optionally filtered."""
+        params = {}
+        if pipeline_id: params["pipeline_id"] = pipeline_id
+        if token:       params["token"]       = token
+        return self._get("/pipeline/audit", params).get("audit", [])
+    def stats(self) -> dict:
+        """Return the /vault/stats response."""
+        return self._get("/vault/stats")
+    # ── public async API ──────────────────────────────────────────────────────
+    async def atokenize(self, data: dict, classification: str = "pii", ttl_hours: int = 24) -> dict:
+        """Async counterpart of tokenize."""
+        resp = await self._apost("/vault/store", {
+            "data": data, "agent_id": self.agent_id,
+            "classification": classification, "ttl_hours": ttl_hours,
+        })
+        return resp.get("tokens", {})
+    async def aexecute(self, action_type: str, params: dict, pipeline_id: str = None) -> dict:
+        """Async counterpart of execute."""
+        body = {"agent_id": self.agent_id, "action_type": action_type, "params": params}
+        if pipeline_id:
+            body["pipeline_id"] = pipeline_id
+            return await self._apost("/pipeline/action", body)
+        return await self._apost("/agent/action", body)
+    async def agrant(self, receiving_agent: str, tokens: list,
+                     allowed_actions: list = None, pipeline_id: str = None,
+                     purpose: str = None) -> dict:
+        """Async counterpart of grant, sending the same request fields."""
+        return await self._apost("/vault/grant", {
+            "granting_agent": self.agent_id, "receiving_agent": receiving_agent,
+            "tokens": tokens,
+            "allowed_actions": [] if allowed_actions is None else allowed_actions,
+            "pipeline_id": pipeline_id, "purpose": purpose,
+        })
+    # ── utility ───────────────────────────────────────────────────────────────
+    @staticmethod
+    def extract_tokens(obj: Any) -> list:
+        """Return every token in obj; non-strings are JSON-encoded first."""
+        text = json.dumps(obj) if not isinstance(obj, str) else obj
+        return TOKEN_RE.findall(text)
+    @staticmethod
+    def contains_token(val: Any) -> bool:
+        """Return True if val (JSON-encoded unless a str) holds a token."""
+        text = json.dumps(val) if not isinstance(val, str) else str(val)
+        return bool(TOKEN_RE.search(text))
+    @staticmethod
+    def is_token(val: str) -> bool:
+        """Return True if val, ignoring outer whitespace, is exactly one token."""
+        # Non-strings cannot be tokens; previously they raised AttributeError
+        return isinstance(val, str) and bool(TOKEN_RE.fullmatch(val.strip()))
+    def info(self) -> dict:
+        """Return the resolved mode, base URL, agent id and zero-log flag."""
+        return {
+            "mode":      self.mode,
+            "base_url":  self.base_url,
+            "agent_id":  self.agent_id,
+            "zero_log":  self.zero_log,
+        }
+    def close(self):
+        """Close the sync client if one was created."""
+        if self._sync_client:  self._sync_client.close()
+    async def aclose(self):
+        """Close the async client if one was created."""
+        if self._async_client: await self._async_client.aclose()
+    def __enter__(self):  return self
+    def __exit__(self, *_): self.close()
+    async def __aenter__(self): return self
+    async def __aexit__(self, *_): await self.aclose()
+    def __repr__(self):
+        return f"CodeAstraClient(mode={self.mode!r}, agent_id={self.agent_id!r}, zero_log={self.zero_log})"

{codeastra-1.0.0 → codeastra-1.1.0}/codeastra/middleware.py RENAMED Viewed

@@ -145,16 +145,29 @@ class BlindAgentMiddleware:
     def __init__(
         self,
         agent:          Any,
-        api_key:        str,
-        agent_id:       str = "sdk-agent",
-        base_url:       str = "https://app.codeastra.dev",
-        classification: str = "pii",
+        api_key:        str  = None,
+        agent_id:       str  = "sdk-agent",
+        base_url:       str  = None,
+        classification: str  = "pii",
         pipeline_id:    Optional[str] = None,
         on_tokenize:    Optional[Callable] = None,
         verbose:        bool = False,
+        mode:           str  = "auto",        # auto | cloud | onprem | hybrid
+        zero_log:       bool = False,
+        executor_url:   str  = None,
+        onprem_dir:     str  = "./codeastra-onprem",
     ):
         self._agent          = agent
-        self._client         = CodeAstraClient(api_key, base_url, agent_id)
+        self._client         = CodeAstraClient(
+            api_key      = api_key,
+            base_url     = base_url,
+            agent_id     = agent_id,
+            mode         = mode,
+            zero_log     = zero_log,
+            executor_url = executor_url,
+            onprem_dir   = onprem_dir,
+            verbose      = verbose,
+        )
         self._classification = classification
         self._pipeline_id    = pipeline_id
         self._on_tokenize    = on_tokenize
@@ -410,3 +423,291 @@ class BlindAgentMiddleware:
     def __exit__(self, *_): self.close()
     async def __aenter__(self): return self
     async def __aexit__(self, *_): await self.aclose()
+# ══════════════════════════════════════════════════════════════════════════════
+# INPUT SCANNER — scans prompt text for raw PII/PHI/PCI before agent sees it
+# OUTPUT SCANNER — scans agent response for any leaked real values
+# ══════════════════════════════════════════════════════════════════════════════
+import re as _re
+# Regex patterns for detecting raw sensitive data in free text
+# Field name -> compiled pattern. _scan_text_for_pii() iterates this mapping
+# and uses the field name as the key prefix for each detected value.
+_PATTERNS = {
+    # SSN: 123-45-6789 or 123456789; a hyphen or whitespace may separate the
+    # groups. Area 000/666/9xx, group 00 and serial 0000 are rejected.
+    "ssn": _re.compile(
+        r'\b(?!000|666|9\d{2})\d{3}[-\s]?(?!00)\d{2}[-\s]?(?!0000)\d{4}\b'
+    ),
+    # Credit card: unseparated 13-16 digit runs with a known brand prefix.
+    # The regex does not check Luhn; _scan_text_for_pii() applies
+    # _luhn_check() to each match.
+    "credit_card": _re.compile(
+        r'\b(?:4[0-9]{12}(?:[0-9]{3})?'       # Visa
+        r'|5[1-5][0-9]{14}'                    # Mastercard
+        r'|3[47][0-9]{13}'                     # Amex
+        r'|6(?:011|5[0-9]{2})[0-9]{12}'       # Discover
+        r'|(?:2131|1800|35\d{3})\d{11})\b'    # JCB
+    ),
+    # Email
+    "email": _re.compile(
+        r'\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b'
+    ),
+    # Phone: 10 digits grouped 3-3-4 with optional "-", "." or whitespace
+    # separators, optional parentheses around the first group, optional +1
+    "phone": _re.compile(
+        r'\b(?:\+?1[-.\s]?)?'
+        r'(?:\(?\d{3}\)?[-.\s]?)'
+        r'\d{3}[-.\s]?\d{4}\b'
+    ),
+    # DOB: MM/DD/YYYY or YYYY-MM-DD, with "/" or "-" accepted in either
+    # layout; years are limited to 1900-2099
+    "dob": _re.compile(
+        r'\b(?:0[1-9]|1[0-2])[\/\-](?:0[1-9]|[12]\d|3[01])[\/\-](?:19|20)\d{2}\b'
+        r'|\b(?:19|20)\d{2}[\/\-](?:0[1-9]|1[0-2])[\/\-](?:0[1-9]|[12]\d|3[01])\b'
+    ),
+    # MRN: "MRN" plus optional "-", ":" or whitespace, then 4-12 letters or
+    # digits (not digits only); matched case-insensitively
+    "mrn": _re.compile(
+        r'\bMRN[-:\s]*\s*[A-Z0-9]{4,12}\b', _re.IGNORECASE
+    ),
+    # IP address: dotted-quad IPv4 with each octet in 0-255
+    "ip_address": _re.compile(
+        r'\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b'
+    ),
+}
+def _luhn_check(number: str) -> bool:
+    """
+    Return True when the digits in number pass the Luhn checksum.
+    Non-digit characters are ignored. Fewer than 13 digits always fails.
+    """
+    numerals = [int(ch) for ch in number if ch.isdigit()]
+    if len(numerals) < 13:
+        return False
+    checksum = 0
+    # Walk right to left, doubling every second digit
+    for position, numeral in enumerate(numerals[::-1]):
+        if position % 2 == 1:
+            doubled = numeral * 2
+            numeral = doubled - 9 if doubled > 9 else doubled
+        checksum += numeral
+    return checksum % 10 == 0
+def _scan_text_for_pii(text: str) -> dict:
+    """
+    Scan free text for raw PII/PHI/PCI patterns.
+    Returns {synthetic_field_key: matched_value} for tokenization. The first
+    value of a field is keyed by the bare field name; further distinct values
+    get a numeric suffix ("ssn", "ssn_1", "ssn_2", ...). A value repeated in
+    the text is reported once. Non-string input yields an empty dict.
+    Example:
+        "Patient John Smith SSN 123-45-6789 email john@hospital.org"
+        → {"ssn": "123-45-6789", "email": "john@hospital.org"}
+    """
+    found = {}
+    if not isinstance(text, str):
+        return found
+    for field, pattern in _PATTERNS.items():
+        # Distinct values accepted so far for this field. Its size supplies
+        # the next suffix, so numbering has no gaps from rejected matches.
+        accepted = set()
+        for match in pattern.findall(text):
+            val = (match if isinstance(match, str) else match[0]).strip()
+            # Skip empties, repeats, and anything that is already a token
+            if not val or val in accepted or TOKEN_RE.search(val):
+                continue
+            # Extra validation for credit cards
+            if field == "credit_card" and not _luhn_check(_re.sub(r'\D', '', val)):
+                continue
+            key = f"{field}_{len(accepted)}" if accepted else field
+            found[key] = val
+            accepted.add(val)
+    return found
+def _scan_obj_for_pii(obj: Any) -> dict:
+    """
+    Scan any object (str, dict, list, tuple) for raw PII in free text.
+    Dict values and sequence items are scanned recursively; dict keys and
+    other types are ignored. Results are merged without overwriting: when
+    two children report the same key (for example both contain an SSN), the
+    later value is stored under the next free numeric suffix, so every
+    distinct value is kept.
+    """
+    if isinstance(obj, str):
+        return _scan_text_for_pii(obj)
+    if isinstance(obj, dict):
+        children = obj.values()
+    elif isinstance(obj, (list, tuple)):
+        children = obj
+    else:
+        return {}
+    combined = {}
+    seen_values = set()
+    for child in children:
+        for key, val in _scan_obj_for_pii(child).items():
+            if val in seen_values:
+                continue
+            if key in combined:
+                # A plain update() would drop the earlier value, leaving it
+                # untokenized. Re-key under the next free suffix instead.
+                base = _re.sub(r'_\d+$', '', key)
+                suffix = 1
+                while f"{base}_{suffix}" in combined:
+                    suffix += 1
+                key = f"{base}_{suffix}"
+            combined[key] = val
+            seen_values.add(val)
+    return combined
+def _blind_text(text: str, token_map: dict) -> str:
+    """
+    Replace all known real values in text with their tokens.
+    Args:
+        text: string to rewrite; any other type is returned unchanged.
+        token_map: {real_value: token}.
+    Longer values are replaced first, so a value contained in another one
+    (a phone number with and without country code, say) cannot split the
+    longer value and leave part of it in clear text.
+    """
+    if not isinstance(text, str):
+        return text
+    candidates = [real for real in token_map if isinstance(real, str) and real]
+    for real in sorted(candidates, key=len, reverse=True):
+        if real in text:
+            text = text.replace(real, token_map[real])
+    return text
+def _blind_any(obj: Any, token_map: dict) -> Any:
+    """
+    Replace real values anywhere in obj with tokens.
+    Strings are rewritten with _blind_text. Dict values and list/tuple items
+    are processed recursively, returning a new container of the same basic
+    kind (tuple subclasses come back as plain tuples). Dict keys and other
+    types are returned untouched.
+    """
+    if isinstance(obj, str):
+        return _blind_text(obj, token_map)
+    if isinstance(obj, dict):
+        return {k: _blind_any(v, token_map) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_blind_any(item, token_map) for item in obj]
+    if isinstance(obj, tuple):
+        # Tuples used to pass through unblinded even when a scan had found
+        # sensitive values inside them
+        return tuple(_blind_any(item, token_map) for item in obj)
+    return obj
+# ── Patch BlindAgentMiddleware with input + output scanning ──────────────────
+# References to the unpatched methods, captured before they are replaced
+# below. The visible replacements do not call them; they call self._agent.
+_orig_run    = BlindAgentMiddleware.run
+_orig_invoke = BlindAgentMiddleware.invoke
+_orig_chat   = BlindAgentMiddleware.chat
+_orig_arun   = BlindAgentMiddleware.arun
+_orig_ainvoke = BlindAgentMiddleware.ainvoke
+def _scan_and_blind_input(self, *args, **kwargs):
+    """
+    Scan all input args/kwargs for raw PII/PHI/PCI.
+    Tokenize any found values before passing to the agent.
+    Returns (new_args, new_kwargs).
+    Side effects: adds to self._value_to_token and self._session_tokens.
+    NOTE(review): this fails open. If scanning or tokenization raises, the
+    original args/kwargs are returned unmodified and the agent sees raw
+    values. Confirm that is acceptable for this deployment.
+    """
+    try:
+        # default=str keeps non-JSON-serializable arguments (message objects
+        # and the like) from raising before the scan can run
+        all_text = json.dumps(list(args), default=str) + json.dumps(kwargs, default=str)
+        raw_pii  = _scan_obj_for_pii(all_text)
+        if not raw_pii:
+            return args, kwargs
+        # Strip only a trailing numeric suffix to get the field name, since
+        # split("_")[0] turned "credit_card" into "credit" and "ip_address"
+        # into "ip". Assumes _classify expects the _PATTERNS field names —
+        # TODO confirm.
+        fields = {_re.sub(r'_\d+$', '', k) for k in raw_pii}
+        classification = _classify(fields)
+        minted = self._client.tokenize(raw_pii, classification=classification)
+        # Build replacement map: {real_value: token}
+        for field, token in minted.items():
+            real_val = raw_pii.get(field)
+            if real_val:
+                self._value_to_token[real_val] = token
+            self._session_tokens[field] = token
+        if self._verbose:
+            print(f"[CodeAstra] Input scan: tokenized {len(minted)} value(s) in prompt: {list(minted.keys())}")
+        # Replace real values in args and kwargs
+        new_args   = tuple(_blind_any(a, self._value_to_token) for a in args)
+        new_kwargs = {k: _blind_any(v, self._value_to_token) for k, v in kwargs.items()}
+        return new_args, new_kwargs
+    except Exception as e:
+        if self._verbose:
+            print(f"[CodeAstra] Input scan warning: {e}")
+        return args, kwargs
+def _scan_output(self, result: Any) -> Any:
+    """
+    Scan agent output for any real values that leaked through.
+    Replace with tokens using session's value_to_token map.
+    Also scan output text for any NEW raw PII not yet tokenized.
+    Returns the blinded result. If tokenizing new values fails, the result
+    is returned with only the already-known values replaced.
+    """
+    # Step 1: replace known real values with existing tokens
+    if self._value_to_token:
+        result = _blind_any(result, self._value_to_token)
+    # Step 2: scan output for any new raw PII that leaked
+    new_pii = _scan_obj_for_pii(result)
+    if not new_pii:
+        return result
+    try:
+        # Strip only a trailing numeric suffix so multi-word fields such as
+        # "credit_card" keep their full name. Assumes _classify expects the
+        # _PATTERNS field names — TODO confirm.
+        fields = {_re.sub(r'_\d+$', '', k) for k in new_pii}
+        classification = _classify(fields)
+        minted = self._client.tokenize(new_pii, classification=classification)
+        for field, token in minted.items():
+            real_val = new_pii.get(field)
+            if real_val:
+                self._value_to_token[real_val] = token
+            self._session_tokens[field] = token
+        result = _blind_any(result, self._value_to_token)
+        if self._verbose:
+            print(f"[CodeAstra] Output gate: caught {len(minted)} leaked value(s): {list(minted.keys())}")
+    except Exception as e:
+        if self._verbose:
+            print(f"[CodeAstra] Output gate warning: {e}")
+    return result
+async def _ascan_output(self, result: Any) -> Any:
+    """
+    Async version of _scan_output.
+    Replaces already-known real values in result, then tokenizes any newly
+    detected values through the client's atokenize() and replaces those too.
+    If tokenization fails, the result is returned with only the known
+    values replaced.
+    """
+    if self._value_to_token:
+        result = _blind_any(result, self._value_to_token)
+    new_pii = _scan_obj_for_pii(result)
+    if not new_pii:
+        return result
+    try:
+        # Strip only a trailing numeric suffix so multi-word fields such as
+        # "ip_address" keep their full name. Assumes _classify expects the
+        # _PATTERNS field names — TODO confirm.
+        fields = {_re.sub(r'_\d+$', '', k) for k in new_pii}
+        classification = _classify(fields)
+        minted = await self._client.atokenize(new_pii, classification=classification)
+        for field, token in minted.items():
+            real_val = new_pii.get(field)
+            if real_val:
+                self._value_to_token[real_val] = token
+            self._session_tokens[field] = token
+        result = _blind_any(result, self._value_to_token)
+        if self._verbose:
+            print(f"[CodeAstra] Output gate (async): caught {len(minted)} leaked value(s)")
+    except Exception as e:
+        if self._verbose:
+            print(f"[CodeAstra] Output gate warning: {e}")
+    return result
+# ── Monkey-patch all proxy methods with input + output scanning ───────────────
+def _patched_run(self, *args, **kwargs):
+    """Blind the inputs, call the agent's run(), then gate its output."""
+    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
+    raw = self._agent.run(*safe_args, **safe_kwargs)
+    # Existing tool-output blinding first, then the new output gate
+    return _scan_output(self, self._blind_output(raw))
+def _patched_invoke(self, *args, **kwargs):
+    """
+    Blind the inputs, call the agent's invoke(), then gate its output.
+    For a dict result with an "output" key, only that entry is blinded, in
+    place, and the same dict is returned. Any other result is blinded whole.
+    """
+    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
+    response = self._agent.invoke(*safe_args, **safe_kwargs)
+    if not (isinstance(response, dict) and "output" in response):
+        return _scan_output(self, self._blind_output(response))
+    response["output"] = self._blind_output(response["output"])
+    response["output"] = _scan_output(self, response["output"])
+    return response
+def _patched_chat(self, *args, **kwargs):
+    """Blind the inputs, call the agent's chat(), then gate its output."""
+    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
+    reply = self._agent.chat(*safe_args, **safe_kwargs)
+    return _scan_output(self, self._blind_output(reply))
+async def _patched_arun(self, *args, **kwargs):
+    """Async run(): blind the inputs, await the agent, gate the output."""
+    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
+    raw = await self._agent.arun(*safe_args, **safe_kwargs)
+    blinded = await self._ablind_output(raw)
+    return await _ascan_output(self, blinded)
+async def _patched_ainvoke(self, *args, **kwargs):
+    """
+    Async invoke(): blind the inputs, await the agent, gate the output.
+    For a dict result with an "output" key, only that entry is blinded, in
+    place, and the same dict is returned. Any other result is blinded whole.
+    """
+    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
+    response = await self._agent.ainvoke(*safe_args, **safe_kwargs)
+    if not (isinstance(response, dict) and "output" in response):
+        blinded = await self._ablind_output(response)
+        return await _ascan_output(self, blinded)
+    response["output"] = await self._ablind_output(response["output"])
+    response["output"] = await _ascan_output(self, response["output"])
+    return response
+# Apply patches
+# Replace the five proxy methods on the class itself, so every instance,
+# including ones created before this point, uses the scanning versions.
+BlindAgentMiddleware.run              = _patched_run
+BlindAgentMiddleware.invoke           = _patched_invoke
+BlindAgentMiddleware.chat             = _patched_chat
+BlindAgentMiddleware.arun             = _patched_arun
+BlindAgentMiddleware.ainvoke          = _patched_ainvoke
+# Expose scanner functions for direct use
+# _scan_input and _scan_output become ordinary methods (self is bound on
+# access); scan_text is a staticmethod taking just the text.
+BlindAgentMiddleware._scan_input      = _scan_and_blind_input
+BlindAgentMiddleware._scan_output     = _scan_output
+BlindAgentMiddleware.scan_text        = staticmethod(_scan_text_for_pii)

{codeastra-1.0.0 → codeastra-1.1.0}/codeastra.egg-info/PKG-INFO RENAMED Viewed

@@ -1,15 +1,14 @@
 Metadata-Version: 2.4
 Name: codeastra
-Version: 1.0.0
+Version: 1.1.0
 Summary: Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data.
-License: MIT
+License-Expression: MIT
 Project-URL: Homepage, https://codeastra.dev
 Project-URL: Documentation, https://docs.codeastra.dev
 Project-URL: Repository, https://github.com/codeastra/codeastra-python
 Keywords: ai,agents,langchain,crewai,privacy,hipaa,security,tokenization
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11

{codeastra-1.0.0 → codeastra-1.1.0}/pyproject.toml RENAMED Viewed

@@ -4,16 +4,15 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "codeastra"
-version = "1.0.0"
+version = "1.1.0"
 description = "Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data."
 readme = "README.md"
-license = {text = "MIT"}
+license = "MIT"
 requires-python = ">=3.10"
 keywords = ["ai", "agents", "langchain", "crewai", "privacy", "hipaa", "security", "tokenization"]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Developers",
-    "License :: OSI Approved :: MIT License",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",

codeastra-1.0.0/codeastra/client.py DELETED Viewed

@@ -1,239 +0,0 @@
-"""
-CodeAstraClient — low-level async/sync HTTP client for the Codeastra API.
-All SDK components use this. Customers can also use it directly.
-"""
-from __future__ import annotations
-import re
-import json
-import asyncio
-import threading
-from typing import Any, Optional
-import httpx
-TOKEN_RE = re.compile(r'\[CVT:[A-Z]+:[A-F0-9]+\]')
-_DEFAULT_BASE = "https://app.codeastra.dev"
-class CodeAstraClient:
-    """
-    Thin wrapper around the Codeastra REST API.
-    Usage:
-        client = CodeAstraClient(api_key="sk-guard-xxx")
-        tokens = client.tokenize({"name": "John Smith", "ssn": "123-45-6789"})
-        # → {"name": "[CVT:NAME:A1B2]", "ssn": "[CVT:SSN:C3D4]"}
-    """
-    def __init__(
-        self,
-        api_key:   str,
-        base_url:  str = _DEFAULT_BASE,
-        agent_id:  str = "sdk-agent",
-        timeout:   float = 10.0,
-        executor_url: str = None,   # optional: bring your own executor
-    ):
-        self.api_key  = api_key
-        self.base_url = base_url.rstrip("/")
-        self.agent_id = agent_id
-        self._headers = {
-            "X-API-Key":    api_key,
-            "Content-Type": "application/json",
-        }
-        self._timeout = timeout
-        self._executor_url = executor_url
-        # Sync client (lazy)
-        self._sync_client: Optional[httpx.Client] = None
-        # Async client (lazy)
-        self._async_client: Optional[httpx.AsyncClient] = None
-        # Auto-register executor if provided
-        if executor_url:
-            try:
-                self._post("/agent/executor", {
-                    "execution_url": executor_url,
-                    "action_type": "*",
-                    "agent_id": agent_id,
-                    "description": f"Auto-registered by SDK agent {agent_id}",
-                })
-            except Exception:
-                pass  # non-fatal — zero-config mode still works
-    # ── sync helpers ──────────────────────────────────────────────────────────
-    def _get_sync(self) -> httpx.Client:
-        if self._sync_client is None or self._sync_client.is_closed:
-            self._sync_client = httpx.Client(
-                headers=self._headers, timeout=self._timeout)
-        return self._sync_client
-    def _post(self, path: str, body: dict) -> dict:
-        r = self._get_sync().post(f"{self.base_url}{path}", json=body)
-        r.raise_for_status()
-        return r.json()
-    def _get(self, path: str, params: dict = None) -> dict:
-        r = self._get_sync().get(f"{self.base_url}{path}", params=params or {})
-        r.raise_for_status()
-        return r.json()
-    # ── async helpers ─────────────────────────────────────────────────────────
-    def _get_async(self) -> httpx.AsyncClient:
-        if self._async_client is None or self._async_client.is_closed:
-            self._async_client = httpx.AsyncClient(
-                headers=self._headers, timeout=self._timeout)
-        return self._async_client
-    async def _apost(self, path: str, body: dict) -> dict:
-        r = await self._get_async().post(f"{self.base_url}{path}", json=body)
-        r.raise_for_status()
-        return r.json()
-    async def _aget(self, path: str, params: dict = None) -> dict:
-        r = await self._get_async().get(
-            f"{self.base_url}{path}", params=params or {})
-        r.raise_for_status()
-        return r.json()
-    # ── public sync API ───────────────────────────────────────────────────────
-    def tokenize(
-        self,
-        data:           dict,
-        classification: str = "pii",
-        ttl_hours:      int = 24,
-    ) -> dict:
-        """
-        Store real data in vault. Returns token map.
-        {"name": "John"} → {"name": "[CVT:NAME:A1B2]"}
-        """
-        resp = self._post("/vault/store", {
-            "data":           data,
-            "agent_id":       self.agent_id,
-            "classification": classification,
-            "ttl_hours":      ttl_hours,
-        })
-        return resp.get("tokens", {})
-    def execute(
-        self,
-        action_type: str,
-        params:      dict,
-        pipeline_id: str = None,
-    ) -> dict:
-        """
-        Submit an action with token params.
-        Codeastra resolves tokens → real values → POSTs to your executor.
-        Agent never sees real values.
-        """
-        body = {
-            "agent_id":    self.agent_id,
-            "action_type": action_type,
-            "params":      params,
-        }
-        if pipeline_id:
-            body["pipeline_id"] = pipeline_id
-            return self._post("/pipeline/action", body)
-        return self._post("/agent/action", body)
-    def grant(
-        self,
-        receiving_agent: str,
-        tokens:          list[str],
-        allowed_actions: list[str] = [],
-        pipeline_id:     str = None,
-        purpose:         str = None,
-    ) -> dict:
-        """Grant tokens to another agent in a pipeline."""
-        return self._post("/vault/grant", {
-            "granting_agent":  self.agent_id,
-            "receiving_agent": receiving_agent,
-            "tokens":          tokens,
-            "allowed_actions": allowed_actions,
-            "pipeline_id":     pipeline_id,
-            "purpose":         purpose,
-        })
-    def audit(self, pipeline_id: str = None, token: str = None) -> list:
-        """Get chain of custody for a pipeline or token."""
-        params = {}
-        if pipeline_id: params["pipeline_id"] = pipeline_id
-        if token:       params["token"]       = token
-        return self._get("/pipeline/audit", params).get("audit", [])
-    # ── public async API ──────────────────────────────────────────────────────
-    async def atokenize(
-        self,
-        data:           dict,
-        classification: str = "pii",
-        ttl_hours:      int = 24,
-    ) -> dict:
-        resp = await self._apost("/vault/store", {
-            "data":           data,
-            "agent_id":       self.agent_id,
-            "classification": classification,
-            "ttl_hours":      ttl_hours,
-        })
-        return resp.get("tokens", {})
-    async def aexecute(
-        self,
-        action_type: str,
-        params:      dict,
-        pipeline_id: str = None,
-    ) -> dict:
-        body = {
-            "agent_id":    self.agent_id,
-            "action_type": action_type,
-            "params":      params,
-        }
-        if pipeline_id:
-            body["pipeline_id"] = pipeline_id
-            return await self._apost("/pipeline/action", body)
-        return await self._apost("/agent/action", body)
-    async def agrant(
-        self,
-        receiving_agent: str,
-        tokens:          list[str],
-        allowed_actions: list[str] = [],
-        pipeline_id:     str = None,
-    ) -> dict:
-        return await self._apost("/vault/grant", {
-            "granting_agent":  self.agent_id,
-            "receiving_agent": receiving_agent,
-            "tokens":          tokens,
-            "allowed_actions": allowed_actions,
-            "pipeline_id":     pipeline_id,
-        })
-    # ── utility ───────────────────────────────────────────────────────────────
-    @staticmethod
-    def extract_tokens(obj: Any) -> list[str]:
-        """Extract all vault tokens from any string/dict/list."""
-        text = json.dumps(obj) if not isinstance(obj, str) else obj
-        return TOKEN_RE.findall(text)
-    @staticmethod
-    def contains_token(val: Any) -> bool:
-        text = json.dumps(val) if not isinstance(val, str) else str(val)
-        return bool(TOKEN_RE.search(text))
-    @staticmethod
-    def is_token(val: str) -> bool:
-        return bool(TOKEN_RE.fullmatch(val.strip()))
-    def close(self):
-        if self._sync_client:  self._sync_client.close()
-    async def aclose(self):
-        if self._async_client: await self._async_client.aclose()
-    def __enter__(self):  return self
-    def __exit__(self, *_): self.close()
-    async def __aenter__(self): return self
-    async def __aexit__(self, *_): await self.aclose()

{codeastra-1.0.0 → codeastra-1.1.0}/codeastra/__init__.py RENAMED Viewed

File without changes

{codeastra-1.0.0 → codeastra-1.1.0}/codeastra/wrappers.py RENAMED Viewed

File without changes

{codeastra-1.0.0 → codeastra-1.1.0}/codeastra.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{codeastra-1.0.0 → codeastra-1.1.0}/codeastra.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{codeastra-1.0.0 → codeastra-1.1.0}/codeastra.egg-info/requires.txt RENAMED Viewed

File without changes

{codeastra-1.0.0 → codeastra-1.1.0}/codeastra.egg-info/top_level.txt RENAMED Viewed

File without changes

{codeastra-1.0.0 → codeastra-1.1.0}/setup.cfg RENAMED Viewed

File without changes

codeastra 1.0.0__tar.gz → 1.1.0__tar.gz

codeastra 1.0.0__tar.gz → 1.1.0__tar.gz