codeastra 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeastra
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data.
5
- License: MIT
5
+ License-Expression: MIT
6
6
  Project-URL: Homepage, https://codeastra.dev
7
7
  Project-URL: Documentation, https://docs.codeastra.dev
8
8
  Project-URL: Repository, https://github.com/codeastra/codeastra-python
9
9
  Keywords: ai,agents,langchain,crewai,privacy,hipaa,security,tokenization
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
- Classifier: License :: OSI Approved :: MIT License
13
12
  Classifier: Programming Language :: Python :: 3
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
@@ -0,0 +1,414 @@
1
+ """
2
+ CodeAstraClient — full-featured async/sync HTTP client for the Codeastra API.
3
+
4
+ New in v1.1.0:
5
+ - mode="cloud" — default, uses app.codeastra.dev
6
+ - mode="onprem" — pulls deployment package, runs vault locally
7
+ - mode="hybrid" — local vault + cloud LLM (best for enterprise)
8
+ - zero_log=True — zero logging mode, max privacy
9
+ - Auto-register executor on init
10
+ - Auto-detect environment
11
+ - Auto-generate on-premise package
12
+ - HMAC verification of executor calls
13
+ - Tamper-proof audit verification
14
+ - Auto-signup on first use
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import re
19
+ import os
20
+ import json
21
+ import hmac
22
+ import socket
23
+ import hashlib
24
+ import asyncio
25
+ from pathlib import Path
26
+ from typing import Any, Optional
27
+
28
+ import httpx
29
+
30
# Matches vault tokens of the form [CVT:<UPPERCASE TYPE>:<UPPERCASE HEX>].
TOKEN_RE = re.compile(r'\[CVT:[A-Z]+:[A-F0-9]+\]')

# Base URL used in cloud mode and for the auto-signup request.
_DEFAULT_BASE = "https://app.codeastra.dev"
# Base URL used in onprem/hybrid mode when CODEASTRA_ONPREM_URL is not set.
_ONPREM_DEFAULT = "http://localhost:4000"
34
+
35
+
36
+ def _detect_environment() -> str:
37
+ env_mode = os.environ.get("CODEASTRA_MODE", "").lower()
38
+ if env_mode in ("cloud", "onprem", "hybrid"):
39
+ return env_mode
40
+ try:
41
+ s = socket.create_connection(("localhost", 4000), timeout=1)
42
+ s.close()
43
+ return "onprem"
44
+ except Exception:
45
+ pass
46
+ return "cloud"
47
+
48
+
49
+ def _get_base_url(mode: str, base_url: str = None) -> str:
50
+ if base_url:
51
+ return base_url.rstrip("/")
52
+ if mode in ("onprem", "hybrid"):
53
+ return os.environ.get("CODEASTRA_ONPREM_URL", _ONPREM_DEFAULT)
54
+ return _DEFAULT_BASE
55
+
56
+
57
class CodeAstraClient:
    """
    Full-featured Codeastra client (sync and async).

    Modes:
        cloud  — default. Uses app.codeastra.dev
        onprem — local vault. Auto-generates deployment package on first use.
        hybrid — local vault + cloud LLM. Best for enterprise.

    Usage:
        # Cloud (default — zero config)
        client = CodeAstraClient(api_key="sk-guard-xxx")

        # On-premise (auto-generates docker-compose + setup.sh)
        client = CodeAstraClient(api_key="sk-guard-xxx", mode="onprem")

        # Hybrid (local vault, cloud LLM)
        client = CodeAstraClient(api_key="sk-guard-xxx", mode="hybrid")

        # Zero logging
        client = CodeAstraClient(api_key="sk-guard-xxx", zero_log=True)

        # With executor auto-registered
        client = CodeAstraClient(api_key="sk-guard-xxx",
                                 executor_url="https://your-app.com/execute")

        # No API key — auto-signup
        client = CodeAstraClient()
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        agent_id: str = "sdk-agent",
        timeout: float = 10.0,
        executor_url: Optional[str] = None,
        mode: str = "auto",
        zero_log: bool = False,
        onprem_dir: str = "./codeastra-onprem",
        verbose: bool = False,
    ):
        """Create a client; may perform network and filesystem I/O.

        Args:
            api_key: API key. Falls back to ``CODEASTRA_API_KEY`` and then to
                auto-signup when empty.
            base_url: Explicit API base URL; overrides the mode default.
            agent_id: Identifier sent with every request.
            timeout: Per-request timeout in seconds.
            executor_url: When given, registered as a wildcard executor.
            mode: ``auto`` | ``cloud`` | ``onprem`` | ``hybrid``.
            zero_log: Send the ``X-Zero-Log: true`` header on every request.
            onprem_dir: Where the on-premise package is written.
            verbose: Print progress messages.

        Raises:
            ValueError: No API key was supplied and auto-signup failed.
        """
        # Resolve the key: argument, then environment, then auto-signup.
        if not api_key:
            api_key = os.environ.get("CODEASTRA_API_KEY")
        if not api_key:
            api_key = self._auto_signup()

        if mode == "auto":
            mode = _detect_environment()

        self.api_key = api_key
        self.agent_id = agent_id
        self.mode = mode
        self.zero_log = zero_log
        self._verbose = verbose
        self._timeout = timeout
        self._onprem_dir = Path(onprem_dir)
        self.base_url = _get_base_url(mode, base_url)
        # Always defined (None when no executor), so readers never hit
        # AttributeError.
        self._executor_url = executor_url

        self._headers = {
            "X-API-Key": api_key,
            "Content-Type": "application/json",
        }
        if zero_log:
            self._headers["X-Zero-Log"] = "true"

        # HTTP clients are created lazily on first use.
        self._sync_client: Optional[httpx.Client] = None
        self._async_client: Optional[httpx.AsyncClient] = None

        if verbose:
            print(f"[CodeAstra] mode={mode} base={self.base_url} zero_log={zero_log}")

        # On-premise / hybrid: generate the deployment package if missing.
        if mode in ("onprem", "hybrid"):
            self._setup_onprem(mode)

        # Executor registration is best-effort and never fails construction.
        if executor_url:
            try:
                self._post("/agent/executor", {
                    "execution_url": executor_url,
                    "action_type": "*",
                    "agent_id": agent_id,
                    "description": f"Auto-registered by SDK agent {agent_id} ({mode})",
                })
                if verbose:
                    print(f"[CodeAstra] Executor auto-registered: {executor_url}")
            except Exception as e:
                if verbose:
                    print(f"[CodeAstra] Executor registration skipped: {e}")

    # ── Auto-signup ───────────────────────────────────────────────────────────

    @staticmethod
    def _save_credentials(creds_path: Path, payload: dict) -> None:
        """Write *payload* as JSON to *creds_path*, readable by the owner only."""
        creds_path.parent.mkdir(parents=True, exist_ok=True)
        # Create with 0600 from the start so the secret is never briefly
        # world-readable.
        fd = os.open(creds_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as fh:
            fh.write(json.dumps(payload))
        # The mode argument above is ignored for a pre-existing file.
        os.chmod(creds_path, 0o600)

    def _auto_signup(self) -> str:
        """Return a cached API key or create an account on first use.

        Reads ``~/.codeastra/credentials`` first. Otherwise posts to the cloud
        signup endpoint using ``CODEASTRA_EMAIL`` / ``CODEASTRA_PASSWORD`` /
        ``CODEASTRA_NAME`` (random values when unset) and stores the result.

        Raises:
            ValueError: No key could be obtained.
        """
        creds_path = Path.home() / ".codeastra" / "credentials"

        if creds_path.exists():
            try:
                key = json.loads(creds_path.read_text()).get("api_key")
            except (OSError, ValueError, AttributeError):
                # Unreadable or malformed file: fall through to signup.
                key = None
            if key:
                return key

        import secrets
        import uuid
        email = os.environ.get("CODEASTRA_EMAIL", f"user-{uuid.uuid4().hex[:8]}@codeastra.local")
        password = os.environ.get("CODEASTRA_PASSWORD", secrets.token_hex(16))
        name = os.environ.get("CODEASTRA_NAME", f"SDK User {uuid.uuid4().hex[:6]}")

        failure = None
        try:
            r = httpx.post(f"{_DEFAULT_BASE}/auth/signup", json={
                "name": name, "email": email, "password": password,
            }, timeout=10)
            api_key = r.json().get("api_key") if r.is_success else None
        except Exception as exc:
            # Best-effort: any transport/decoding problem becomes the
            # ValueError below.
            failure = exc
            api_key = None

        if api_key:
            try:
                self._save_credentials(creds_path, {
                    "api_key": api_key, "email": email, "password": password,
                })
                print(f"[CodeAstra] Account created. Key saved to {creds_path}")
            except OSError as exc:
                # The account exists; do not lose the key because the disk
                # write failed.
                print(f"[CodeAstra] Account created but key could not be saved to {creds_path}: {exc}")
            return api_key

        raise ValueError(
            "No API key. Set CODEASTRA_API_KEY or pass api_key= "
            "or sign up at https://app.codeastra.dev"
        ) from failure

    # ── On-premise setup ──────────────────────────────────────────────────────

    def _setup_onprem(self, mode: str):
        """Generate the on-premise deployment package unless ``setup.sh`` exists.

        Requests the package from ``/onprem/generate`` and writes the returned
        files under ``onprem_dir``. On any failure the client switches itself
        to cloud mode (``base_url`` and ``mode`` are overwritten).
        """
        setup_sh = self._onprem_dir / "setup.sh"
        if setup_sh.exists():
            if self._verbose:
                print(f"[CodeAstra] On-premise package at {self._onprem_dir}")
            return

        if self._verbose:
            print("[CodeAstra] Generating on-premise package...")

        try:
            resp = self._post("/onprem/generate", {
                "deployment_mode": "docker",
                "llm_provider": "ollama",
                "llm_model": "llama3",
                "air_gapped": mode != "hybrid",
                "name": f"codeastra-{self.agent_id}",
            })

            files = resp.get("files", {})
            if files:
                self._onprem_dir.mkdir(parents=True, exist_ok=True)
                root = self._onprem_dir.resolve()
                for filename, content in files.items():
                    target = (root / str(filename)).resolve()
                    # Filenames come from the HTTP response: refuse anything
                    # that would land outside the package directory.
                    if root not in target.parents:
                        if self._verbose:
                            print(f"[CodeAstra] Skipping unsafe package path: {filename!r}")
                        continue
                    target.parent.mkdir(parents=True, exist_ok=True)
                    target.write_text(content)
                if setup_sh.exists():
                    setup_sh.chmod(0o755)
                print(f"\n[CodeAstra] On-premise package ready: {self._onprem_dir}")
                print(f"  Run: cd {self._onprem_dir} && bash setup.sh\n")

        except Exception as e:
            message = f"[CodeAstra] On-premise setup warning: {e} — falling back to cloud"
            if self._verbose:
                print(message)
            # Switching from a local vault to the cloud changes where data is
            # sent, so surface it even when verbose is off.
            import warnings
            warnings.warn(message, RuntimeWarning, stacklevel=3)
            self.base_url = _DEFAULT_BASE
            self.mode = "cloud"

    # ── HMAC verification ─────────────────────────────────────────────────────

    @staticmethod
    def verify_executor_call(payload: str, signature: str, secret: str) -> bool:
        """
        Verify an incoming executor call is genuinely from Codeastra.
        Use in your executor endpoint to reject forged requests.

        Args:
            payload: Raw request body (``str`` or ``bytes``).
            signature: Value of the signature header, expected as
                ``"sha256=<hex digest>"``.
            secret: Shared secret (``str`` or ``bytes``).

        Returns:
            True only when the signature matches; a missing or malformed
            signature yields False instead of raising.

        Usage:
            @app.post("/execute")
            def execute(request):
                if not CodeAstraClient.verify_executor_call(
                    request.body, request.headers["X-Codeastra-Signature"], YOUR_SECRET
                ):
                    raise HTTPException(401)
        """
        if isinstance(signature, str):
            provided = signature.encode("utf-8")
        elif isinstance(signature, (bytes, bytearray)):
            provided = bytes(signature)
        else:
            return False

        key = secret.encode() if isinstance(secret, str) else secret
        body = payload.encode() if isinstance(payload, str) else payload
        expected = "sha256=" + hmac.new(key, body, hashlib.sha256).hexdigest()
        # Compare as bytes: compare_digest rejects non-ASCII str operands.
        return hmac.compare_digest(expected.encode("ascii"), provided)

    # ── Audit verification ────────────────────────────────────────────────────

    def verify_audit(self) -> dict:
        """Verify tamper-proof audit chain integrity.

        Returns the server response, or ``{"verified": False, "error": ...}``
        when the request fails.
        """
        try:
            return self._get("/audit/secure/verify")
        except Exception as e:
            return {"verified": False, "error": str(e)}

    def export_audit(self, output_path: str = "audit_report.json") -> str:
        """Export full compliance audit report to *output_path*.

        Returns *output_path* on success. On failure the error message is
        returned instead, so callers must not assume the result is a path.
        """
        try:
            data = self._get("/audit/secure/export")
            Path(output_path).write_text(json.dumps(data, indent=2))
            return output_path
        except Exception as e:
            return str(e)

    # ── Zero-log mode ─────────────────────────────────────────────────────────

    def set_zero_log(self, enabled: bool = True):
        """Enable/disable zero-logging mode for all subsequent requests."""
        self.zero_log = enabled
        if enabled:
            self._headers["X-Zero-Log"] = "true"
        else:
            self._headers.pop("X-Zero-Log", None)
        # Live clients hold their own copy of the headers; update them in
        # place rather than dropping them unclosed.
        for client in (self._sync_client, self._async_client):
            if client is None or client.is_closed:
                continue
            if enabled:
                client.headers["X-Zero-Log"] = "true"
            else:
                client.headers.pop("X-Zero-Log", None)

    # ── sync helpers ──────────────────────────────────────────────────────────

    def _get_sync(self) -> "httpx.Client":
        """Return the shared sync HTTP client, creating it on first use."""
        if self._sync_client is None or self._sync_client.is_closed:
            self._sync_client = httpx.Client(
                headers=self._headers, timeout=self._timeout)
        return self._sync_client

    def _post(self, path: str, body: dict) -> dict:
        """POST *body* as JSON to *path*; raise on HTTP error status."""
        r = self._get_sync().post(f"{self.base_url}{path}", json=body)
        r.raise_for_status()
        return r.json()

    def _get(self, path: str, params: Optional[dict] = None) -> dict:
        """GET *path* with optional query params; raise on HTTP error status."""
        r = self._get_sync().get(f"{self.base_url}{path}", params=params or {})
        r.raise_for_status()
        return r.json()

    # ── async helpers ─────────────────────────────────────────────────────────

    def _get_async(self) -> "httpx.AsyncClient":
        """Return the shared async HTTP client, creating it on first use."""
        if self._async_client is None or self._async_client.is_closed:
            self._async_client = httpx.AsyncClient(
                headers=self._headers, timeout=self._timeout)
        return self._async_client

    async def _apost(self, path: str, body: dict) -> dict:
        """Async counterpart of :meth:`_post`."""
        r = await self._get_async().post(f"{self.base_url}{path}", json=body)
        r.raise_for_status()
        return r.json()

    async def _aget(self, path: str, params: Optional[dict] = None) -> dict:
        """Async counterpart of :meth:`_get`."""
        r = await self._get_async().get(
            f"{self.base_url}{path}", params=params or {})
        r.raise_for_status()
        return r.json()

    # ── public sync API ───────────────────────────────────────────────────────

    def tokenize(self, data: dict, classification: str = "pii", ttl_hours: int = 24) -> dict:
        """Store *data* in the vault and return the ``tokens`` mapping."""
        resp = self._post("/vault/store", {
            "data": data, "agent_id": self.agent_id,
            "classification": classification, "ttl_hours": ttl_hours,
        })
        return resp.get("tokens", {})

    def execute(self, action_type: str, params: dict, pipeline_id: Optional[str] = None) -> dict:
        """Run an action; uses the pipeline endpoint when *pipeline_id* is given."""
        body = {"agent_id": self.agent_id, "action_type": action_type, "params": params}
        if pipeline_id:
            body["pipeline_id"] = pipeline_id
            return self._post("/pipeline/action", body)
        return self._post("/agent/action", body)

    def grant(self, receiving_agent: str, tokens: list, allowed_actions: Optional[list] = None,
              pipeline_id: Optional[str] = None, purpose: Optional[str] = None) -> dict:
        """Grant *receiving_agent* access to *tokens*.

        ``allowed_actions`` defaults to an empty list (a fresh one per call).
        """
        return self._post("/vault/grant", {
            "granting_agent": self.agent_id, "receiving_agent": receiving_agent,
            "tokens": tokens,
            "allowed_actions": allowed_actions if allowed_actions is not None else [],
            "pipeline_id": pipeline_id, "purpose": purpose,
        })

    def audit(self, pipeline_id: Optional[str] = None, token: Optional[str] = None) -> list:
        """Return audit entries, optionally filtered by pipeline and/or token."""
        params = {}
        if pipeline_id:
            params["pipeline_id"] = pipeline_id
        if token:
            params["token"] = token
        return self._get("/pipeline/audit", params).get("audit", [])

    def stats(self) -> dict:
        """Return the response of ``/vault/stats``."""
        return self._get("/vault/stats")

    # ── public async API ──────────────────────────────────────────────────────

    async def atokenize(self, data: dict, classification: str = "pii", ttl_hours: int = 24) -> dict:
        """Async counterpart of :meth:`tokenize`."""
        resp = await self._apost("/vault/store", {
            "data": data, "agent_id": self.agent_id,
            "classification": classification, "ttl_hours": ttl_hours,
        })
        return resp.get("tokens", {})

    async def aexecute(self, action_type: str, params: dict, pipeline_id: Optional[str] = None) -> dict:
        """Async counterpart of :meth:`execute`."""
        body = {"agent_id": self.agent_id, "action_type": action_type, "params": params}
        if pipeline_id:
            body["pipeline_id"] = pipeline_id
            return await self._apost("/pipeline/action", body)
        return await self._apost("/agent/action", body)

    async def agrant(self, receiving_agent: str, tokens: list,
                     allowed_actions: Optional[list] = None, pipeline_id: Optional[str] = None,
                     purpose: Optional[str] = None) -> dict:
        """Async counterpart of :meth:`grant`.

        ``purpose`` is only included in the request when it is provided.
        """
        body = {
            "granting_agent": self.agent_id, "receiving_agent": receiving_agent,
            "tokens": tokens,
            "allowed_actions": allowed_actions if allowed_actions is not None else [],
            "pipeline_id": pipeline_id,
        }
        if purpose is not None:
            body["purpose"] = purpose
        return await self._apost("/vault/grant", body)

    # ── utility ───────────────────────────────────────────────────────────────

    @staticmethod
    def extract_tokens(obj: Any) -> list:
        """Return every vault token found in *obj* (non-strings are JSON-dumped)."""
        text = obj if isinstance(obj, str) else json.dumps(obj)
        return TOKEN_RE.findall(text)

    @staticmethod
    def contains_token(val: Any) -> bool:
        """Return True when *val* (or its JSON dump) contains a vault token."""
        text = val if isinstance(val, str) else json.dumps(val)
        return bool(TOKEN_RE.search(text))

    @staticmethod
    def is_token(val: str) -> bool:
        """Return True when *val*, ignoring surrounding whitespace, is exactly one token."""
        if not isinstance(val, str):
            return False
        return bool(TOKEN_RE.fullmatch(val.strip()))

    def info(self) -> dict:
        """Return the effective mode, base URL, agent id and zero-log flag."""
        return {
            "mode": self.mode,
            "base_url": self.base_url,
            "agent_id": self.agent_id,
            "zero_log": self.zero_log,
        }

    def close(self):
        """Close the sync HTTP client if one was created."""
        if self._sync_client:
            self._sync_client.close()

    async def aclose(self):
        """Close the async HTTP client if one was created."""
        if self._async_client:
            await self._async_client.aclose()

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    async def __aenter__(self):
        return self

    async def __aexit__(self, *_):
        await self.aclose()

    def __repr__(self):
        return f"CodeAstraClient(mode={self.mode!r}, agent_id={self.agent_id!r}, zero_log={self.zero_log})"
@@ -145,16 +145,29 @@ class BlindAgentMiddleware:
145
145
  def __init__(
146
146
  self,
147
147
  agent: Any,
148
- api_key: str,
149
- agent_id: str = "sdk-agent",
150
- base_url: str = "https://app.codeastra.dev",
151
- classification: str = "pii",
148
+ api_key: str = None,
149
+ agent_id: str = "sdk-agent",
150
+ base_url: str = None,
151
+ classification: str = "pii",
152
152
  pipeline_id: Optional[str] = None,
153
153
  on_tokenize: Optional[Callable] = None,
154
154
  verbose: bool = False,
155
+ mode: str = "auto", # auto | cloud | onprem | hybrid
156
+ zero_log: bool = False,
157
+ executor_url: str = None,
158
+ onprem_dir: str = "./codeastra-onprem",
155
159
  ):
156
160
  self._agent = agent
157
- self._client = CodeAstraClient(api_key, base_url, agent_id)
161
+ self._client = CodeAstraClient(
162
+ api_key = api_key,
163
+ base_url = base_url,
164
+ agent_id = agent_id,
165
+ mode = mode,
166
+ zero_log = zero_log,
167
+ executor_url = executor_url,
168
+ onprem_dir = onprem_dir,
169
+ verbose = verbose,
170
+ )
158
171
  self._classification = classification
159
172
  self._pipeline_id = pipeline_id
160
173
  self._on_tokenize = on_tokenize
@@ -410,3 +423,291 @@ class BlindAgentMiddleware:
410
423
  def __exit__(self, *_): self.close()
411
424
  async def __aenter__(self): return self
412
425
  async def __aexit__(self, *_): await self.aclose()
426
+
427
+
428
+ # ══════════════════════════════════════════════════════════════════════════════
429
+ # INPUT SCANNER — scans prompt text for raw PII/PHI/PCI before agent sees it
430
+ # OUTPUT SCANNER — scans agent response for any leaked real values
431
+ # ══════════════════════════════════════════════════════════════════════════════
432
+
433
+ import re as _re
434
+
435
# Regex patterns for detecting raw sensitive data in free text.
# Keys are the field names used when the matches are sent for tokenization.
# All groups are non-capturing, so findall() yields whole-match strings.
_PATTERNS = {
    # SSN: 123-45-6789, 123 45 6789 or 123456789; rejects area 000/666/9xx,
    # group 00 and serial 0000.
    "ssn": _re.compile(
        r'\b(?!000|666|9\d{2})\d{3}[-\s]?(?!00)\d{2}[-\s]?(?!0000)\d{4}\b'
    ),
    # Credit card: contiguous digits with a known issuer prefix and length.
    # The Luhn checksum is NOT part of the regex; it is applied separately
    # by the scanner.
    "credit_card": _re.compile(
        r'\b(?:4[0-9]{12}(?:[0-9]{3})?'  # Visa
        r'|5[1-5][0-9]{14}'  # Mastercard
        r'|3[47][0-9]{13}'  # Amex
        r'|6(?:011|5[0-9]{2})[0-9]{12}'  # Discover
        r'|(?:2131|1800|35\d{3})\d{11})\b'  # JCB
    ),
    # Email
    "email": _re.compile(
        r'\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b'
    ),
    # Phone: 10 digits with optional +1/1 prefix, optional parentheses around
    # the area code, and "-", "." or whitespace separators.
    "phone": _re.compile(
        r'\b(?:\+?1[-.\s]?)?'
        r'(?:\(?\d{3}\)?[-.\s]?)'
        r'\d{3}[-.\s]?\d{4}\b'
    ),
    # DOB: MM/DD/YYYY or YYYY-MM-DD ("/" or "-" separators, years 1900-2099)
    "dob": _re.compile(
        r'\b(?:0[1-9]|1[0-2])[\/\-](?:0[1-9]|[12]\d|3[01])[\/\-](?:19|20)\d{2}\b'
        r'|\b(?:19|20)\d{2}[\/\-](?:0[1-9]|1[0-2])[\/\-](?:0[1-9]|[12]\d|3[01])\b'
    ),
    # MRN: "MRN" plus optional "-", ":" or whitespace, then 4-12 alphanumeric
    # characters (case-insensitive).
    "mrn": _re.compile(
        r'\bMRN[-:\s]*\s*[A-Z0-9]{4,12}\b', _re.IGNORECASE
    ),
    # IP address: dotted-quad IPv4 with each octet in 0-255
    "ip_address": _re.compile(
        r'\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b'
    ),
}
473
+
474
+
475
+ def _luhn_check(number: str) -> bool:
476
+ """Validate credit card number with Luhn algorithm."""
477
+ digits = [int(d) for d in number if d.isdigit()]
478
+ if len(digits) < 13:
479
+ return False
480
+ total = 0
481
+ for i, d in enumerate(reversed(digits)):
482
+ if i % 2 == 1:
483
+ d *= 2
484
+ if d > 9:
485
+ d -= 9
486
+ total += d
487
+ return total % 10 == 0
488
+
489
+
490
+ def _scan_text_for_pii(text: str) -> dict:
491
+ """
492
+ Scan free text for raw PII/PHI/PCI patterns.
493
+ Returns {synthetic_field_key: matched_value} for tokenization.
494
+
495
+ Example:
496
+ "Patient John Smith SSN 123-45-6789 email john@hospital.org"
497
+ → {"ssn_0": "123-45-6789", "email_0": "john@hospital.org"}
498
+ """
499
+ found = {}
500
+ if not isinstance(text, str):
501
+ return found
502
+
503
+ for field, pattern in _PATTERNS.items():
504
+ matches = pattern.findall(text)
505
+ for i, match in enumerate(matches):
506
+ val = match.strip() if isinstance(match, str) else match[0].strip()
507
+ if not val or TOKEN_RE.search(val):
508
+ continue
509
+ # Extra validation for credit cards
510
+ if field == "credit_card":
511
+ digits_only = _re.sub(r'\D', '', val)
512
+ if not _luhn_check(digits_only):
513
+ continue
514
+ key = f"{field}_{i}" if i > 0 else field
515
+ found[key] = val
516
+
517
+ return found
518
+
519
+
520
+ def _scan_obj_for_pii(obj: Any) -> dict:
521
+ """Scan any object (str, dict, list) for raw PII in free text."""
522
+ if isinstance(obj, str):
523
+ return _scan_text_for_pii(obj)
524
+ elif isinstance(obj, dict):
525
+ combined = {}
526
+ for v in obj.values():
527
+ combined.update(_scan_obj_for_pii(v))
528
+ return combined
529
+ elif isinstance(obj, list):
530
+ combined = {}
531
+ for item in obj:
532
+ combined.update(_scan_obj_for_pii(item))
533
+ return combined
534
+ return {}
535
+
536
+
537
+ def _blind_text(text: str, token_map: dict) -> str:
538
+ """Replace all known real values in text with their tokens."""
539
+ if not isinstance(text, str):
540
+ return text
541
+ for real, token in token_map.items():
542
+ if real and real in text:
543
+ text = text.replace(real, token)
544
+ return text
545
+
546
+
547
+ def _blind_any(obj: Any, token_map: dict) -> Any:
548
+ """Replace real values anywhere in obj with tokens."""
549
+ if isinstance(obj, str):
550
+ return _blind_text(obj, token_map)
551
+ elif isinstance(obj, dict):
552
+ return {k: _blind_any(v, token_map) for k, v in obj.items()}
553
+ elif isinstance(obj, list):
554
+ return [_blind_any(i, token_map) for i in obj]
555
+ return obj
556
+
557
+
558
+ # ── Patch BlindAgentMiddleware with input + output scanning ──────────────────
559
+
560
# References to the pre-patch BlindAgentMiddleware methods, captured before
# the patched versions are installed.
# NOTE(review): nothing in the visible part of this module reads these names —
# confirm whether they are used elsewhere.
_orig_run = BlindAgentMiddleware.run
_orig_invoke = BlindAgentMiddleware.invoke
_orig_chat = BlindAgentMiddleware.chat
_orig_arun = BlindAgentMiddleware.arun
_orig_ainvoke = BlindAgentMiddleware.ainvoke
565
+
566
+
567
+ def _scan_and_blind_input(self, *args, **kwargs):
568
+ """
569
+ Scan all input args/kwargs for raw PII/PHI/PCI.
570
+ Tokenize any found values before passing to the agent.
571
+ Returns (new_args, new_kwargs).
572
+ """
573
+ # Collect all text from args and kwargs
574
+ all_text = json.dumps(list(args)) + json.dumps(kwargs)
575
+ raw_pii = _scan_obj_for_pii(all_text)
576
+
577
+ if not raw_pii:
578
+ return args, kwargs
579
+
580
+ # Tokenize detected values
581
+ try:
582
+ classification = _classify(set(k.split("_")[0] for k in raw_pii))
583
+ minted = self._client.tokenize(raw_pii, classification=classification)
584
+ # Build replacement map: {real_value: token}
585
+ for field, token in minted.items():
586
+ real_val = raw_pii.get(field)
587
+ if real_val:
588
+ self._value_to_token[real_val] = token
589
+ self._session_tokens[field] = token
590
+
591
+ if self._verbose:
592
+ print(f"[CodeAstra] Input scan: tokenized {len(minted)} value(s) in prompt: {list(minted.keys())}")
593
+
594
+ # Replace real values in args and kwargs
595
+ new_args = tuple(_blind_any(a, self._value_to_token) for a in args)
596
+ new_kwargs = {k: _blind_any(v, self._value_to_token) for k, v in kwargs.items()}
597
+ return new_args, new_kwargs
598
+
599
+ except Exception as e:
600
+ if self._verbose:
601
+ print(f"[CodeAstra] Input scan warning: {e}")
602
+ return args, kwargs
603
+
604
+
605
def _scan_output(self, result: Any) -> Any:
    """
    Scan agent output for any real values that leaked through.
    Replace with tokens using session's value_to_token map.
    Also scan output text for any NEW raw PII not yet tokenized.

    If tokenizing newly found values fails, the result from step 1 is
    returned as-is (a message is printed only in verbose mode).
    """
    # Step 1: replace known real values with existing tokens
    if self._value_to_token:
        result = _blind_any(result, self._value_to_token)

    # Step 2: scan output for any new raw PII that leaked
    new_pii = _scan_obj_for_pii(result)
    if not new_pii:
        return result

    try:
        # Derive field names by stripping only a numeric "_<n>" suffix, so
        # "credit_card" / "ip_address" are not cut down to "credit" / "ip".
        # NOTE(review): _classify is defined outside this view — confirm it
        # expects full field names.
        fields = set()
        for key in new_pii:
            head, sep, tail = key.rpartition("_")
            fields.add(head if sep and tail.isdigit() else key)
        classification = _classify(fields)

        minted = self._client.tokenize(new_pii, classification=classification)
        for field, token in minted.items():
            real_val = new_pii.get(field)
            if real_val:
                self._value_to_token[real_val] = token
                self._session_tokens[field] = token

        result = _blind_any(result, self._value_to_token)

        if self._verbose:
            print(f"[CodeAstra] Output gate: caught {len(minted)} leaked value(s): {list(minted.keys())}")
    except Exception as e:
        if self._verbose:
            print(f"[CodeAstra] Output gate warning: {e}")

    return result
636
+
637
+
638
async def _ascan_output(self, result: Any) -> Any:
    """Async version of _scan_output.

    Replaces known real values with their tokens, then tokenizes (via the
    async client) and replaces any newly detected raw PII. If tokenization
    fails, the result from the first step is returned as-is.
    """
    # Step 1: replace known real values with existing tokens
    if self._value_to_token:
        result = _blind_any(result, self._value_to_token)

    # Step 2: scan output for any new raw PII that leaked
    new_pii = _scan_obj_for_pii(result)
    if not new_pii:
        return result

    try:
        # Derive field names by stripping only a numeric "_<n>" suffix, so
        # "credit_card" / "ip_address" are not cut down to "credit" / "ip".
        # NOTE(review): _classify is defined outside this view — confirm it
        # expects full field names.
        fields = set()
        for key in new_pii:
            head, sep, tail = key.rpartition("_")
            fields.add(head if sep and tail.isdigit() else key)
        classification = _classify(fields)

        minted = await self._client.atokenize(new_pii, classification=classification)
        for field, token in minted.items():
            real_val = new_pii.get(field)
            if real_val:
                self._value_to_token[real_val] = token
                self._session_tokens[field] = token
        result = _blind_any(result, self._value_to_token)
        if self._verbose:
            print(f"[CodeAstra] Output gate (async): caught {len(minted)} leaked value(s)")
    except Exception as e:
        if self._verbose:
            print(f"[CodeAstra] Output gate warning: {e}")
    return result
660
+
661
+
662
+ # ── Monkey-patch all proxy methods with input + output scanning ───────────────
663
+
664
def _patched_run(self, *args, **kwargs):
    """Replacement for ``run``: blind the input, call the agent, gate the output."""
    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
    raw = self._agent.run(*safe_args, **safe_kwargs)
    blinded = self._blind_output(raw)  # existing tool-output scan
    return _scan_output(self, blinded)  # new output gate
669
+
670
def _patched_invoke(self, *args, **kwargs):
    """Replacement for ``invoke``: blind the input, call the agent, gate the output.

    When the agent returns a dict with an "output" key, only that entry is
    scanned and it is rewritten in place; any other result is scanned whole.
    """
    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
    response = self._agent.invoke(*safe_args, **safe_kwargs)

    has_output_key = isinstance(response, dict) and "output" in response
    if not has_output_key:
        return _scan_output(self, self._blind_output(response))

    response["output"] = self._blind_output(response["output"])
    response["output"] = _scan_output(self, response["output"])
    return response
679
+
680
def _patched_chat(self, *args, **kwargs):
    """Replacement for ``chat``: blind the input, call the agent, gate the output."""
    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
    reply = self._agent.chat(*safe_args, **safe_kwargs)
    return _scan_output(self, self._blind_output(reply))
685
+
686
async def _patched_arun(self, *args, **kwargs):
    """Replacement for ``arun``: blind the input, await the agent, gate the output."""
    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
    raw = await self._agent.arun(*safe_args, **safe_kwargs)
    blinded = await self._ablind_output(raw)
    return await _ascan_output(self, blinded)
691
+
692
async def _patched_ainvoke(self, *args, **kwargs):
    """Replacement for ``ainvoke``: blind the input, await the agent, gate the output.

    When the agent returns a dict with an "output" key, only that entry is
    scanned and it is rewritten in place; any other result is scanned whole.
    """
    safe_args, safe_kwargs = _scan_and_blind_input(self, *args, **kwargs)
    response = await self._agent.ainvoke(*safe_args, **safe_kwargs)

    has_output_key = isinstance(response, dict) and "output" in response
    if not has_output_key:
        blinded = await self._ablind_output(response)
        return await _ascan_output(self, blinded)

    response["output"] = await self._ablind_output(response["output"])
    response["output"] = await _ascan_output(self, response["output"])
    return response
701
+
702
+
703
# Apply patches: rebind the class attributes so every BlindAgentMiddleware
# instance uses the scanning wrappers defined above. This runs at import time.
BlindAgentMiddleware.run = _patched_run
BlindAgentMiddleware.invoke = _patched_invoke
BlindAgentMiddleware.chat = _patched_chat
BlindAgentMiddleware.arun = _patched_arun
BlindAgentMiddleware.ainvoke = _patched_ainvoke

# Expose scanner functions for direct use.
# _scan_input and _scan_output are plain functions taking `self`, so they bind
# as instance methods; scan_text is wrapped as a staticmethod and takes only
# the text.
BlindAgentMiddleware._scan_input = _scan_and_blind_input
BlindAgentMiddleware._scan_output = _scan_output
BlindAgentMiddleware.scan_text = staticmethod(_scan_text_for_pii)
@@ -1,15 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeastra
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data.
5
- License: MIT
5
+ License-Expression: MIT
6
6
  Project-URL: Homepage, https://codeastra.dev
7
7
  Project-URL: Documentation, https://docs.codeastra.dev
8
8
  Project-URL: Repository, https://github.com/codeastra/codeastra-python
9
9
  Keywords: ai,agents,langchain,crewai,privacy,hipaa,security,tokenization
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
- Classifier: License :: OSI Approved :: MIT License
13
12
  Classifier: Programming Language :: Python :: 3
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
@@ -4,16 +4,15 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codeastra"
7
- version = "1.0.0"
7
+ version = "1.1.0"
8
8
  description = "Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data."
9
9
  readme = "README.md"
10
- license = {text = "MIT"}
10
+ license = "MIT"
11
11
  requires-python = ">=3.10"
12
12
  keywords = ["ai", "agents", "langchain", "crewai", "privacy", "hipaa", "security", "tokenization"]
13
13
  classifiers = [
14
14
  "Development Status :: 5 - Production/Stable",
15
15
  "Intended Audience :: Developers",
16
- "License :: OSI Approved :: MIT License",
17
16
  "Programming Language :: Python :: 3",
18
17
  "Programming Language :: Python :: 3.10",
19
18
  "Programming Language :: Python :: 3.11",
@@ -1,239 +0,0 @@
1
- """
2
- CodeAstraClient — low-level async/sync HTTP client for the Codeastra API.
3
- All SDK components use this. Customers can also use it directly.
4
- """
5
- from __future__ import annotations
6
-
7
- import re
8
- import json
9
- import asyncio
10
- import threading
11
- from typing import Any, Optional
12
-
13
- import httpx
14
-
15
- TOKEN_RE = re.compile(r'\[CVT:[A-Z]+:[A-F0-9]+\]')
16
-
17
- _DEFAULT_BASE = "https://app.codeastra.dev"
18
-
19
-
20
- class CodeAstraClient:
21
- """
22
- Thin wrapper around the Codeastra REST API.
23
-
24
- Usage:
25
- client = CodeAstraClient(api_key="sk-guard-xxx")
26
- tokens = client.tokenize({"name": "John Smith", "ssn": "123-45-6789"})
27
- # → {"name": "[CVT:NAME:A1B2]", "ssn": "[CVT:SSN:C3D4]"}
28
- """
29
-
30
- def __init__(
31
- self,
32
- api_key: str,
33
- base_url: str = _DEFAULT_BASE,
34
- agent_id: str = "sdk-agent",
35
- timeout: float = 10.0,
36
- executor_url: str = None, # optional: bring your own executor
37
- ):
38
- self.api_key = api_key
39
- self.base_url = base_url.rstrip("/")
40
- self.agent_id = agent_id
41
- self._headers = {
42
- "X-API-Key": api_key,
43
- "Content-Type": "application/json",
44
- }
45
- self._timeout = timeout
46
- self._executor_url = executor_url
47
- # Sync client (lazy)
48
- self._sync_client: Optional[httpx.Client] = None
49
- # Async client (lazy)
50
- self._async_client: Optional[httpx.AsyncClient] = None
51
- # Auto-register executor if provided
52
- if executor_url:
53
- try:
54
- self._post("/agent/executor", {
55
- "execution_url": executor_url,
56
- "action_type": "*",
57
- "agent_id": agent_id,
58
- "description": f"Auto-registered by SDK agent {agent_id}",
59
- })
60
- except Exception:
61
- pass # non-fatal — zero-config mode still works
62
-
63
- # ── sync helpers ──────────────────────────────────────────────────────────
64
-
65
- def _get_sync(self) -> httpx.Client:
66
- if self._sync_client is None or self._sync_client.is_closed:
67
- self._sync_client = httpx.Client(
68
- headers=self._headers, timeout=self._timeout)
69
- return self._sync_client
70
-
71
- def _post(self, path: str, body: dict) -> dict:
72
- r = self._get_sync().post(f"{self.base_url}{path}", json=body)
73
- r.raise_for_status()
74
- return r.json()
75
-
76
- def _get(self, path: str, params: dict = None) -> dict:
77
- r = self._get_sync().get(f"{self.base_url}{path}", params=params or {})
78
- r.raise_for_status()
79
- return r.json()
80
-
81
- # ── async helpers ─────────────────────────────────────────────────────────
82
-
83
- def _get_async(self) -> httpx.AsyncClient:
84
- if self._async_client is None or self._async_client.is_closed:
85
- self._async_client = httpx.AsyncClient(
86
- headers=self._headers, timeout=self._timeout)
87
- return self._async_client
88
-
89
- async def _apost(self, path: str, body: dict) -> dict:
90
- r = await self._get_async().post(f"{self.base_url}{path}", json=body)
91
- r.raise_for_status()
92
- return r.json()
93
-
94
- async def _aget(self, path: str, params: dict = None) -> dict:
95
- r = await self._get_async().get(
96
- f"{self.base_url}{path}", params=params or {})
97
- r.raise_for_status()
98
- return r.json()
99
-
100
- # ── public sync API ───────────────────────────────────────────────────────
101
-
102
- def tokenize(
103
- self,
104
- data: dict,
105
- classification: str = "pii",
106
- ttl_hours: int = 24,
107
- ) -> dict:
108
- """
109
- Store real data in vault. Returns token map.
110
- {"name": "John"} → {"name": "[CVT:NAME:A1B2]"}
111
- """
112
- resp = self._post("/vault/store", {
113
- "data": data,
114
- "agent_id": self.agent_id,
115
- "classification": classification,
116
- "ttl_hours": ttl_hours,
117
- })
118
- return resp.get("tokens", {})
119
-
120
- def execute(
121
- self,
122
- action_type: str,
123
- params: dict,
124
- pipeline_id: str = None,
125
- ) -> dict:
126
- """
127
- Submit an action with token params.
128
- Codeastra resolves tokens → real values → POSTs to your executor.
129
- Agent never sees real values.
130
- """
131
- body = {
132
- "agent_id": self.agent_id,
133
- "action_type": action_type,
134
- "params": params,
135
- }
136
- if pipeline_id:
137
- body["pipeline_id"] = pipeline_id
138
- return self._post("/pipeline/action", body)
139
- return self._post("/agent/action", body)
140
-
141
- def grant(
142
- self,
143
- receiving_agent: str,
144
- tokens: list[str],
145
- allowed_actions: list[str] = [],
146
- pipeline_id: str = None,
147
- purpose: str = None,
148
- ) -> dict:
149
- """Grant tokens to another agent in a pipeline."""
150
- return self._post("/vault/grant", {
151
- "granting_agent": self.agent_id,
152
- "receiving_agent": receiving_agent,
153
- "tokens": tokens,
154
- "allowed_actions": allowed_actions,
155
- "pipeline_id": pipeline_id,
156
- "purpose": purpose,
157
- })
158
-
159
- def audit(self, pipeline_id: str = None, token: str = None) -> list:
160
- """Get chain of custody for a pipeline or token."""
161
- params = {}
162
- if pipeline_id: params["pipeline_id"] = pipeline_id
163
- if token: params["token"] = token
164
- return self._get("/pipeline/audit", params).get("audit", [])
165
-
166
- # ── public async API ──────────────────────────────────────────────────────
167
-
168
- async def atokenize(
169
- self,
170
- data: dict,
171
- classification: str = "pii",
172
- ttl_hours: int = 24,
173
- ) -> dict:
174
- resp = await self._apost("/vault/store", {
175
- "data": data,
176
- "agent_id": self.agent_id,
177
- "classification": classification,
178
- "ttl_hours": ttl_hours,
179
- })
180
- return resp.get("tokens", {})
181
-
182
- async def aexecute(
183
- self,
184
- action_type: str,
185
- params: dict,
186
- pipeline_id: str = None,
187
- ) -> dict:
188
- body = {
189
- "agent_id": self.agent_id,
190
- "action_type": action_type,
191
- "params": params,
192
- }
193
- if pipeline_id:
194
- body["pipeline_id"] = pipeline_id
195
- return await self._apost("/pipeline/action", body)
196
- return await self._apost("/agent/action", body)
197
-
198
- async def agrant(
199
- self,
200
- receiving_agent: str,
201
- tokens: list[str],
202
- allowed_actions: list[str] = [],
203
- pipeline_id: str = None,
204
- ) -> dict:
205
- return await self._apost("/vault/grant", {
206
- "granting_agent": self.agent_id,
207
- "receiving_agent": receiving_agent,
208
- "tokens": tokens,
209
- "allowed_actions": allowed_actions,
210
- "pipeline_id": pipeline_id,
211
- })
212
-
213
- # ── utility ───────────────────────────────────────────────────────────────
214
-
215
- @staticmethod
216
- def extract_tokens(obj: Any) -> list[str]:
217
- """Extract all vault tokens from any string/dict/list."""
218
- text = json.dumps(obj) if not isinstance(obj, str) else obj
219
- return TOKEN_RE.findall(text)
220
-
221
- @staticmethod
222
- def contains_token(val: Any) -> bool:
223
- text = json.dumps(val) if not isinstance(val, str) else str(val)
224
- return bool(TOKEN_RE.search(text))
225
-
226
- @staticmethod
227
- def is_token(val: str) -> bool:
228
- return bool(TOKEN_RE.fullmatch(val.strip()))
229
-
230
- def close(self):
231
- if self._sync_client: self._sync_client.close()
232
-
233
- async def aclose(self):
234
- if self._async_client: await self._async_client.aclose()
235
-
236
- def __enter__(self): return self
237
- def __exit__(self, *_): self.close()
238
- async def __aenter__(self): return self
239
- async def __aexit__(self, *_): await self.aclose()
File without changes