codeastra 1.0.1__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeastra
3
- Version: 1.0.1
3
+ Version: 1.5.0
4
4
  Summary: Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data.
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://codeastra.dev
@@ -2,7 +2,7 @@ from .middleware import BlindAgentMiddleware
2
2
  from .client import CodeAstraClient
3
3
  from .wrappers import blind_tool, BlindCrewAIAgent, BlindAutoGPTAgent
4
4
 
5
- __version__ = "1.0.0"
5
+ __version__ = "1.5.0"
6
6
  __all__ = [
7
7
  "BlindAgentMiddleware",
8
8
  "CodeAstraClient",
@@ -0,0 +1,903 @@
1
+ """
2
+ CodeAstraClient — full-featured async/sync HTTP client for the Codeastra API.
3
+
4
+ New in v1.1.0:
5
+ - mode="cloud" — default, uses app.codeastra.dev
6
+ - mode="onprem" — pulls deployment package, runs vault locally
7
+ - mode="hybrid" — local vault + cloud LLM (best for enterprise)
8
+ - zero_log=True — zero logging mode, max privacy
9
+ - Auto-register executor on init
10
+ - Auto-detect environment
11
+ - Auto-generate on-premise package
12
+ - HMAC verification of executor calls
13
+ - Tamper-proof audit verification
14
+ - Auto-signup on first use
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import re
19
+ import os
20
+ import json
21
+ import hmac
22
+ import socket
23
+ import hashlib
24
+ import asyncio
25
+ from pathlib import Path
26
+ from typing import Any, Optional
27
+
28
+ import httpx
29
+
30
+ TOKEN_RE = re.compile(r'\[CVT:[A-Z]+:[A-F0-9]+\]')
31
+
32
+ _DEFAULT_BASE = "https://app.codeastra.dev"
33
+ _ONPREM_DEFAULT = "http://localhost:4000"
34
+
35
+
36
+ def _detect_environment() -> str:
37
+ env_mode = os.environ.get("CODEASTRA_MODE", "").lower()
38
+ if env_mode in ("cloud", "onprem", "hybrid"):
39
+ return env_mode
40
+ try:
41
+ s = socket.create_connection(("localhost", 4000), timeout=1)
42
+ s.close()
43
+ return "onprem"
44
+ except Exception:
45
+ pass
46
+ return "cloud"
47
+
48
+
49
+ def _get_base_url(mode: str, base_url: str = None) -> str:
50
+ if base_url:
51
+ return base_url.rstrip("/")
52
+ if mode in ("onprem", "hybrid"):
53
+ return os.environ.get("CODEASTRA_ONPREM_URL", _ONPREM_DEFAULT)
54
+ return _DEFAULT_BASE
55
+
56
+
57
class CodeAstraClient:
    """
    Full-featured Codeastra client.

    Modes:
        cloud  — default. Uses app.codeastra.dev
        onprem — local vault. Auto-generates deployment package on first use.
        hybrid — local vault + cloud LLM. Best for enterprise.

    Usage:
        # Cloud (default — zero config)
        client = CodeAstraClient(api_key="sk-guard-xxx")

        # On-premise (auto-generates docker-compose + setup.sh)
        client = CodeAstraClient(api_key="sk-guard-xxx", mode="onprem")

        # Hybrid (local vault, cloud LLM)
        client = CodeAstraClient(api_key="sk-guard-xxx", mode="hybrid")

        # Zero logging
        client = CodeAstraClient(api_key="sk-guard-xxx", zero_log=True)

        # With executor auto-registered
        client = CodeAstraClient(api_key="sk-guard-xxx",
                                 executor_url="https://your-app.com/execute")

        # No API key — auto-signup
        client = CodeAstraClient()
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        agent_id: str = "sdk-agent",
        timeout: float = 10.0,
        executor_url: Optional[str] = None,
        mode: str = "auto",
        zero_log: bool = False,
        onprem_dir: str = "./codeastra-onprem",
        verbose: bool = False,
    ):
        """
        Configure the client and perform the optional one-time setup steps.

        Args:
            api_key: API key. Falls back to ``CODEASTRA_API_KEY`` and then to
                :meth:`_auto_signup`.
            base_url: Explicit API base URL; overrides the mode-derived URL.
            agent_id: Agent identifier sent with requests.
            timeout: Per-request timeout handed to httpx.
            executor_url: If given, registered via ``POST /agent/executor``.
                Registration failures are non-fatal.
            mode: ``auto`` | ``cloud`` | ``onprem`` | ``hybrid``.
            zero_log: Adds the ``X-Zero-Log: true`` header to every request.
            onprem_dir: Directory for the generated on-premise package.
            verbose: Print progress messages to stdout.

        Raises:
            ValueError: No API key was supplied and auto-signup failed.
        """
        # Auto-signup if no API key
        if not api_key:
            api_key = os.environ.get("CODEASTRA_API_KEY")
        if not api_key:
            api_key = self._auto_signup()

        # Auto-detect mode
        if mode == "auto":
            mode = _detect_environment()

        self.api_key = api_key
        self.agent_id = agent_id
        self.mode = mode
        self.zero_log = zero_log
        self._verbose = verbose
        self._timeout = timeout
        self._onprem_dir = Path(onprem_dir)
        self.base_url = _get_base_url(mode, base_url)
        # Always defined, so reading it never raises AttributeError.
        self._executor_url = executor_url

        self._headers = {
            "X-API-Key": api_key,
            "Content-Type": "application/json",
        }
        if zero_log:
            self._headers["X-Zero-Log"] = "true"

        # HTTP clients are created lazily on first use.
        self._sync_client: Optional["httpx.Client"] = None
        self._async_client: Optional["httpx.AsyncClient"] = None

        if verbose:
            print(f"[CodeAstra] mode={mode} base={self.base_url} zero_log={zero_log}")

        # On-premise: auto-generate deployment package
        if mode in ("onprem", "hybrid"):
            self._setup_onprem(mode)

        # Auto-register executor if provided
        if executor_url:
            try:
                self._post("/agent/executor", {
                    "execution_url": executor_url,
                    "action_type": "*",
                    "agent_id": agent_id,
                    "description": f"Auto-registered by SDK agent {agent_id} ({mode})",
                })
                if verbose:
                    print(f"[CodeAstra] Executor auto-registered: {executor_url}")
            except Exception as e:
                # Best effort: a failed registration must not block construction.
                if verbose:
                    print(f"[CodeAstra] Executor registration skipped: {e}")

    # ── Auto-signup ───────────────────────────────────────────────────────────

    def _auto_signup(self) -> str:
        """
        Return a stored API key, or create an account and store its key.

        Reads ``~/.codeastra/credentials`` first. If no key is found, posts to
        ``/auth/signup`` on the cloud endpoint using ``CODEASTRA_EMAIL`` /
        ``CODEASTRA_PASSWORD`` / ``CODEASTRA_NAME`` (random values when unset)
        and saves the returned key, e-mail and password to that file with
        owner-only permissions.

        Raises:
            ValueError: No key could be loaded or obtained.
        """
        import secrets
        import uuid
        import warnings

        creds_path = Path.home() / ".codeastra" / "credentials"

        if creds_path.exists():
            try:
                key = json.loads(creds_path.read_text()).get("api_key")
                if key:
                    return key
            except (OSError, ValueError, AttributeError):
                # Unreadable or malformed credentials: fall through to signup.
                pass

        email = os.environ.get(
            "CODEASTRA_EMAIL", f"user-{uuid.uuid4().hex[:8]}@codeastra.local")
        # Generated passwords come from the CSPRNG-backed `secrets` module.
        password = os.environ.get("CODEASTRA_PASSWORD", secrets.token_hex(16))
        name = os.environ.get("CODEASTRA_NAME", f"SDK User {uuid.uuid4().hex[:6]}")

        failure = None
        api_key = None
        try:
            r = httpx.post(f"{_DEFAULT_BASE}/auth/signup", json={
                "name": name, "email": email, "password": password,
            }, timeout=10)
            if r.is_success:
                api_key = r.json().get("api_key")
        except Exception as exc:  # network failure or undecodable response
            failure = exc

        if api_key:
            try:
                creds_path.parent.mkdir(parents=True, exist_ok=True)
                # Restrict permissions BEFORE writing: the file holds the API
                # key and the account password in plain text.
                creds_path.touch(mode=0o600, exist_ok=True)
                creds_path.chmod(0o600)
                creds_path.write_text(json.dumps({
                    "api_key": api_key, "email": email, "password": password,
                }))
                print(f"[CodeAstra] Account created. Key saved to {creds_path}")
            except OSError as exc:
                # The account exists; losing the key would be worse than
                # failing to cache it, so keep going with the in-memory key.
                warnings.warn(
                    f"[CodeAstra] Account created but credentials could not "
                    f"be saved to {creds_path}: {exc}",
                    RuntimeWarning,
                )
            return api_key

        raise ValueError(
            "No API key. Set CODEASTRA_API_KEY or pass api_key= "
            "or sign up at https://app.codeastra.dev"
        ) from failure

    # ── On-premise setup ──────────────────────────────────────────────────────

    def _setup_onprem(self, mode: str):
        """
        Generate the on-premise deployment package unless ``setup.sh`` exists.

        Posts to ``/onprem/generate`` and writes each returned file below
        ``onprem_dir``. File names that would resolve outside that directory
        are skipped with a warning. On any failure the client switches to the
        cloud endpoint and cloud mode, and a ``RuntimeWarning`` is emitted.

        NOTE(review): the request goes to ``self.base_url``, which in
        onprem/hybrid mode is the local vault URL — confirm that endpoint is
        expected to be reachable before the package has been deployed.
        """
        import warnings

        setup_sh = self._onprem_dir / "setup.sh"
        if setup_sh.exists():
            if self._verbose:
                print(f"[CodeAstra] On-premise package at {self._onprem_dir}")
            return

        if self._verbose:
            print(f"[CodeAstra] Generating on-premise package...")

        try:
            resp = self._post("/onprem/generate", {
                "deployment_mode": "docker",
                "llm_provider": "ollama",
                "llm_model": "llama3",
                "air_gapped": mode != "hybrid",
                "name": f"codeastra-{self.agent_id}",
            })

            files = resp.get("files", {})
            if files:
                self._onprem_dir.mkdir(parents=True, exist_ok=True)
                root = self._onprem_dir.resolve()
                for filename, content in files.items():
                    target = (root / filename).resolve()
                    # File names come from the HTTP response; never let them
                    # escape the package directory ("../x", absolute paths).
                    if root not in target.parents:
                        warnings.warn(
                            f"[CodeAstra] Skipping unsafe on-premise file "
                            f"name: {filename!r}",
                            RuntimeWarning,
                        )
                        continue
                    target.parent.mkdir(parents=True, exist_ok=True)
                    target.write_text(content)
                if setup_sh.exists():
                    setup_sh.chmod(0o755)
                print(f"\n[CodeAstra] On-premise package ready: {self._onprem_dir}")
                print(f"   Run: cd {self._onprem_dir} && bash setup.sh\n")

        except Exception as e:
            if self._verbose:
                print(f"[CodeAstra] On-premise setup warning: {e} — falling back to cloud")
            # Data will now go to the cloud endpoint even though a local mode
            # was requested; make that visible even when verbose is off.
            warnings.warn(
                f"[CodeAstra] On-premise setup failed ({e}); "
                f"falling back to cloud mode",
                RuntimeWarning,
            )
            self.base_url = _DEFAULT_BASE
            self.mode = "cloud"

    # ── HMAC verification ─────────────────────────────────────────────────────

    @staticmethod
    def verify_executor_call(
        payload: "str | bytes",
        signature: "str | bytes",
        secret: "str | bytes",
    ) -> bool:
        """
        Verify an incoming executor call is genuinely from Codeastra.
        Use in your executor endpoint to reject forged requests.

        The expected signature is ``"sha256=" + HMAC-SHA256(secret, payload)``
        in hex, compared in constant time. A missing or non-string signature
        or secret returns ``False`` instead of raising.

        Usage:
            @app.post("/execute")
            def execute(request):
                if not CodeAstraClient.verify_executor_call(
                    request.body, request.headers["X-Codeastra-Signature"], YOUR_SECRET
                ):
                    raise HTTPException(401)
        """
        if not isinstance(signature, (str, bytes)):
            return False
        if not isinstance(secret, (str, bytes)):
            return False
        key = secret.encode() if isinstance(secret, str) else secret
        message = payload.encode() if isinstance(payload, str) else payload
        expected = "sha256=" + hmac.new(key, message, hashlib.sha256).hexdigest()
        # Compare as bytes: compare_digest rejects non-ASCII str arguments.
        supplied = signature.encode() if isinstance(signature, str) else signature
        return hmac.compare_digest(expected.encode(), supplied)

    # ── Audit verification ────────────────────────────────────────────────────

    def verify_audit(self) -> dict:
        """
        Verify tamper-proof audit chain integrity.

        Returns the response of ``GET /audit/secure/verify``; on any error
        returns ``{"verified": False, "error": <message>}`` instead of raising.
        """
        try:
            return self._get("/audit/secure/verify")
        except Exception as e:
            return {"verified": False, "error": str(e)}

    def export_audit(self, output_path: str = "audit_report.json") -> str:
        """
        Export full compliance audit report as indented JSON.

        Returns:
            ``output_path`` on success. On failure the error message is
            returned instead (no exception is raised), so callers that need
            to distinguish the two must compare against ``output_path``.
        """
        try:
            data = self._get("/audit/secure/export")
            Path(output_path).write_text(json.dumps(data, indent=2))
            return output_path
        except Exception as e:
            return str(e)

    # ── Zero-log mode ─────────────────────────────────────────────────────────

    def set_zero_log(self, enabled: bool = True):
        """
        Enable/disable zero-logging mode.

        Updates the stored default headers and the headers of any HTTP client
        that already exists, so open connections are kept rather than dropped
        without being closed.
        """
        self.zero_log = enabled
        if enabled:
            self._headers["X-Zero-Log"] = "true"
        else:
            self._headers.pop("X-Zero-Log", None)

        for client in (self._sync_client, self._async_client):
            if client is None:
                continue
            if enabled:
                client.headers["X-Zero-Log"] = "true"
            else:
                client.headers.pop("X-Zero-Log", None)

    # ── sync helpers ──────────────────────────────────────────────────────────

    def _get_sync(self) -> "httpx.Client":
        """Return the shared sync client, creating it if missing or closed."""
        if self._sync_client is None or self._sync_client.is_closed:
            self._sync_client = httpx.Client(
                headers=self._headers, timeout=self._timeout)
        return self._sync_client

    def _post(self, path: str, body: dict) -> dict:
        """POST ``body`` as JSON to ``path``; raise on HTTP error; return JSON."""
        r = self._get_sync().post(f"{self.base_url}{path}", json=body)
        r.raise_for_status()
        return r.json()

    def _get(self, path: str, params: Optional[dict] = None) -> dict:
        """GET ``path`` with query ``params``; raise on HTTP error; return JSON."""
        r = self._get_sync().get(f"{self.base_url}{path}", params=params or {})
        r.raise_for_status()
        return r.json()

    # ── async helpers ─────────────────────────────────────────────────────────

    def _get_async(self) -> "httpx.AsyncClient":
        """Return the shared async client, creating it if missing or closed."""
        if self._async_client is None or self._async_client.is_closed:
            self._async_client = httpx.AsyncClient(
                headers=self._headers, timeout=self._timeout)
        return self._async_client

    async def _apost(self, path: str, body: dict) -> dict:
        """Async counterpart of :meth:`_post`."""
        r = await self._get_async().post(f"{self.base_url}{path}", json=body)
        r.raise_for_status()
        return r.json()

    async def _aget(self, path: str, params: Optional[dict] = None) -> dict:
        """Async counterpart of :meth:`_get`."""
        r = await self._get_async().get(
            f"{self.base_url}{path}", params=params or {})
        r.raise_for_status()
        return r.json()

    # ── public sync API ───────────────────────────────────────────────────────

    def tokenize(self, data: dict, classification: str = "pii", ttl_hours: int = 24) -> dict:
        """Store ``data`` via ``/vault/store`` and return the response's ``tokens`` (``{}`` if absent)."""
        resp = self._post("/vault/store", {
            "data": data, "agent_id": self.agent_id,
            "classification": classification, "ttl_hours": ttl_hours,
        })
        return resp.get("tokens", {})

    def execute(self, action_type: str, params: dict, pipeline_id: Optional[str] = None) -> dict:
        """Submit an action: ``/pipeline/action`` when ``pipeline_id`` is given, else ``/agent/action``."""
        body = {"agent_id": self.agent_id, "action_type": action_type, "params": params}
        if pipeline_id:
            body["pipeline_id"] = pipeline_id
            return self._post("/pipeline/action", body)
        return self._post("/agent/action", body)

    def grant(self, receiving_agent: str, tokens: list, allowed_actions: Optional[list] = None,
              pipeline_id: Optional[str] = None, purpose: Optional[str] = None) -> dict:
        """Grant ``receiving_agent`` access to ``tokens`` via ``/vault/grant``."""
        return self._post("/vault/grant", {
            "granting_agent": self.agent_id, "receiving_agent": receiving_agent,
            "tokens": tokens,
            "allowed_actions": allowed_actions if allowed_actions is not None else [],
            "pipeline_id": pipeline_id, "purpose": purpose,
        })

    def audit(self, pipeline_id: Optional[str] = None, token: Optional[str] = None) -> list:
        """Return the ``audit`` list from ``/pipeline/audit``, optionally filtered."""
        params = {}
        if pipeline_id:
            params["pipeline_id"] = pipeline_id
        if token:
            params["token"] = token
        return self._get("/pipeline/audit", params).get("audit", [])

    def stats(self) -> dict:
        """Return the response of ``GET /vault/stats``."""
        return self._get("/vault/stats")

    # ── public async API ──────────────────────────────────────────────────────

    async def atokenize(self, data: dict, classification: str = "pii", ttl_hours: int = 24) -> dict:
        """Async counterpart of :meth:`tokenize`."""
        resp = await self._apost("/vault/store", {
            "data": data, "agent_id": self.agent_id,
            "classification": classification, "ttl_hours": ttl_hours,
        })
        return resp.get("tokens", {})

    async def aexecute(self, action_type: str, params: dict, pipeline_id: Optional[str] = None) -> dict:
        """Async counterpart of :meth:`execute`."""
        body = {"agent_id": self.agent_id, "action_type": action_type, "params": params}
        if pipeline_id:
            body["pipeline_id"] = pipeline_id
            return await self._apost("/pipeline/action", body)
        return await self._apost("/agent/action", body)

    async def agrant(self, receiving_agent: str, tokens: list,
                     allowed_actions: Optional[list] = None, pipeline_id: Optional[str] = None,
                     purpose: Optional[str] = None) -> dict:
        """Async counterpart of :meth:`grant` (same request body)."""
        return await self._apost("/vault/grant", {
            "granting_agent": self.agent_id, "receiving_agent": receiving_agent,
            "tokens": tokens,
            "allowed_actions": allowed_actions if allowed_actions is not None else [],
            "pipeline_id": pipeline_id, "purpose": purpose,
        })

    # ── utility ───────────────────────────────────────────────────────────────

    @staticmethod
    def extract_tokens(obj: Any) -> list:
        """Return every vault token found in ``obj`` (non-strings are JSON-encoded first)."""
        text = obj if isinstance(obj, str) else json.dumps(obj)
        return TOKEN_RE.findall(text)

    @staticmethod
    def contains_token(val: Any) -> bool:
        """Return True if ``val`` (JSON-encoded when not a string) contains a vault token."""
        text = val if isinstance(val, str) else json.dumps(val)
        return bool(TOKEN_RE.search(text))

    @staticmethod
    def is_token(val: str) -> bool:
        """Return True if ``val``, ignoring surrounding whitespace, is exactly one vault token."""
        if not isinstance(val, str):
            # Non-strings can never be a token; avoid AttributeError on .strip().
            return False
        return bool(TOKEN_RE.fullmatch(val.strip()))

    # ── smart tokens (v4.2) ───────────────────────────────────────────────────

    def smart_tokenize(
        self,
        real_value: str,
        data_type: str,
        allowed_actions: Optional[list] = None,
        allowed_targets: Optional[list] = None,
        allowed_fields: Optional[list] = None,
        max_uses: int = 1,
        ttl_seconds: int = 86400,
        semantic_label: Optional[str] = None,
    ) -> dict:
        """
        Mint a smart token — policy-bound and semantically meaningful.

        The agent receives meaning (what the data is, where it can go).
        The real value is vault-protected forever.
        The trusted executor reveals it only at the last mile.

        Usage:
            token = client.smart_tokenize(
                real_value      = "John Smith",
                data_type       = "patient_name",
                allowed_actions = ["fill_form"],
                allowed_fields  = ["first_name"],
                max_uses        = 1,
                ttl_seconds     = 30,
            )
            # → {"token_id": "tok_PATI_a1b2c3", "data_type": "patient_name", ...}
            # Agent gets this. Never sees "John Smith".
        """
        return self._post("/vault/smart-token", {
            "real_value": real_value,
            "data_type": data_type,
            "agent_id": self.agent_id,
            "allowed_actions": allowed_actions if allowed_actions is not None else [],
            "allowed_targets": allowed_targets if allowed_targets is not None else [],
            "allowed_fields": allowed_fields if allowed_fields is not None else [],
            "max_uses": max_uses,
            "ttl_seconds": ttl_seconds,
            "semantic_label": semantic_label,
        })

    def smart_tokenize_batch(self, tokens: list) -> list:
        """Mint multiple smart tokens in one call."""
        resp = self._post("/vault/smart-token/batch", {
            "agent_id": self.agent_id,
            "tokens": tokens,
        })
        return resp.get("tokens", [])

    def smart_token_info(self, token_id: str) -> dict:
        """Get smart token metadata. Safe for agent — never returns real value."""
        return self._get(f"/vault/smart-token/{token_id}")

    def smart_token_execute(
        self,
        token_id: str,
        action_type: Optional[str] = None,
        target_url: Optional[str] = None,
        field_name: Optional[str] = None,
    ) -> dict:
        """
        Policy-gated JIT reveal. Called by trusted executor — NEVER by agent.

        Runs all 5 policy gates. If all pass, returns real value.
        Token auto-revokes after max_uses reached.

        Usage (in your executor endpoint):
            result = client.smart_token_execute(
                token_id    = "tok_PATI_a1b2c3",
                action_type = "fill_form",
                target_url  = "https://hospital.com/intake",
                field_name  = "first_name",
            )
            if result["authorized"]:
                form.fill(field_name, result["real_value"])
                # real value used here — agent never saw it
        """
        return self._post("/vault/smart-token/execute", {
            "token_id": token_id,
            "action_type": action_type,
            "target_url": target_url,
            "field_name": field_name,
            "agent_id": self.agent_id,
        })

    def smart_token_revoke(self, token_id: str, reason: str = "manual") -> dict:
        """
        Immediately revoke a smart token.

        Tries ``GET .../revoke`` first and, if that raises, retries as a POST
        carrying ``reason``.

        NOTE(review): ``reason`` is only sent on the POST fallback, and any
        error (not just an unsupported method) triggers the retry — confirm
        the intended endpoint contract.
        """
        try:
            return self._get(f"/vault/smart-token/{token_id}/revoke")
        except Exception:
            return self._post(f"/vault/smart-token/{token_id}/revoke", {"reason": reason})

    def smart_token_audit(self, token_id: str) -> list:
        """Full reveal audit trail for a token."""
        return self._get(f"/vault/smart-token/{token_id}/audit").get("audit", [])

    def smart_token_types(self) -> list:
        """List all supported data types for smart tokens."""
        return self._get("/vault/smart-token-types").get("types", [])

    async def asmart_tokenize(
        self,
        real_value: str,
        data_type: str,
        allowed_actions: Optional[list] = None,
        allowed_targets: Optional[list] = None,
        allowed_fields: Optional[list] = None,
        max_uses: int = 1,
        ttl_seconds: int = 86400,
        semantic_label: Optional[str] = None,
    ) -> dict:
        """Async counterpart of :meth:`smart_tokenize` (same request body)."""
        return await self._apost("/vault/smart-token", {
            "real_value": real_value,
            "data_type": data_type,
            "agent_id": self.agent_id,
            "allowed_actions": allowed_actions if allowed_actions is not None else [],
            "allowed_targets": allowed_targets if allowed_targets is not None else [],
            "allowed_fields": allowed_fields if allowed_fields is not None else [],
            "max_uses": max_uses,
            "ttl_seconds": ttl_seconds,
            "semantic_label": semantic_label,
        })

    async def asmart_token_execute(
        self,
        token_id: str,
        action_type: Optional[str] = None,
        target_url: Optional[str] = None,
        field_name: Optional[str] = None,
    ) -> dict:
        """Async counterpart of :meth:`smart_token_execute`."""
        return await self._apost("/vault/smart-token/execute", {
            "token_id": token_id,
            "action_type": action_type,
            "target_url": target_url,
            "field_name": field_name,
            "agent_id": self.agent_id,
        })

    # ── blind RAG (v4.3) ──────────────────────────────────────────────────────

    def rag_ingest(
        self,
        content: dict,
        doc_type: str,
        title: Optional[str] = None,
        source: Optional[str] = None,
        classification: str = "pii",
    ) -> dict:
        """
        Tokenize a document and index it for blind semantic search.

        Real values tokenized before indexing.
        Agent can search and find — never sees real values.

        Usage:
            client.rag_ingest(
                content  = {"name": "John Smith", "age": "67",
                            "diagnosis": "diabetes", "risk": "high"},
                doc_type = "patient_record",
            )
        """
        return self._post("/rag/ingest", {
            "content": content,
            "doc_type": doc_type,
            "agent_id": self.agent_id,
            "title": title,
            "source": source,
            "classification": classification,
        })

    def rag_ingest_batch(self, documents: list) -> dict:
        """Ingest multiple documents. Max 50 per call."""
        return self._post("/rag/ingest/batch", {
            "agent_id": self.agent_id,
            "documents": documents,
        })

    def rag_search(
        self,
        query: str,
        doc_type: Optional[str] = None,
        top_k: int = 5,
        min_score: float = 0.3,
    ) -> dict:
        """
        Semantic search over tokenized documents.
        Returns token references — never real values.

        Usage:
            results = client.rag_search(
                "find diabetic patients over 65 with high risk"
            )
            for r in results["results"]:
                tokens = r["tokens"]  # ["[CVT:NAME:A1B2]", ...]
                # Pass tokens to executor to notify real patients
        """
        body = {"query": query, "top_k": top_k, "min_score": min_score}
        if doc_type:
            body["doc_type"] = doc_type
        return self._post("/rag/search", body)

    def rag_delete(self, doc_id: str) -> dict:
        """Delete a document from the blind RAG index."""
        r = self._get_sync().delete(f"{self.base_url}/rag/document/{doc_id}")
        r.raise_for_status()
        return r.json()

    def rag_stats(self) -> dict:
        """Vault RAG statistics."""
        return self._get("/rag/stats")

    async def arag_ingest(
        self,
        content: dict,
        doc_type: str,
        title: Optional[str] = None,
        classification: str = "pii",
        source: Optional[str] = None,
    ) -> dict:
        """
        Async counterpart of :meth:`rag_ingest` (same request body).

        ``source`` is placed last so existing positional callers keep working.
        """
        return await self._apost("/rag/ingest", {
            "content": content,
            "doc_type": doc_type,
            "agent_id": self.agent_id,
            "title": title,
            "source": source,
            "classification": classification,
        })

    async def arag_search(
        self,
        query: str,
        doc_type: Optional[str] = None,
        top_k: int = 5,
        min_score: float = 0.3,
    ) -> dict:
        """Async counterpart of :meth:`rag_search`."""
        body = {"query": query, "top_k": top_k, "min_score": min_score}
        if doc_type:
            body["doc_type"] = doc_type
        return await self._apost("/rag/search", body)

    # ── policy-driven sensitivity (v4.4) ─────────────────────────────────────

    def register_sensitive_type(
        self,
        fields: list,
        prefixes: Optional[list] = None,
        doc_types: Optional[list] = None,
    ) -> dict:
        """
        Register custom sensitive field names for your tenant.
        Once registered, these are ALWAYS tokenized automatically.

        Usage:
            client.register_sensitive_type(
                fields    = ["employee_badge", "case_ref", "policy_number"],
                prefixes  = ["EMP-", "LEGAL-", "POL-"],
                doc_types = ["hr_record", "legal_filing"],
            )
            # Now employee_badge is always tokenized — no per-request config needed
        """
        return self._post("/policy/sensitivity/fields", {
            "fields": fields,
            "prefixes": prefixes if prefixes is not None else [],
            "doc_types": doc_types if doc_types is not None else [],
        })

    def set_sensitivity_policy(
        self,
        sensitive_fields: Optional[list] = None,
        sensitive_prefixes: Optional[list] = None,
        sensitive_doc_types: Optional[list] = None,
        field_classifications: Optional[dict] = None,
        strict_mode: Optional[bool] = None,
    ) -> dict:
        """
        Set full sensitivity policy. Only arguments that are not ``None`` are
        included in the request body.

        field_classifications: {
            "employee_badge": "restricted",  # always tokenize
            "department":     "internal",    # keep but don't export
            "office_floor":   "public",      # never tokenize
        }
        """
        candidates = {
            "sensitive_fields": sensitive_fields,
            "sensitive_prefixes": sensitive_prefixes,
            "sensitive_doc_types": sensitive_doc_types,
            "field_classifications": field_classifications,
            "strict_mode": strict_mode,
        }
        body = {key: value for key, value in candidates.items() if value is not None}
        return self._post("/policy/sensitivity", body)

    def get_sensitivity_policy(self) -> dict:
        """Get current sensitivity policy."""
        return self._get("/policy/sensitivity")

    def test_sensitivity(
        self,
        content: dict,
        field_policy: Optional[dict] = None,
        sensitive_fields: Optional[list] = None,
        tokenize_all: bool = False,
    ) -> dict:
        """
        Test how your policy classifies fields — without actually tokenizing.
        Shows exactly what would be tokenized vs kept.

        Usage:
            result = client.test_sensitivity({
                "employee_badge": "EMP-77291",
                "name":           "John Smith",
                "department":     "Oncology",
                "age_range":      "65-75",
            })
            print(result["would_tokenize"])  # employee_badge, name
            print(result["would_keep"])      # department, age_range
        """
        return self._post("/policy/sensitivity/test", {
            "content": content,
            "field_policy": field_policy if field_policy is not None else {},
            "sensitive_fields": sensitive_fields if sensitive_fields is not None else [],
            "tokenize_all": tokenize_all,
        })

    def smart_ingest(
        self,
        content: dict,
        doc_type: str,
        field_policy: Optional[dict] = None,
        sensitive_fields: Optional[list] = None,
        tokenize_all: bool = False,
        title: Optional[str] = None,
        classification: str = "pii",
    ) -> dict:
        """
        Ingest a document with full policy-driven sensitivity.
        Combines RAG ingest + policy resolution in one call.

        All three layers applied automatically:
          - Built-in: known field names + patterns
          - Per-request: field_policy + sensitive_fields
          - Tenant policy: registered via register_sensitive_type()

        Usage:
            # Simple — built-in detection handles it
            client.smart_ingest({"name": "John", "age": "67"}, "patient_record")

            # With custom fields
            client.smart_ingest(
                content          = {"employee_badge": "EMP-77291", "dept": "HR"},
                doc_type         = "hr_record",
                sensitive_fields = ["employee_badge"],
            )

            # With field-level classification
            client.smart_ingest(
                content      = {"badge": "EMP-77291", "floor": "3rd"},
                doc_type     = "employee_record",
                field_policy = {"badge": "tokenize", "floor": "public"},
            )
        """
        return self._post("/rag/ingest", {
            "content": content,
            "doc_type": doc_type,
            "agent_id": self.agent_id,
            "title": title,
            "classification": classification,
            "field_policy": field_policy if field_policy is not None else {},
            "sensitive_fields": sensitive_fields if sensitive_fields is not None else [],
            "tokenize_all": tokenize_all,
        })

    # ── context-aware + k-anonymity (v4.5) ───────────────────────────────────

    def set_context(
        self,
        industry: Optional[str] = None,
        data_scope: Optional[str] = None,
        classification_level: Optional[str] = None,
        extra_sensitive_fields: Optional[list] = None,
        safe_fields: Optional[list] = None,
        strict_mode: bool = False,
    ) -> dict:
        """
        Register context-aware sensitivity rules.

        Industry profiles auto-applied:
            healthcare → diagnosis, medication, lab_result tokenized
            fintech    → salary, credit_score, transaction tokenized
            legal      → case_number, privilege, settlement tokenized
            government → clearance_level, classification tokenized
            hr         → salary, performance_rating tokenized

        Usage:
            client.set_context(industry="healthcare", data_scope="patient_records")
            # Now diagnosis, medication, lab_result etc. are always tokenized
            # Even if not in built-in detection
        """
        return self._post("/policy/context", {
            "industry": industry,
            "data_scope": data_scope,
            "classification_level": classification_level,
            "extra_sensitive_fields": (
                extra_sensitive_fields if extra_sensitive_fields is not None else []),
            "safe_fields": safe_fields if safe_fields is not None else [],
            "context_strict_mode": strict_mode,
        })

    def set_anonymity(
        self,
        k_minimum: int = 5,
        suppress_singleton: bool = True,
        auto_bucket: bool = True,
        detect_narrowing: bool = True,
        quasi_identifiers: Optional[list] = None,
    ) -> dict:
        """
        Configure k-anonymity protection.

        Protects against re-identification even when names are tokenized.
        "67yo diabetic in zip 30314" → 1 result → suppressed (below k=5)
        age:67 → auto-bucketed to 65-74
        zip:30314 → auto-bucketed to 303xxx
        Narrowing attacks → detected and blocked

        ``quasi_identifiers`` is only sent when it is not ``None``.

        Usage:
            client.set_anonymity(k_minimum=5, auto_bucket=True)
        """
        body = {
            "k_minimum": k_minimum,
            "suppress_singleton": suppress_singleton,
            "auto_bucket": auto_bucket,
            "detect_narrowing": detect_narrowing,
        }
        if quasi_identifiers is not None:
            body["quasi_identifiers"] = quasi_identifiers
        return self._post("/policy/anonymity", body)

    def test_context(
        self,
        content: dict,
        context: dict,
        field_policy: Optional[dict] = None,
    ) -> dict:
        """
        Test context-aware classification without ingesting.

        Usage:
            result = client.test_context(
                content = {"diagnosis": "diabetes", "age": "67", "dept": "cardiology"},
                context = {"industry": "healthcare"},
            )
            # would_tokenize: diagnosis (context-sensitive in healthcare)
            # would_keep: dept (not sensitive)
        """
        return self._post("/policy/context/test", {
            "content": content,
            "context": context,
            "field_policy": field_policy if field_policy is not None else {},
        })

    def smart_ingest_with_context(
        self,
        content: dict,
        doc_type: str,
        context: Optional[dict] = None,
        field_policy: Optional[dict] = None,
        sensitive_fields: Optional[list] = None,
        tokenize_all: bool = False,
        title: Optional[str] = None,
    ) -> dict:
        """
        Full pipeline ingest — policy + context + k-anonymity protection.

        Usage:
            client.smart_ingest_with_context(
                content  = {"name": "John", "diagnosis": "diabetes", "age": "67"},
                doc_type = "patient_record",
                context  = {"industry": "healthcare", "data_scope": "patient_records"},
            )
        """
        return self._post("/rag/ingest", {
            "content": content,
            "doc_type": doc_type,
            "agent_id": self.agent_id,
            "title": title,
            "context": context if context is not None else {},
            "field_policy": field_policy if field_policy is not None else {},
            "sensitive_fields": sensitive_fields if sensitive_fields is not None else [],
            "tokenize_all": tokenize_all,
        })

    def info(self) -> dict:
        """Return the client's mode, base URL, agent id and zero-log flag."""
        return {
            "mode": self.mode,
            "base_url": self.base_url,
            "agent_id": self.agent_id,
            "zero_log": self.zero_log,
        }

    def close(self):
        """Close the sync HTTP client if one was created."""
        if self._sync_client:
            self._sync_client.close()

    async def aclose(self):
        """Close the async HTTP client if one was created."""
        if self._async_client:
            await self._async_client.aclose()

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    async def __aenter__(self):
        return self

    async def __aexit__(self, *_):
        await self.aclose()

    def __repr__(self):
        return f"CodeAstraClient(mode={self.mode!r}, agent_id={self.agent_id!r}, zero_log={self.zero_log})"
@@ -145,16 +145,29 @@ class BlindAgentMiddleware:
145
145
  def __init__(
146
146
  self,
147
147
  agent: Any,
148
- api_key: str,
149
- agent_id: str = "sdk-agent",
150
- base_url: str = "https://app.codeastra.dev",
151
- classification: str = "pii",
148
+ api_key: str = None,
149
+ agent_id: str = "sdk-agent",
150
+ base_url: str = None,
151
+ classification: str = "pii",
152
152
  pipeline_id: Optional[str] = None,
153
153
  on_tokenize: Optional[Callable] = None,
154
154
  verbose: bool = False,
155
+ mode: str = "auto", # auto | cloud | onprem | hybrid
156
+ zero_log: bool = False,
157
+ executor_url: str = None,
158
+ onprem_dir: str = "./codeastra-onprem",
155
159
  ):
156
160
  self._agent = agent
157
- self._client = CodeAstraClient(api_key, base_url, agent_id)
161
+ self._client = CodeAstraClient(
162
+ api_key = api_key,
163
+ base_url = base_url,
164
+ agent_id = agent_id,
165
+ mode = mode,
166
+ zero_log = zero_log,
167
+ executor_url = executor_url,
168
+ onprem_dir = onprem_dir,
169
+ verbose = verbose,
170
+ )
158
171
  self._classification = classification
159
172
  self._pipeline_id = pipeline_id
160
173
  self._on_tokenize = on_tokenize
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeastra
3
- Version: 1.0.1
3
+ Version: 1.5.0
4
4
  Summary: Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data.
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://codeastra.dev
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codeastra"
7
- version = "1.0.1"
7
+ version = "1.5.0"
8
8
  description = "Blind Agent SDK — drop-in middleware for LangChain, CrewAI, AutoGPT. Two lines makes any agent blind to real data."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,239 +0,0 @@
1
- """
2
- CodeAstraClient — low-level async/sync HTTP client for the Codeastra API.
3
- All SDK components use this. Customers can also use it directly.
4
- """
5
- from __future__ import annotations
6
-
7
- import re
8
- import json
9
- import asyncio
10
- import threading
11
- from typing import Any, Optional
12
-
13
- import httpx
14
-
15
- TOKEN_RE = re.compile(r'\[CVT:[A-Z]+:[A-F0-9]+\]')
16
-
17
- _DEFAULT_BASE = "https://app.codeastra.dev"
18
-
19
-
20
class CodeAstraClient:
    """
    Thin wrapper around the Codeastra REST API.

    Every public operation exists in a blocking form (``tokenize``,
    ``execute``, ``grant``, ``audit``) and, except for ``audit``, an
    ``a``-prefixed coroutine form. Each sync/async pair builds its request
    from the same private helper so the two can never drift apart.

    Usage:
        client = CodeAstraClient(api_key="sk-guard-xxx")
        tokens = client.tokenize({"name": "John Smith", "ssn": "123-45-6789"})
        # → {"name": "[CVT:NAME:A1B2]", "ssn": "[CVT:SSN:C3D4]"}
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = _DEFAULT_BASE,
        agent_id: str = "sdk-agent",
        timeout: float = 10.0,
        executor_url: Optional[str] = None,  # optional: bring your own executor
    ):
        """
        Args:
            api_key: Sent as the ``X-API-Key`` header on every request.
            base_url: API root; any trailing "/" is stripped.
            agent_id: Identifier included in request bodies.
            timeout: Timeout handed to the underlying httpx clients.
            executor_url: When truthy, a best-effort POST to
                ``/agent/executor`` registers it during construction.
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.agent_id = agent_id
        self._headers = {
            "X-API-Key": api_key,
            "Content-Type": "application/json",
        }
        self._timeout = timeout
        self._executor_url = executor_url
        # Both HTTP clients are created lazily on first use.
        self._sync_client: Optional[httpx.Client] = None
        self._async_client: Optional[httpx.AsyncClient] = None
        # Auto-register executor if provided
        if executor_url:
            try:
                self._post("/agent/executor", {
                    "execution_url": executor_url,
                    "action_type": "*",
                    "agent_id": agent_id,
                    "description": f"Auto-registered by SDK agent {agent_id}",
                })
            except Exception:
                # Deliberately best-effort: a failed registration must not
                # prevent construction — zero-config mode still works.
                pass

    # ── sync helpers ──────────────────────────────────────────────────────────

    def _get_sync(self) -> httpx.Client:
        """Return the sync client, (re)creating it if missing or closed."""
        if self._sync_client is None or self._sync_client.is_closed:
            self._sync_client = httpx.Client(
                headers=self._headers, timeout=self._timeout)
        return self._sync_client

    def _post(self, path: str, body: dict) -> dict:
        """POST ``body`` as JSON to ``path``; raise on HTTP error, return parsed JSON."""
        r = self._get_sync().post(f"{self.base_url}{path}", json=body)
        r.raise_for_status()
        return r.json()

    def _get(self, path: str, params: Optional[dict] = None) -> dict:
        """GET ``path`` with query ``params``; raise on HTTP error, return parsed JSON."""
        r = self._get_sync().get(f"{self.base_url}{path}", params=params or {})
        r.raise_for_status()
        return r.json()

    # ── async helpers ─────────────────────────────────────────────────────────

    def _get_async(self) -> httpx.AsyncClient:
        """Return the async client, (re)creating it if missing or closed."""
        if self._async_client is None or self._async_client.is_closed:
            self._async_client = httpx.AsyncClient(
                headers=self._headers, timeout=self._timeout)
        return self._async_client

    async def _apost(self, path: str, body: dict) -> dict:
        """Async counterpart of ``_post``."""
        r = await self._get_async().post(f"{self.base_url}{path}", json=body)
        r.raise_for_status()
        return r.json()

    async def _aget(self, path: str, params: Optional[dict] = None) -> dict:
        """Async counterpart of ``_get``."""
        r = await self._get_async().get(
            f"{self.base_url}{path}", params=params or {})
        r.raise_for_status()
        return r.json()

    # ── request builders (shared by the sync and async API) ──────────────────

    def _store_body(self, data: dict, classification: str, ttl_hours: int) -> dict:
        """Build the ``/vault/store`` request body."""
        return {
            "data": data,
            "agent_id": self.agent_id,
            "classification": classification,
            "ttl_hours": ttl_hours,
        }

    def _action_request(
        self,
        action_type: str,
        params: dict,
        pipeline_id: Optional[str],
    ) -> tuple[str, dict]:
        """Return ``(path, body)`` for an action; pipeline actions use their own endpoint."""
        body = {
            "agent_id": self.agent_id,
            "action_type": action_type,
            "params": params,
        }
        if pipeline_id:
            body["pipeline_id"] = pipeline_id
            return "/pipeline/action", body
        return "/agent/action", body

    def _grant_body(
        self,
        receiving_agent: str,
        tokens: list[str],
        allowed_actions: Optional[list[str]],
        pipeline_id: Optional[str],
        purpose: Optional[str],
    ) -> dict:
        """Build the ``/vault/grant`` request body."""
        return {
            "granting_agent": self.agent_id,
            "receiving_agent": receiving_agent,
            "tokens": tokens,
            # A fresh list per call: never share one default list between calls.
            "allowed_actions": [] if allowed_actions is None else allowed_actions,
            "pipeline_id": pipeline_id,
            "purpose": purpose,
        }

    # ── public sync API ───────────────────────────────────────────────────────

    def tokenize(
        self,
        data: dict,
        classification: str = "pii",
        ttl_hours: int = 24,
    ) -> dict:
        """
        Store real data in vault. Returns token map.
        {"name": "John"} → {"name": "[CVT:NAME:A1B2]"}

        Returns the response's "tokens" entry, or ``{}`` when it is absent.
        """
        resp = self._post(
            "/vault/store", self._store_body(data, classification, ttl_hours))
        return resp.get("tokens", {})

    def execute(
        self,
        action_type: str,
        params: dict,
        pipeline_id: Optional[str] = None,
    ) -> dict:
        """
        Submit an action with token params.
        Codeastra resolves tokens → real values → POSTs to your executor.
        Agent never sees real values.

        Posts to ``/pipeline/action`` when ``pipeline_id`` is truthy,
        otherwise to ``/agent/action``.
        """
        path, body = self._action_request(action_type, params, pipeline_id)
        return self._post(path, body)

    def grant(
        self,
        receiving_agent: str,
        tokens: list[str],
        allowed_actions: Optional[list[str]] = None,
        pipeline_id: Optional[str] = None,
        purpose: Optional[str] = None,
    ) -> dict:
        """Grant tokens to another agent in a pipeline.

        ``allowed_actions=None`` is sent as an empty list.
        """
        return self._post("/vault/grant", self._grant_body(
            receiving_agent, tokens, allowed_actions, pipeline_id, purpose))

    def audit(
        self,
        pipeline_id: Optional[str] = None,
        token: Optional[str] = None,
    ) -> list:
        """Get chain of custody for a pipeline or token.

        Only truthy filters are sent. Returns the response's "audit" entry,
        or ``[]`` when it is absent.
        """
        params = {}
        if pipeline_id:
            params["pipeline_id"] = pipeline_id
        if token:
            params["token"] = token
        return self._get("/pipeline/audit", params).get("audit", [])

    # ── public async API ──────────────────────────────────────────────────────

    async def atokenize(
        self,
        data: dict,
        classification: str = "pii",
        ttl_hours: int = 24,
    ) -> dict:
        """Async counterpart of ``tokenize``."""
        resp = await self._apost(
            "/vault/store", self._store_body(data, classification, ttl_hours))
        return resp.get("tokens", {})

    async def aexecute(
        self,
        action_type: str,
        params: dict,
        pipeline_id: Optional[str] = None,
    ) -> dict:
        """Async counterpart of ``execute``."""
        path, body = self._action_request(action_type, params, pipeline_id)
        return await self._apost(path, body)

    async def agrant(
        self,
        receiving_agent: str,
        tokens: list[str],
        allowed_actions: Optional[list[str]] = None,
        pipeline_id: Optional[str] = None,
        purpose: Optional[str] = None,
    ) -> dict:
        """Async counterpart of ``grant``.

        ``purpose`` is accepted here too so the async signature matches the
        sync one; it defaults to ``None``, which ``grant`` already sends.
        """
        return await self._apost("/vault/grant", self._grant_body(
            receiving_agent, tokens, allowed_actions, pipeline_id, purpose))

    # ── utility ───────────────────────────────────────────────────────────────

    @staticmethod
    def _as_text(obj: Any) -> str:
        """Return ``obj`` unchanged if it is a str, otherwise its JSON encoding."""
        return obj if isinstance(obj, str) else json.dumps(obj)

    @staticmethod
    def extract_tokens(obj: Any) -> list[str]:
        """Extract all vault tokens from any string/dict/list."""
        return TOKEN_RE.findall(CodeAstraClient._as_text(obj))

    @staticmethod
    def contains_token(val: Any) -> bool:
        """Return True if ``val`` (or its JSON encoding) contains a vault token."""
        return bool(TOKEN_RE.search(CodeAstraClient._as_text(val)))

    @staticmethod
    def is_token(val: str) -> bool:
        """Return True if ``val``, ignoring surrounding whitespace, is exactly one vault token.

        Non-string input is reported as ``False`` rather than raising.
        """
        if not isinstance(val, str):
            return False
        return bool(TOKEN_RE.fullmatch(val.strip()))

    def close(self):
        """Close the sync HTTP client, if one was created."""
        if self._sync_client:
            self._sync_client.close()

    async def aclose(self):
        """Close the async HTTP client, if one was created."""
        if self._async_client:
            await self._async_client.aclose()

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    async def __aenter__(self):
        return self

    async def __aexit__(self, *_):
        await self.aclose()
File without changes