cortexhub 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,158 @@
1
+ """Policy loader for local Cedar policy bundles.
2
+
3
+ Architectural invariants (from AGENTS.md):
4
+ - MUST NOT make decisions
5
+ - MUST NOT evaluate policies
6
+ - ONLY loads and validates policy files
7
+ """
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ import structlog
14
+
15
+ from cortexhub.errors import PolicyLoadError
16
+
17
+ logger = structlog.get_logger(__name__)
18
+
19
+
20
class PolicyBundle:
    """In-memory container for one loaded Cedar policy bundle.

    Holds the raw policy text, the parsed schema and the bundle metadata,
    and exposes two metadata entries (``version``, ``default_behavior``)
    as plain attributes.
    """

    def __init__(
        self,
        policies: str,
        schema: dict[str, Any],
        metadata: dict[str, Any],
    ):
        """Store the bundle parts and derive the convenience attributes.

        Args:
            policies: Cedar policies as a single string.
            schema: Cedar schema as a dict.
            metadata: Bundle metadata; ``version`` and ``default_behavior``
                are looked up in it.
        """
        # Fall back to fixed defaults when the metadata omits these keys.
        version = metadata.get("version", "unknown")
        default_behavior = metadata.get("default_behavior", "allow_and_log")

        self.policies, self.schema, self.metadata = policies, schema, metadata
        self.version = version
        self.default_behavior = default_behavior
41
+
42
+
43
class PolicyLoader:
    """Loads Cedar policy bundles from the local filesystem.

    Responsibilities:
        - Load policy files
        - Validate structure
        - Parse metadata

    NOT responsible for:
        - Making decisions
        - Evaluating policies

    Expected layout below ``policies_dir``::

        cedar/policies.cedar
        cedar/schema.json
        metadata.json
    """

    def __init__(self, policies_dir: str = "./policies"):
        """Initialize policy loader.

        Args:
            policies_dir: Directory containing Cedar policy bundle
        """
        self.policies_dir = Path(policies_dir)
        logger.info("Policy loader initialized", policies_dir=str(self.policies_dir))

    def _require_file(self, path: Path, label: str) -> Path:
        """Return ``path`` if it exists, otherwise raise ``PolicyLoadError``.

        Args:
            path: File that must be present.
            label: Capitalized noun used at the start of the error message
                (e.g. ``"Policies"``).

        Raises:
            PolicyLoadError: If ``path`` does not exist.
        """
        if not path.exists():
            raise PolicyLoadError(
                f"{label} file not found: {path}",
                policies_dir=str(self.policies_dir),
            )
        return path

    def load(self) -> PolicyBundle:
        """Load policy bundle from filesystem.

        All three files are decoded as UTF-8 explicitly, so the result does
        not depend on the platform's default text encoding.

        Returns:
            PolicyBundle with policies, schema, and metadata

        Raises:
            PolicyLoadError: If bundle cannot be loaded or is invalid
        """
        try:
            cedar_dir = self.policies_dir / "cedar"

            # Load Cedar policies
            policies_file = self._require_file(cedar_dir / "policies.cedar", "Policies")
            policies = policies_file.read_text(encoding="utf-8")

            # Load Cedar schema
            schema_file = self._require_file(cedar_dir / "schema.json", "Schema")
            schema = json.loads(schema_file.read_text(encoding="utf-8"))

            # Load metadata
            metadata_file = self._require_file(
                self.policies_dir / "metadata.json", "Metadata"
            )
            metadata = json.loads(metadata_file.read_text(encoding="utf-8"))

            bundle = PolicyBundle(policies=policies, schema=schema, metadata=metadata)

            logger.info(
                "Policy bundle loaded",
                version=bundle.version,
                default_behavior=bundle.default_behavior,
                policies_size=len(policies),
            )

            return bundle

        except PolicyLoadError:
            # Already carries a precise message; do not re-wrap.
            raise
        except Exception as e:
            # Any other failure (I/O, decoding, malformed JSON, wrong JSON
            # shape) is surfaced as a single load error with the cause chained.
            raise PolicyLoadError(
                f"Failed to load policy bundle: {e}",
                policies_dir=str(self.policies_dir),
            ) from e

    def validate_bundle(self, bundle: PolicyBundle) -> None:
        """Validate policy bundle structure.

        Checks that the bundle has non-empty policies, a non-empty schema,
        and that the metadata contains every required field.

        Args:
            bundle: Policy bundle to validate

        Raises:
            PolicyLoadError: If bundle is invalid
        """
        # Basic validation
        if not bundle.policies:
            raise PolicyLoadError(
                "Policy bundle contains no policies",
                policies_dir=str(self.policies_dir),
            )

        if not bundle.schema:
            raise PolicyLoadError(
                "Policy bundle contains no schema",
                policies_dir=str(self.policies_dir),
            )

        # Validate metadata; the first missing field is the one reported.
        required_metadata_fields = ("version", "default_behavior")
        for field in required_metadata_fields:
            if field not in bundle.metadata:
                raise PolicyLoadError(
                    f"Metadata missing required field: {field}",
                    policies_dir=str(self.policies_dir),
                )

        logger.info("Policy bundle validated successfully")
@@ -0,0 +1,123 @@
1
+ """Core data models for authorization requests.
2
+
3
+ Critical: DO NOT simplify or flatten these models.
4
+ The structure is intentional and future-proof.
5
+ """
6
+
7
+ import uuid
8
+ from datetime import datetime
9
+ from typing import Any
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+
14
class Principal(BaseModel):
    """The entity asking to perform an action, for instance an AI agent.

    Examples:
        Principal(type="Agent", id="customer_support")
        Principal(type="User", id="user_12345")
        Principal(type="Service", id="payment_processor")
    """

    # Kind of principal, e.g. "Agent", "User", "Service".
    type: str
    # Identifier within that kind, e.g. "customer_support", "user_12345".
    id: str
25
+
26
+
27
class Action(BaseModel):
    """The operation being requested, for instance a tool invocation.

    Examples:
        Action(type="tool.invoke", name="send_email")
        Action(type="llm.call", name="gpt-4")
        Action(type="data.read", name="customer_records")
    """

    # Category of action, e.g. "tool.invoke", "llm.call", "data.read".
    type: str
    # Concrete target of the action, e.g. "send_email", "gpt-4".
    name: str
38
+
39
+
40
class Resource(BaseModel):
    """The thing being accessed, for instance a tool, database, or API.

    Examples:
        Resource(type="Tool", id="send_email")
        Resource(type="Database", id="customer_db")
        Resource(type="API", id="payment_gateway")
    """

    # Kind of resource, e.g. "Tool", "Database", "API".
    type: str
    # Identifier within that kind, e.g. "send_email", "customer_db".
    id: str
51
+
52
+
53
class RuntimeContext(BaseModel):
    """Describes the framework and execution environment of a request."""

    # Name of the calling framework, e.g. "langchain", "openai_agents".
    framework: str
    # Version of that framework, when known.
    framework_version: str | None = None
    # Optional confidence score from LLM.
    confidence: float | None = None
59
+
60
+
61
def _utc_now_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same kind of value as ``datetime.utcnow()`` (UTC wall
    clock, ``tzinfo=None``) without calling that method, which is
    deprecated since Python 3.12. The value is kept naive on purpose so
    existing comparisons and serialized timestamps do not change.
    """
    # Local import: only ``datetime`` itself is imported at file level.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class Metadata(BaseModel):
    """Tracing metadata for debugging and audit."""

    # Random UUID4 string generated per instance unless supplied.
    trace_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # Optional session identifier supplied by the caller.
    session_id: str | None = None
    # Creation time in UTC, stored as a naive datetime (no tzinfo).
    timestamp: datetime = Field(default_factory=_utc_now_naive)
67
+
68
+
69
class AuthorizationRequest(BaseModel):
    """Full authorization request handed to the policy engine.

    This model is central to the system and is intentionally nested;
    DO NOT simplify or flatten it.

    ``context`` is a free-form dict with these reserved top-level keys:
        - args: Tool arguments (dict)
        - runtime: RuntimeContext (framework info)
        - metadata: Metadata (trace_id, session_id, timestamp)

    Example:
        AuthorizationRequest(
            principal=Principal(type="Agent", id="customer_support"),
            action=Action(type="tool.invoke", name="send_email"),
            resource=Resource(type="Tool", id="send_email"),
            context={
                "args": {"to": "user@example.com", "body": "Hello"},
                "runtime": {"framework": "langchain"},
                "metadata": {"trace_id": "abc-123", "session_id": "sess-456"},
            },
        )
    """

    principal: Principal
    action: Action
    resource: Resource
    context: dict[str, Any]  # Reserved keys: args, runtime, metadata

    @property
    def trace_id(self) -> str:
        """Trace ID taken from ``context["metadata"]``, or ``"unknown"``."""
        meta = self.context.get("metadata", {})
        # The metadata entry may be a Metadata model or a plain dict.
        if isinstance(meta, Metadata):
            return meta.trace_id
        if isinstance(meta, dict):
            return meta.get("trace_id", "unknown")
        return "unknown"

    @property
    def args(self) -> dict[str, Any]:
        """The ``args`` entry of the context, or an empty dict if absent."""
        return self.context.get("args", {})

    def with_enriched_context(self, **kwargs) -> "AuthorizationRequest":
        """Build a new request whose context also contains ``kwargs``.

        Keys in ``kwargs`` override same-named top-level context keys.
        The original request is left untouched; the copy of the context
        is shallow.
        """
        merged = dict(self.context)
        merged.update(kwargs)
        return AuthorizationRequest(
            principal=self.principal,
            action=self.action,
            resource=self.resource,
            context=merged,
        )
@@ -0,0 +1,183 @@
1
+ """Policy Sync - downloads policy bundles from CortexHub cloud.
2
+
3
+ This is the INBOUND flow: policies created by security/compliance team
4
+ in the cloud UI are synced to the SDK for local enforcement.
5
+
6
+ Key invariants:
7
+ - Policies are PULLED by SDK, never pushed
8
+ - SDK can operate 100% offline with local policies
9
+ - Cloud policies override local policies when connected
10
+ """
11
+
12
+ import json
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import httpx
17
+ import structlog
18
+
19
+ logger = structlog.get_logger(__name__)
20
+
21
+
22
+ class PolicySync:
23
+ """Syncs policy bundles from CortexHub cloud.
24
+
25
+ Flow:
26
+ 1. Security team creates policies in cloud UI
27
+ 2. SDK periodically pulls policy bundle
28
+ 3. SDK enforces policies locally
29
+ 4. No customer data involved in sync
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ api_key: str | None,
35
+ backend_url: str,
36
+ local_policies_dir: str = "./policies",
37
+ auto_sync: bool = True,
38
+ sync_interval_seconds: int = 300, # 5 minutes
39
+ ):
40
+ """Initialize policy sync.
41
+
42
+ Args:
43
+ api_key: API key for authentication
44
+ backend_url: Backend API URL
45
+ local_policies_dir: Directory for local policy cache
46
+ auto_sync: Whether to auto-sync on init
47
+ sync_interval_seconds: How often to sync (if background sync enabled)
48
+ """
49
+ self.api_key = api_key
50
+ self.backend_url = backend_url.rstrip("/")
51
+ self.local_policies_dir = Path(local_policies_dir)
52
+ self.auto_sync = auto_sync
53
+ self.sync_interval_seconds = sync_interval_seconds
54
+
55
+ self._client: httpx.Client | None = None
56
+ self._last_sync_version: str | None = None
57
+
58
+ if api_key:
59
+ self._client = httpx.Client(
60
+ base_url=self.backend_url,
61
+ headers={"X-API-Key": api_key},
62
+ timeout=30.0,
63
+ )
64
+
65
+ # Ensure local directory exists
66
+ self.local_policies_dir.mkdir(parents=True, exist_ok=True)
67
+
68
+ # Auto-sync on init if enabled
69
+ if auto_sync and api_key:
70
+ self.sync()
71
+
72
+ def sync(self) -> bool:
73
+ """Sync policies from cloud.
74
+
75
+ Returns:
76
+ True if sync successful or no update needed
77
+ """
78
+ if not self.api_key or not self._client:
79
+ logger.debug("No API key - using local policies only")
80
+ return False
81
+
82
+ try:
83
+ # Check for updates first (lightweight)
84
+ current_version = self._get_remote_version()
85
+
86
+ if current_version == self._last_sync_version:
87
+ logger.debug("Policies up to date", version=current_version)
88
+ return True
89
+
90
+ # Download full bundle
91
+ bundle = self._download_bundle()
92
+
93
+ if bundle:
94
+ self._save_bundle(bundle)
95
+ self._last_sync_version = current_version
96
+ logger.info(
97
+ "Policies synced from cloud",
98
+ version=current_version,
99
+ policy_count=len(bundle.get("policies", [])),
100
+ )
101
+ return True
102
+
103
+ return False
104
+
105
+ except httpx.ConnectError:
106
+ logger.warning("Backend unreachable - using local policies")
107
+ return False
108
+ except Exception as e:
109
+ logger.error("Policy sync error", error=str(e))
110
+ return False
111
+
112
+ def _get_remote_version(self) -> str | None:
113
+ """Get current policy bundle version from cloud."""
114
+ if not self._client:
115
+ return None
116
+
117
+ try:
118
+ response = self._client.get("/policies/version")
119
+ if response.status_code == 200:
120
+ return response.json().get("version")
121
+ except Exception:
122
+ pass
123
+
124
+ return None
125
+
126
+ def _download_bundle(self) -> dict[str, Any] | None:
127
+ """Download full policy bundle from cloud."""
128
+ if not self._client:
129
+ return None
130
+
131
+ try:
132
+ response = self._client.get("/policies/bundle")
133
+ if response.status_code == 200:
134
+ return response.json()
135
+ except Exception as e:
136
+ logger.error("Failed to download policy bundle", error=str(e))
137
+
138
+ return None
139
+
140
+ def _save_bundle(self, bundle: dict[str, Any]) -> None:
141
+ """Save policy bundle to local cache."""
142
+ # Save Cedar policies
143
+ cedar_dir = self.local_policies_dir / "cedar"
144
+ cedar_dir.mkdir(parents=True, exist_ok=True)
145
+
146
+ # Save main policies file
147
+ policies_content = bundle.get("policies_cedar", "")
148
+ if policies_content:
149
+ (cedar_dir / "policies.cedar").write_text(policies_content)
150
+
151
+ # Save schema
152
+ schema = bundle.get("schema", {})
153
+ if schema:
154
+ (cedar_dir / "schema.json").write_text(json.dumps(schema, indent=2))
155
+
156
+ # Save metadata
157
+ metadata = {
158
+ "version": bundle.get("version"),
159
+ "synced_at": bundle.get("synced_at"),
160
+ "policy_count": len(bundle.get("policies", [])),
161
+ }
162
+ (self.local_policies_dir / "metadata.json").write_text(json.dumps(metadata, indent=2))
163
+
164
+ logger.debug(
165
+ "Policy bundle saved locally",
166
+ path=str(self.local_policies_dir),
167
+ )
168
+
169
+ def get_local_version(self) -> str | None:
170
+ """Get version of locally cached policies."""
171
+ metadata_file = self.local_policies_dir / "metadata.json"
172
+ if metadata_file.exists():
173
+ try:
174
+ metadata = json.loads(metadata_file.read_text())
175
+ return metadata.get("version")
176
+ except Exception:
177
+ pass
178
+ return None
179
+
180
+ def close(self):
181
+ """Close the HTTP client."""
182
+ if self._client:
183
+ self._client.close()
@@ -0,0 +1,40 @@
1
+ """CortexHub Telemetry - OpenTelemetry-based Governance Telemetry.
2
+
3
+ CortexHub uses OpenTelemetry (OTel) for telemetry, providing:
4
+ - Industry-standard OTLP protocol
5
+ - Proper batching, retry, and backpressure
6
+ - Interoperability with observability tools
7
+ - AI/LLM semantic conventions
8
+
9
+ Privacy Mode (default: enabled) - what IS sent:
10
+ - Tool name, description, argument names (NOT values)
11
+ - PII/secret types detected (NOT actual data)
12
+ - Agent ID, framework
13
+
14
+ What is NEVER sent (when privacy=True):
15
+ - Raw argument values
16
+ - Prompts or LLM responses
17
+ - PII literals (emails, SSNs, names, etc.)
18
+ - Secrets (API keys, passwords, tokens)
19
+ - Customer data
20
+
21
+ Telemetry Modes:
22
+ - privacy=True (DEFAULT): Metadata only, production-safe
23
+ - privacy=False: Raw data included, for dev/staging testing only
24
+ """
25
+
26
+ # OTel-based telemetry
27
+ from cortexhub.telemetry.otel import (
28
+ OTelTelemetry,
29
+ init_telemetry,
30
+ get_telemetry,
31
+ shutdown_telemetry,
32
+ )
33
+
34
+ __all__ = [
35
+ # OTel-based telemetry
36
+ "OTelTelemetry",
37
+ "init_telemetry",
38
+ "get_telemetry",
39
+ "shutdown_telemetry",
40
+ ]