proxilion-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
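The single hunk below is the new file proxilion/audit/exporters/gcp_storage.py (matching the +570 line count in the listing above):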
@@ -0,0 +1,570 @@
"""
Google Cloud Storage exporter for Proxilion audit logs.

Supports exporting audit logs to:
- Google Cloud Storage
- BigQuery (for analytics)

Uses google-cloud-storage if available; falls back to urllib with OAuth2.
"""

from __future__ import annotations

import json
import logging
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timezone
from typing import Any

from proxilion.audit.events import AuditEventV2
from proxilion.audit.exporters.cloud_base import (
    BaseCloudExporter,
    CloudExporterConfig,
    ExportBatch,
    ExportResult,
)

logger = logging.getLogger(__name__)

# Check for google-cloud-storage availability
try:
    from google.cloud import storage as gcs
    from google.oauth2 import service_account

    HAS_GCS = True
except ImportError:
    HAS_GCS = False

# Check for google-cloud-bigquery availability
try:
    from google.cloud import bigquery

    HAS_BIGQUERY = True
except ImportError:
    HAS_BIGQUERY = False
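
# Both flags are resolved once at import time; the rest of the module branches
# on them instead of re-attempting the optional imports.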


class GCSExporter(BaseCloudExporter):
    """
    Export audit logs to Google Cloud Storage.

    Uses google-cloud-storage if installed; otherwise falls back to
    urllib with Application Default Credentials or a service account.

    Example:
        >>> config = CloudExporterConfig(
        ...     provider="gcp",
        ...     bucket_name="my-audit-logs",
        ...     prefix="proxilion/prod/",
        ... )
        >>> exporter = GCSExporter(config)
        >>> result = exporter.export(events)
    """

    def __init__(self, config: CloudExporterConfig) -> None:
        """
        Initialize the GCS exporter.

        Args:
            config: Exporter configuration.
        """
        super().__init__(config)
        self._client = None
        self._bucket = None
        self._access_token: str | None = None
        self._token_expiry: float = 0
        self._initialize_client()

    def _initialize_client(self) -> None:
        """Initialize the GCS client."""
        if HAS_GCS:
            self._init_gcs_client()
        else:
            self._init_urllib_client()

    def _init_gcs_client(self) -> None:
        """Initialize the google-cloud-storage client."""
        if self.config.credentials_path:
            credentials = service_account.Credentials.from_service_account_file(
                self.config.credentials_path,
                scopes=["https://www.googleapis.com/auth/devstorage.read_write"],
            )
            self._client = gcs.Client(credentials=credentials)
        else:
            # Use Application Default Credentials
            self._client = gcs.Client()

        self._bucket = self._client.bucket(self.config.bucket_name)
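
        # Client.bucket() only constructs a local handle; no API request is
        # made until the bucket is actually used (upload, reload, list).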

    def _init_urllib_client(self) -> None:
        """Initialize urllib-based client."""
        # For urllib fallback, we need credentials
        if self.config.credentials_path:
            self._load_service_account()

    def _load_service_account(self) -> None:
        """Load service account credentials from file."""
        try:
            with open(self.config.credentials_path) as f:
                self._service_account = json.load(f)
        except Exception as e:
            logger.warning(f"Failed to load service account: {e}")
            self._service_account = None
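
        # A failed load is not fatal here; _get_access_token() raises a
        # ValueError later if credentials turn out to be missing.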

    def _get_access_token(self) -> str:
        """
        Get OAuth2 access token.

        Returns:
            Access token string.
        """
        # Check if we have a cached valid token
        if self._access_token and time.time() < self._token_expiry:
            return self._access_token

        if not hasattr(self, "_service_account") or not self._service_account:
            raise ValueError(
                "GCP credentials not configured. Set credentials_path or use "
                "Application Default Credentials with google-cloud-storage."
            )

        # Create JWT for service account
        from base64 import urlsafe_b64encode

        now = int(time.time())
        expiry = now + 3600  # 1 hour

        header = {"alg": "RS256", "typ": "JWT"}
        payload = {
            "iss": self._service_account["client_email"],
            "scope": "https://www.googleapis.com/auth/devstorage.read_write",
            "aud": "https://oauth2.googleapis.com/token",
            "iat": now,
            "exp": expiry,
        }

        # Encode header and payload
        header_b64 = urlsafe_b64encode(json.dumps(header).encode()).rstrip(b"=")
        payload_b64 = urlsafe_b64encode(json.dumps(payload).encode()).rstrip(b"=")
        signing_input = header_b64 + b"." + payload_b64

        # Sign with RSA-SHA256
        try:
            from cryptography.hazmat.primitives import hashes, serialization
            from cryptography.hazmat.primitives.asymmetric import padding

            private_key = serialization.load_pem_private_key(
                self._service_account["private_key"].encode(),
                password=None,
            )
            signature = private_key.sign(
                signing_input,
                padding.PKCS1v15(),
                hashes.SHA256(),
            )
            signature_b64 = urlsafe_b64encode(signature).rstrip(b"=")
        except ImportError as e:
            raise ImportError(
                "cryptography package required for service account auth. "
                "Install with: pip install cryptography"
            ) from e

        jwt = signing_input + b"." + signature_b64
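
        # This is the compact JWS serialization Google's OAuth2 service-account
        # flow expects: base64url(header).base64url(payload).base64url(signature),
        # with "=" padding stripped.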

        # Exchange JWT for access token
        token_url = "https://oauth2.googleapis.com/token"
        data = urllib.parse.urlencode({
            "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
            "assertion": jwt.decode(),
        }).encode()

        request = urllib.request.Request(token_url, data=data)
        request.add_header("Content-Type", "application/x-www-form-urlencoded")

        with urllib.request.urlopen(request, timeout=30) as response:
            token_data = json.loads(response.read())
            self._access_token = token_data["access_token"]
            self._token_expiry = time.time() + token_data.get("expires_in", 3600) - 60
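
            # The 60-second margin refreshes the token slightly early, so a
            # request never starts with a token about to expire mid-flight.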

        return self._access_token

    def export_batch(self, batch: ExportBatch) -> ExportResult:
        """
        Export a batch to GCS.

        Args:
            batch: The batch to export.

        Returns:
            ExportResult with success/failure information.
        """
        start_time = time.time()

        try:
            # Prepare data
            data = batch.to_bytes(self.config.compression)
            key = self.generate_key(batch.created_at, batch.batch_id)
            checksum = self.compute_checksum(data)

            # Upload with retry
            self.with_retry(self._upload_object, key, data)

            duration_ms = (time.time() - start_time) * 1000

            logger.info(
                f"Exported {batch.event_count} events to gs://{self.config.bucket_name}/{key}"
            )

            return ExportResult(
                success=True,
                events_exported=batch.event_count,
                batch_id=batch.batch_id,
                destination=f"gs://{self.config.bucket_name}/{key}",
                duration_ms=duration_ms,
                bytes_written=len(data),
                checksum=checksum,
            )

        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            logger.error(f"Failed to export batch {batch.batch_id}: {e}")

            return ExportResult(
                success=False,
                events_exported=0,
                batch_id=batch.batch_id,
                error=str(e),
                duration_ms=duration_ms,
            )

    def _upload_object(self, key: str, data: bytes) -> None:
        """
        Upload an object to GCS.

        Args:
            key: Object key.
            data: Object data.
        """
        if HAS_GCS:
            self._upload_gcs(key, data)
        else:
            self._upload_urllib(key, data)

    def _upload_gcs(self, key: str, data: bytes) -> None:
        """Upload using google-cloud-storage."""
        blob = self._bucket.blob(key)
        blob.upload_from_string(
            data,
            content_type=self.get_content_type(),
        )

    def _upload_urllib(self, key: str, data: bytes) -> None:
        """Upload using urllib with OAuth2."""
        access_token = self._get_access_token()

        # Build upload URL
        url = (
            f"https://storage.googleapis.com/upload/storage/v1/b/"
            f"{urllib.parse.quote(self.config.bucket_name)}/o"
            f"?uploadType=media&name={urllib.parse.quote(key)}"
        )
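
        # uploadType=media is the single-request ("simple") upload API, which
        # suits modest batch sizes; very large objects would call for the
        # resumable upload protocol instead.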

        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": self.get_content_type(),
            "Content-Length": str(len(data)),
        }

        request = urllib.request.Request(url, data=data, headers=headers, method="POST")

        try:
            with urllib.request.urlopen(
                request, timeout=self.config.read_timeout
            ) as response:
                if response.status not in (200, 201):
                    raise ValueError(f"GCS upload failed with status {response.status}")
        except urllib.error.HTTPError as e:
            raise ValueError(f"GCS upload failed: {e.code} {e.reason}") from e

    def health_check(self) -> bool:
        """
        Check if we can connect to GCS.

        Returns:
            True if healthy.
        """
        try:
            if HAS_GCS:
                self._bucket.reload()
            else:
                # Try to get bucket metadata
                access_token = self._get_access_token()
                url = (
                    f"https://storage.googleapis.com/storage/v1/b/"
                    f"{urllib.parse.quote(self.config.bucket_name)}"
                )
                headers = {"Authorization": f"Bearer {access_token}"}
                request = urllib.request.Request(url, headers=headers)

                with urllib.request.urlopen(request, timeout=10) as response:
                    return response.status == 200

            return True
        except Exception as e:
            logger.warning(f"GCS health check failed: {e}")
            return False

    def list_exports(
        self,
        start_date: datetime | None = None,
        end_date: datetime | None = None,
        max_results: int = 1000,
    ) -> list[str]:
        """
        List exported files in the bucket.

        Args:
            start_date: Filter to exports after this date.
            end_date: Filter to exports before this date.
            max_results: Maximum number of results to return.

        Returns:
            List of object names.
        """
        if not HAS_GCS:
            raise NotImplementedError("list_exports requires google-cloud-storage")

        prefix = self.config.prefix
        if start_date:
            prefix += f"{start_date.year:04d}/"
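
        # Only the start year can narrow the server-side prefix (keys are laid
        # out as prefix/YYYY/MM/DD/...); finer end_date filtering happens
        # client-side in the loop below.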

        blobs = self._bucket.list_blobs(prefix=prefix, max_results=max_results)

        names = []
        for blob in blobs:
            # Filter by date if needed
            if end_date:
                parts = blob.name.split("/")
                if len(parts) >= 4:
                    try:
                        year = int(parts[-4])
                        month = int(parts[-3])
                        day = int(parts[-2])
                        blob_date = datetime(year, month, day, tzinfo=timezone.utc)
                        if blob_date > end_date:
                            continue
                    except (ValueError, IndexError):
                        pass

            names.append(blob.name)

            if len(names) >= max_results:
                break

        return names


class BigQueryExporter(BaseCloudExporter):
    """
    Export audit logs directly to BigQuery.

    Provides streaming inserts for real-time analytics.
    Requires the google-cloud-bigquery package.

    Example:
        >>> config = CloudExporterConfig(
        ...     provider="gcp",
        ...     bucket_name="my-project.my_dataset.audit_logs",
        ... )
        >>> exporter = BigQueryExporter(config)
        >>> result = exporter.export(events)
    """

    def __init__(
        self,
        config: CloudExporterConfig,
        project_id: str | None = None,
        dataset_id: str | None = None,
        table_id: str | None = None,
        create_table: bool = True,
    ) -> None:
        """
        Initialize the BigQuery exporter.

        Args:
            config: Exporter configuration.
            project_id: GCP project ID.
            dataset_id: BigQuery dataset ID.
            table_id: BigQuery table ID.
            create_table: Create the table if it doesn't exist.
        """
        super().__init__(config)

        if not HAS_BIGQUERY:
            raise ImportError(
                "google-cloud-bigquery required for BigQueryExporter. "
                "Install with: pip install google-cloud-bigquery"
            )

        # Parse table reference from bucket_name if in format project.dataset.table
        if "." in config.bucket_name:
            parts = config.bucket_name.split(".")
            self.project_id = parts[0] if len(parts) > 0 else project_id
            self.dataset_id = parts[1] if len(parts) > 1 else dataset_id
            self.table_id = parts[2] if len(parts) > 2 else table_id
        else:
            self.project_id = project_id
            self.dataset_id = dataset_id
            self.table_id = table_id or config.bucket_name

        self.create_table = create_table
        self._client = None
        self._table = None
        self._initialize_client()

    def _initialize_client(self) -> None:
        """Initialize the BigQuery client."""
        if self.config.credentials_path:
            from google.oauth2 import service_account

            credentials = service_account.Credentials.from_service_account_file(
                self.config.credentials_path,
            )
            self._client = bigquery.Client(
                credentials=credentials,
                project=self.project_id,
            )
        else:
            self._client = bigquery.Client(project=self.project_id)

        # Get or create table
        table_ref = f"{self.project_id}.{self.dataset_id}.{self.table_id}"

        try:
            self._table = self._client.get_table(table_ref)
        except Exception:
            if self.create_table:
                self._create_table(table_ref)
            else:
                raise

    def _create_table(self, table_ref: str) -> None:
        """Create the BigQuery table."""
        schema = [
            bigquery.SchemaField("event_id", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("timestamp", "TIMESTAMP", mode="REQUIRED"),
            bigquery.SchemaField("sequence_number", "INTEGER"),
            bigquery.SchemaField("event_type", "STRING"),
            bigquery.SchemaField("user_id", "STRING"),
            bigquery.SchemaField("user_roles", "STRING", mode="REPEATED"),
            bigquery.SchemaField("session_id", "STRING"),
            bigquery.SchemaField("agent_id", "STRING"),
            bigquery.SchemaField("tool_name", "STRING"),
            bigquery.SchemaField("tool_arguments", "JSON"),
            bigquery.SchemaField("authorization_allowed", "BOOLEAN"),
            bigquery.SchemaField("authorization_reason", "STRING"),
            bigquery.SchemaField("policies_evaluated", "STRING", mode="REPEATED"),
            bigquery.SchemaField("event_hash", "STRING"),
            bigquery.SchemaField("previous_hash", "STRING"),
        ]

        table = bigquery.Table(table_ref, schema=schema)

        # Enable partitioning by timestamp
        table.time_partitioning = bigquery.TimePartitioning(
            type_=bigquery.TimePartitioningType.DAY,
            field="timestamp",
        )
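
        # Day partitioning keeps time-range audit queries cheap: BigQuery scans
        # only the partitions a query's timestamp filter touches.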

        self._table = self._client.create_table(table)
        logger.info(f"Created BigQuery table: {table_ref}")

    def _event_to_row(self, event: AuditEventV2) -> dict[str, Any]:
        """Convert an audit event to a BigQuery row."""
        data = event.data
        return {
            "event_id": event.event_id,
            "timestamp": event.timestamp.isoformat(),
            "sequence_number": event.sequence_number,
            "event_type": data.event_type.value,
            "user_id": data.user_id,
            "user_roles": data.user_roles or [],
            "session_id": data.session_id,
            "agent_id": data.agent_id,
            "tool_name": data.tool_name,
            "tool_arguments": json.dumps(data.tool_arguments) if data.tool_arguments else None,
            "authorization_allowed": data.authorization_allowed,
            "authorization_reason": data.authorization_reason,
            "policies_evaluated": data.policies_evaluated or [],
            "event_hash": event.event_hash,
            "previous_hash": event.previous_hash,
        }
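
        # The timestamp is serialized to an ISO 8601 string, which the
        # streaming insert API accepts for TIMESTAMP columns; tool_arguments is
        # pre-serialized because its column is declared as JSON.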

    def export_batch(self, batch: ExportBatch) -> ExportResult:
        """
        Export a batch to BigQuery.

        Args:
            batch: The batch to export.

        Returns:
            ExportResult with success/failure information.
        """
        start_time = time.time()

        try:
            # Convert events to rows
            rows = [self._event_to_row(event) for event in batch.events]

            # Insert rows
            errors = self._client.insert_rows_json(self._table, rows)
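
            # insert_rows_json() returns one error mapping per failed row;
            # an empty list means every row was accepted.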

            duration_ms = (time.time() - start_time) * 1000

            if errors:
                error_msg = f"BigQuery insert errors: {errors}"
                logger.error(error_msg)
                return ExportResult(
                    success=False,
                    events_exported=0,
                    batch_id=batch.batch_id,
                    error=error_msg,
                    duration_ms=duration_ms,
                )

            table_ref = f"{self.project_id}.{self.dataset_id}.{self.table_id}"
            logger.info(f"Exported {batch.event_count} events to {table_ref}")

            return ExportResult(
                success=True,
                events_exported=batch.event_count,
                batch_id=batch.batch_id,
                destination=table_ref,
                duration_ms=duration_ms,
            )

        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            logger.error(f"Failed to export batch {batch.batch_id}: {e}")

            return ExportResult(
                success=False,
                events_exported=0,
                batch_id=batch.batch_id,
                error=str(e),
                duration_ms=duration_ms,
            )

    def health_check(self) -> bool:
        """
        Check if we can connect to BigQuery.

        Returns:
            True if healthy.
        """
        try:
            self._client.get_table(self._table)
            return True
        except Exception as e:
            logger.warning(f"BigQuery health check failed: {e}")
            return False
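
A minimal end-to-end sketch of how the two exporters are driven, assembled only from the constructors and methods shown in the hunk above; the events list is assumed to come from proxilion's audit logger:

    from proxilion.audit.exporters.cloud_base import CloudExporterConfig
    from proxilion.audit.exporters.gcp_storage import BigQueryExporter, GCSExporter

    # Object storage for long-term retention.
    gcs_exporter = GCSExporter(CloudExporterConfig(
        provider="gcp",
        bucket_name="my-audit-logs",
        prefix="proxilion/prod/",
    ))

    # Streaming inserts for analytics; bucket_name doubles as project.dataset.table.
    bq_exporter = BigQueryExporter(CloudExporterConfig(
        provider="gcp",
        bucket_name="my-project.my_dataset.audit_logs",
    ))

    for exporter in (gcs_exporter, bq_exporter):
        if exporter.health_check():           # verify connectivity first
            result = exporter.export(events)  # events: list[AuditEventV2]
            print(result.success, result.destination)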