proxilion 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,608 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Azure Blob Storage exporter for Proxilion audit logs.
|
|
3
|
+
|
|
4
|
+
Supports exporting audit logs to:
|
|
5
|
+
- Azure Blob Storage
|
|
6
|
+
- Azure Data Lake Storage Gen2 (ADLS)
|
|
7
|
+
|
|
8
|
+
Uses azure-storage-blob if available, falls back to urllib with SAS/connection string.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import hashlib
|
|
14
|
+
import hmac
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
import time
|
|
19
|
+
import urllib.error
|
|
20
|
+
import urllib.parse
|
|
21
|
+
import urllib.request
|
|
22
|
+
from base64 import b64decode, b64encode
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
|
|
25
|
+
from proxilion.audit.exporters.cloud_base import (
|
|
26
|
+
BaseCloudExporter,
|
|
27
|
+
CloudExporterConfig,
|
|
28
|
+
ExportBatch,
|
|
29
|
+
ExportResult,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
# Check for azure-storage-blob availability
|
|
35
|
+
try:
|
|
36
|
+
from azure.identity import DefaultAzureCredential
|
|
37
|
+
from azure.storage.blob import BlobServiceClient
|
|
38
|
+
HAS_AZURE_STORAGE = True
|
|
39
|
+
except ImportError:
|
|
40
|
+
HAS_AZURE_STORAGE = False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class AzureBlobExporter(BaseCloudExporter):
    """
    Export audit logs to Azure Blob Storage.

    Uses azure-storage-blob if installed, otherwise falls back to
    urllib with connection string or SAS token.

    Example:
        >>> config = CloudExporterConfig(
        ...     provider="azure",
        ...     bucket_name="audit-logs",  # Container name
        ...     prefix="proxilion/prod/",
        ... )
        >>> exporter = AzureBlobExporter(config)
        >>> result = exporter.export(events)
    """

    def __init__(self, config: CloudExporterConfig) -> None:
        """
        Initialize the Azure Blob exporter.

        Args:
            config: Exporter configuration.
        """
        super().__init__(config)
        # SDK clients (populated only when azure-storage-blob is installed).
        self._client = None
        self._container_client = None
        # Fallback (urllib) credentials.
        self._connection_string: str | None = None
        self._account_name: str | None = None
        self._account_key: str | None = None
        self._sas_token: str | None = None
        self._initialize_client()

    def _initialize_client(self) -> None:
        """Initialize the Azure Blob client (SDK when available, else urllib)."""
        if HAS_AZURE_STORAGE:
            self._init_azure_sdk_client()
        else:
            self._init_urllib_client()

    def _init_azure_sdk_client(self) -> None:
        """Initialize azure-storage-blob client."""
        # Try connection string first (from env or credentials file)
        connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")

        if self.config.credentials_path:
            creds = self._load_credentials_file(self.config.credentials_path)
            connection_string = creds.get("connection_string", connection_string)

        if connection_string:
            self._client = BlobServiceClient.from_connection_string(connection_string)
        elif self.config.use_instance_credentials:
            # Use DefaultAzureCredential (managed identity, etc.)
            account_url = self._get_account_url()
            credential = DefaultAzureCredential()
            self._client = BlobServiceClient(account_url, credential=credential)
        else:
            raise ValueError(
                "Azure credentials not configured. Set AZURE_STORAGE_CONNECTION_STRING "
                "or use credentials_path or use_instance_credentials."
            )

        self._container_client = self._client.get_container_client(
            self.config.bucket_name
        )

    def _init_urllib_client(self) -> None:
        """Initialize urllib-based client."""
        # Load connection string or credentials
        self._connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")

        if self.config.credentials_path:
            creds = self._load_credentials_file(self.config.credentials_path)
            self._connection_string = creds.get("connection_string", self._connection_string)
            self._account_name = creds.get("account_name")
            self._account_key = creds.get("account_key")
            self._sas_token = creds.get("sas_token")

        # Parse connection string if provided
        if self._connection_string:
            self._parse_connection_string()

    def _load_credentials_file(self, path: str) -> dict[str, str]:
        """Load credentials from a JSON file; return {} on any failure (best effort)."""
        try:
            with open(path) as f:
                return json.load(f)
        except Exception as e:
            logger.warning(f"Failed to load credentials from {path}: {e}")
            return {}

    def _parse_connection_string(self) -> None:
        """Parse Azure connection string to extract credentials."""
        if not self._connection_string:
            return

        parts = {}
        for part in self._connection_string.split(";"):
            # Split on the first "=" only: AccountKey values are base64 and
            # may themselves contain "=" padding.
            if "=" in part:
                key, value = part.split("=", 1)
                parts[key] = value

        self._account_name = parts.get("AccountName")
        self._account_key = parts.get("AccountKey")

    def _get_account_url(self) -> str:
        """Get the blob service account URL."""
        # Try to get from endpoint URL config
        if self.config.endpoint_url:
            return self.config.endpoint_url

        # Try to get account name from environment or credentials
        account_name = self._account_name or os.environ.get("AZURE_STORAGE_ACCOUNT")

        if not account_name:
            raise ValueError(
                "Azure storage account name not configured. Set endpoint_url, "
                "AZURE_STORAGE_ACCOUNT, or provide in credentials."
            )

        return f"https://{account_name}.blob.core.windows.net"

    def export_batch(self, batch: ExportBatch) -> ExportResult:
        """
        Export a batch to Azure Blob Storage.

        Args:
            batch: The batch to export.

        Returns:
            ExportResult with success/failure information.
        """
        start_time = time.time()

        try:
            # Prepare data
            data = batch.to_bytes(self.config.compression)
            key = self.generate_key(batch.created_at, batch.batch_id)
            checksum = self.compute_checksum(data)

            # Upload with retry
            self.with_retry(self._upload_blob, key, data)

            duration_ms = (time.time() - start_time) * 1000

            account_url = self._get_account_url()
            destination = f"{account_url}/{self.config.bucket_name}/{key}"

            logger.info(
                f"Exported {batch.event_count} events to {destination}"
            )

            return ExportResult(
                success=True,
                events_exported=batch.event_count,
                batch_id=batch.batch_id,
                destination=destination,
                duration_ms=duration_ms,
                bytes_written=len(data),
                checksum=checksum,
            )

        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            logger.error(f"Failed to export batch {batch.batch_id}: {e}")

            return ExportResult(
                success=False,
                events_exported=0,
                batch_id=batch.batch_id,
                error=str(e),
                duration_ms=duration_ms,
            )

    def _upload_blob(self, blob_name: str, data: bytes) -> None:
        """
        Upload a blob to Azure Storage.

        Args:
            blob_name: Blob name (path within container).
            data: Blob data.
        """
        if HAS_AZURE_STORAGE:
            self._upload_azure_sdk(blob_name, data)
        else:
            self._upload_urllib(blob_name, data)

    def _upload_azure_sdk(self, blob_name: str, data: bytes) -> None:
        """Upload using azure-storage-blob."""
        # BUG FIX: upload_blob's content_settings parameter must be a
        # ContentSettings instance -- passing a plain dict raises an
        # AttributeError inside the SDK. Import locally since this path is
        # only reachable when HAS_AZURE_STORAGE is True.
        from azure.storage.blob import ContentSettings

        blob_client = self._container_client.get_blob_client(blob_name)
        blob_client.upload_blob(
            data,
            overwrite=True,
            content_settings=ContentSettings(content_type=self.get_content_type()),
        )

    def _upload_urllib(self, blob_name: str, data: bytes) -> None:
        """Upload using urllib with Shared Key or SAS token."""
        if not self._account_name:
            raise ValueError(
                "Azure credentials not configured. Set AZURE_STORAGE_CONNECTION_STRING "
                "or provide account credentials."
            )

        # Build URL
        url = (
            f"https://{self._account_name}.blob.core.windows.net/"
            f"{self.config.bucket_name}/{blob_name}"
        )

        if self._sas_token:
            # Use SAS token. Tokens are often stored with a leading "?";
            # strip it so we never produce "??" in the URL.
            url = f"{url}?{self._sas_token.lstrip('?')}"
            headers = self._get_blob_headers(data)
        elif self._account_key:
            # Use Shared Key authentication
            headers = self._sign_request(blob_name, data)
        else:
            raise ValueError("No authentication method available")

        request = urllib.request.Request(url, data=data, headers=headers, method="PUT")

        try:
            with urllib.request.urlopen(
                request, timeout=self.config.read_timeout
            ) as response:
                if response.status not in (200, 201):
                    raise ValueError(f"Azure upload failed with status {response.status}")
        except urllib.error.HTTPError as e:
            raise ValueError(f"Azure upload failed: {e.code} {e.reason}") from e

    def _get_blob_headers(self, data: bytes) -> dict[str, str]:
        """Get basic headers for blob upload."""
        return {
            "Content-Type": self.get_content_type(),
            "Content-Length": str(len(data)),
            "x-ms-blob-type": "BlockBlob",
            "x-ms-version": "2020-10-02",
        }

    def _sign_request(self, blob_name: str, data: bytes) -> dict[str, str]:
        """
        Sign a request using Azure Shared Key.

        Args:
            blob_name: Blob name.
            data: Request data.

        Returns:
            Dict of headers including Authorization.
        """
        # Current time in RFC 1123 format
        now = datetime.now(timezone.utc)
        x_ms_date = now.strftime("%a, %d %b %Y %H:%M:%S GMT")
        x_ms_version = "2020-10-02"

        # Build canonical headers (x-ms-* headers, lexicographic order)
        content_length = str(len(data))
        content_type = self.get_content_type()

        canonical_headers = (
            f"x-ms-blob-type:BlockBlob\n"
            f"x-ms-date:{x_ms_date}\n"
            f"x-ms-version:{x_ms_version}"
        )

        # Build canonical resource
        canonical_resource = f"/{self._account_name}/{self.config.bucket_name}/{blob_name}"

        # Build string to sign. The empty lines are the standard headers
        # this request does not set; their order is fixed by the Shared Key
        # spec and must not change.
        string_to_sign = (
            f"PUT\n"
            f"\n"  # Content-Encoding
            f"\n"  # Content-Language
            f"{content_length}\n"
            f"\n"  # Content-MD5
            f"{content_type}\n"
            f"\n"  # Date
            f"\n"  # If-Modified-Since
            f"\n"  # If-Match
            f"\n"  # If-None-Match
            f"\n"  # If-Unmodified-Since
            f"\n"  # Range
            f"{canonical_headers}\n"
            f"{canonical_resource}"
        )

        # Sign with HMAC-SHA256 (account key is base64-encoded)
        key = b64decode(self._account_key)
        signature = b64encode(
            hmac.new(key, string_to_sign.encode(), hashlib.sha256).digest()
        ).decode()

        return {
            "Authorization": f"SharedKey {self._account_name}:{signature}",
            "Content-Type": content_type,
            "Content-Length": content_length,
            "x-ms-blob-type": "BlockBlob",
            "x-ms-date": x_ms_date,
            "x-ms-version": x_ms_version,
        }

    def health_check(self) -> bool:
        """
        Check if we can connect to Azure Blob Storage.

        Returns:
            True if healthy.
        """
        try:
            if HAS_AZURE_STORAGE:
                self._container_client.get_container_properties()
            else:
                # Try to get container properties
                url = (
                    f"https://{self._account_name}.blob.core.windows.net/"
                    f"{self.config.bucket_name}?restype=container"
                )

                if self._sas_token:
                    # Query string already contains "?restype=container", so
                    # append with "&"; strip any leading "?" from the token.
                    url = f"{url}&{self._sas_token.lstrip('?')}"
                    headers = {"x-ms-version": "2020-10-02"}
                else:
                    headers = self._sign_container_request()

                request = urllib.request.Request(url, headers=headers)
                with urllib.request.urlopen(request, timeout=10) as response:
                    return response.status == 200

            return True
        except Exception as e:
            logger.warning(f"Azure health check failed: {e}")
            return False

    def _sign_container_request(self) -> dict[str, str]:
        """Sign a GET container request."""
        now = datetime.now(timezone.utc)
        x_ms_date = now.strftime("%a, %d %b %Y %H:%M:%S GMT")
        x_ms_version = "2020-10-02"

        canonical_headers = (
            f"x-ms-date:{x_ms_date}\n"
            f"x-ms-version:{x_ms_version}"
        )

        # Query parameters participate in the canonical resource.
        canonical_resource = (
            f"/{self._account_name}/{self.config.bucket_name}\n"
            f"restype:container"
        )

        # Build string to sign (GET + 11 empty headers + canonical headers + resource)
        empty_headers = "\n" * 11
        string_to_sign = (
            f"GET\n"
            f"{empty_headers}"
            f"{canonical_headers}\n"
            f"{canonical_resource}"
        )

        key = b64decode(self._account_key)
        signature = b64encode(
            hmac.new(key, string_to_sign.encode(), hashlib.sha256).digest()
        ).decode()

        return {
            "Authorization": f"SharedKey {self._account_name}:{signature}",
            "x-ms-date": x_ms_date,
            "x-ms-version": x_ms_version,
        }

    def list_exports(
        self,
        start_date: datetime | None = None,
        end_date: datetime | None = None,
        max_results: int = 1000,
    ) -> list[str]:
        """
        List exported blobs in the container.

        Args:
            start_date: Filter to exports after this date.
            end_date: Filter to exports before this date.
            max_results: Maximum number of results to return.

        Returns:
            List of blob names.

        Raises:
            NotImplementedError: If azure-storage-blob is not installed.
        """
        if not HAS_AZURE_STORAGE:
            raise NotImplementedError("list_exports requires azure-storage-blob")

        prefix = self.config.prefix
        if start_date:
            # Narrow the listing by year; assumes keys follow the default
            # {prefix}/YYYY/MM/DD/... layout produced by generate_key.
            prefix += f"{start_date.year:04d}/"

        blobs = self._container_client.list_blobs(
            name_starts_with=prefix,
            results_per_page=max_results,
        )

        names = []
        for blob in blobs:
            # Filter by date if needed
            if end_date:
                parts = blob.name.split("/")
                if len(parts) >= 4:
                    try:
                        year = int(parts[-4])
                        month = int(parts[-3])
                        day = int(parts[-2])
                        blob_date = datetime(year, month, day, tzinfo=timezone.utc)
                        if blob_date > end_date:
                            continue
                    except (ValueError, IndexError):
                        # Non-date path components: keep the blob rather than
                        # silently dropping it.
                        pass

            names.append(blob.name)

            if len(names) >= max_results:
                break

        return names
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
class AzureDataLakeExporter(AzureBlobExporter):
    """
    Export audit logs to Azure Data Lake Storage Gen2.

    Extends AzureBlobExporter with:
    - Hierarchical namespace support
    - Hive-style partition naming
    - Integration hints for Azure Synapse Analytics

    Example:
        >>> config = CloudExporterConfig(
        ...     provider="azure",
        ...     bucket_name="audit-filesystem",  # ADLS filesystem
        ...     prefix="proxilion/prod/",
        ... )
        >>> exporter = AzureDataLakeExporter(config)
    """

    def __init__(
        self,
        config: CloudExporterConfig,
        use_hive_partitions: bool = True,
    ) -> None:
        """
        Initialize the Data Lake exporter.

        Args:
            config: Exporter configuration.
            use_hive_partitions: Use Hive-style partition naming.
        """
        super().__init__(config)
        self.use_hive_partitions = use_hive_partitions

    def generate_key(
        self,
        timestamp: datetime | None = None,
        batch_id: str | None = None,
    ) -> str:
        """
        Generate a path with Hive-style partitioning.

        Format: {prefix}/year=YYYY/month=MM/day=DD/hour=HH/{batch_id}.{ext}

        Args:
            timestamp: Timestamp for partitioning.
            batch_id: Unique batch identifier.

        Returns:
            The generated path.
        """
        if timestamp is None:
            timestamp = datetime.now(timezone.utc)

        if batch_id is None:
            # NOTE(review): _lock and _batch_counter are presumably defined by
            # BaseCloudExporter (not visible in this file) -- confirm.
            with self._lock:
                self._batch_counter += 1
                batch_id = f"{timestamp.strftime('%Y%m%d%H%M%S')}_{self._batch_counter:06d}"

        # Determine file extension; compression suffix is appended only when
        # compression is enabled (e.g. "json.gz").
        ext = self.config.format.value
        if self.config.compression.value != "none":
            ext += f".{self.config.compression.value}"

        # Build partitioned path. key=value directory names let engines like
        # Synapse/Spark prune partitions by date.
        if self.use_hive_partitions:
            key = (
                f"{self.config.prefix}"
                f"year={timestamp.year:04d}/"
                f"month={timestamp.month:02d}/"
                f"day={timestamp.day:02d}/"
                f"hour={timestamp.hour:02d}/"
                f"{batch_id}.{ext}"
            )
        else:
            # Fall back to the flat YYYY/MM/DD layout of the parent class.
            key = super().generate_key(timestamp, batch_id)

        return key

    def get_synapse_table_sql(
        self,
        table_name: str,
        schema: str = "dbo",
    ) -> str:
        """
        Generate Azure Synapse Analytics CREATE EXTERNAL TABLE SQL.

        The returned SQL is a template for a human operator; it still requires
        a valid credential name and is not executed by this class.

        Args:
            table_name: Name for the external table.
            schema: SQL schema name.

        Returns:
            CREATE EXTERNAL TABLE SQL statement.
        """
        # abfss:// URI addresses the ADLS Gen2 (hierarchical namespace) endpoint.
        location = f"abfss://{self.config.bucket_name}@{self._account_name}.dfs.core.windows.net/{self.config.prefix}"

        sql = f"""
-- Create data source (run once)
CREATE EXTERNAL DATA SOURCE AuditDataLake
WITH (
    LOCATION = '{location}',
    CREDENTIAL = [YourCredential]
);

-- Create file format
CREATE EXTERNAL FILE FORMAT AuditJsonFormat
WITH (
    FORMAT_TYPE = DELIMITEDTEXT,
    FORMAT_OPTIONS (
        FIELD_TERMINATOR = '|',  -- Not used for JSON but required
        STRING_DELIMITER = '',
        FIRST_ROW = 1
    )
);

-- Create external table
CREATE EXTERNAL TABLE [{schema}].[{table_name}] (
    [event_id] NVARCHAR(100),
    [timestamp] DATETIME2,
    [sequence_number] BIGINT,
    [event_type] NVARCHAR(100),
    [user_id] NVARCHAR(200),
    [user_roles] NVARCHAR(1000),
    [session_id] NVARCHAR(200),
    [agent_id] NVARCHAR(200),
    [tool_name] NVARCHAR(200),
    [tool_arguments] NVARCHAR(MAX),
    [authorization_allowed] BIT,
    [authorization_reason] NVARCHAR(500),
    [policies_evaluated] NVARCHAR(1000),
    [event_hash] NVARCHAR(100),
    [previous_hash] NVARCHAR(100)
)
WITH (
    LOCATION = '',
    DATA_SOURCE = AuditDataLake,
    FILE_FORMAT = AuditJsonFormat,
    REJECT_TYPE = VALUE,
    REJECT_VALUE = 0
);
"""
        return sql.strip()
|