proxilion 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. proxilion/__init__.py +136 -0
  2. proxilion/audit/__init__.py +133 -0
  3. proxilion/audit/base_exporters.py +527 -0
  4. proxilion/audit/compliance/__init__.py +130 -0
  5. proxilion/audit/compliance/base.py +457 -0
  6. proxilion/audit/compliance/eu_ai_act.py +603 -0
  7. proxilion/audit/compliance/iso27001.py +544 -0
  8. proxilion/audit/compliance/soc2.py +491 -0
  9. proxilion/audit/events.py +493 -0
  10. proxilion/audit/explainability.py +1173 -0
  11. proxilion/audit/exporters/__init__.py +58 -0
  12. proxilion/audit/exporters/aws_s3.py +636 -0
  13. proxilion/audit/exporters/azure_storage.py +608 -0
  14. proxilion/audit/exporters/cloud_base.py +468 -0
  15. proxilion/audit/exporters/gcp_storage.py +570 -0
  16. proxilion/audit/exporters/multi_exporter.py +498 -0
  17. proxilion/audit/hash_chain.py +652 -0
  18. proxilion/audit/logger.py +543 -0
  19. proxilion/caching/__init__.py +49 -0
  20. proxilion/caching/tool_cache.py +633 -0
  21. proxilion/context/__init__.py +73 -0
  22. proxilion/context/context_window.py +556 -0
  23. proxilion/context/message_history.py +505 -0
  24. proxilion/context/session.py +735 -0
  25. proxilion/contrib/__init__.py +51 -0
  26. proxilion/contrib/anthropic.py +609 -0
  27. proxilion/contrib/google.py +1012 -0
  28. proxilion/contrib/langchain.py +641 -0
  29. proxilion/contrib/mcp.py +893 -0
  30. proxilion/contrib/openai.py +646 -0
  31. proxilion/core.py +3058 -0
  32. proxilion/decorators.py +966 -0
  33. proxilion/engines/__init__.py +287 -0
  34. proxilion/engines/base.py +266 -0
  35. proxilion/engines/casbin_engine.py +412 -0
  36. proxilion/engines/opa_engine.py +493 -0
  37. proxilion/engines/simple.py +437 -0
  38. proxilion/exceptions.py +887 -0
  39. proxilion/guards/__init__.py +54 -0
  40. proxilion/guards/input_guard.py +522 -0
  41. proxilion/guards/output_guard.py +634 -0
  42. proxilion/observability/__init__.py +198 -0
  43. proxilion/observability/cost_tracker.py +866 -0
  44. proxilion/observability/hooks.py +683 -0
  45. proxilion/observability/metrics.py +798 -0
  46. proxilion/observability/session_cost_tracker.py +1063 -0
  47. proxilion/policies/__init__.py +67 -0
  48. proxilion/policies/base.py +304 -0
  49. proxilion/policies/builtin.py +486 -0
  50. proxilion/policies/registry.py +376 -0
  51. proxilion/providers/__init__.py +201 -0
  52. proxilion/providers/adapter.py +468 -0
  53. proxilion/providers/anthropic_adapter.py +330 -0
  54. proxilion/providers/gemini_adapter.py +391 -0
  55. proxilion/providers/openai_adapter.py +294 -0
  56. proxilion/py.typed +0 -0
  57. proxilion/resilience/__init__.py +81 -0
  58. proxilion/resilience/degradation.py +615 -0
  59. proxilion/resilience/fallback.py +555 -0
  60. proxilion/resilience/retry.py +554 -0
  61. proxilion/scheduling/__init__.py +57 -0
  62. proxilion/scheduling/priority_queue.py +419 -0
  63. proxilion/scheduling/scheduler.py +459 -0
  64. proxilion/security/__init__.py +244 -0
  65. proxilion/security/agent_trust.py +968 -0
  66. proxilion/security/behavioral_drift.py +794 -0
  67. proxilion/security/cascade_protection.py +869 -0
  68. proxilion/security/circuit_breaker.py +428 -0
  69. proxilion/security/cost_limiter.py +690 -0
  70. proxilion/security/idor_protection.py +460 -0
  71. proxilion/security/intent_capsule.py +849 -0
  72. proxilion/security/intent_validator.py +495 -0
  73. proxilion/security/memory_integrity.py +767 -0
  74. proxilion/security/rate_limiter.py +509 -0
  75. proxilion/security/scope_enforcer.py +680 -0
  76. proxilion/security/sequence_validator.py +636 -0
  77. proxilion/security/trust_boundaries.py +784 -0
  78. proxilion/streaming/__init__.py +70 -0
  79. proxilion/streaming/detector.py +761 -0
  80. proxilion/streaming/transformer.py +674 -0
  81. proxilion/timeouts/__init__.py +55 -0
  82. proxilion/timeouts/decorators.py +477 -0
  83. proxilion/timeouts/manager.py +545 -0
  84. proxilion/tools/__init__.py +69 -0
  85. proxilion/tools/decorators.py +493 -0
  86. proxilion/tools/registry.py +732 -0
  87. proxilion/types.py +339 -0
  88. proxilion/validation/__init__.py +93 -0
  89. proxilion/validation/pydantic_schema.py +351 -0
  90. proxilion/validation/schema.py +651 -0
  91. proxilion-0.0.1.dist-info/METADATA +872 -0
  92. proxilion-0.0.1.dist-info/RECORD +94 -0
  93. proxilion-0.0.1.dist-info/WHEEL +4 -0
  94. proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,608 @@
1
+ """
2
+ Azure Blob Storage exporter for Proxilion audit logs.
3
+
4
+ Supports exporting audit logs to:
5
+ - Azure Blob Storage
6
+ - Azure Data Lake Storage Gen2 (ADLS)
7
+
8
+ Uses azure-storage-blob if available, falls back to urllib with SAS/connection string.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import hashlib
14
+ import hmac
15
+ import json
16
+ import logging
17
+ import os
18
+ import time
19
+ import urllib.error
20
+ import urllib.parse
21
+ import urllib.request
22
+ from base64 import b64decode, b64encode
23
+ from datetime import datetime, timezone
24
+
25
+ from proxilion.audit.exporters.cloud_base import (
26
+ BaseCloudExporter,
27
+ CloudExporterConfig,
28
+ ExportBatch,
29
+ ExportResult,
30
+ )
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ # Check for azure-storage-blob availability
35
+ try:
36
+ from azure.identity import DefaultAzureCredential
37
+ from azure.storage.blob import BlobServiceClient
38
+ HAS_AZURE_STORAGE = True
39
+ except ImportError:
40
+ HAS_AZURE_STORAGE = False
41
+
42
+
43
class AzureBlobExporter(BaseCloudExporter):
    """
    Export audit logs to Azure Blob Storage.

    Uses azure-storage-blob if installed, otherwise falls back to
    urllib with connection string or SAS token.

    Example:
        >>> config = CloudExporterConfig(
        ...     provider="azure",
        ...     bucket_name="audit-logs",  # Container name
        ...     prefix="proxilion/prod/",
        ... )
        >>> exporter = AzureBlobExporter(config)
        >>> result = exporter.export(events)
    """
59
+
60
    def __init__(self, config: CloudExporterConfig) -> None:
        """
        Initialize the Azure Blob exporter.

        Args:
            config: Exporter configuration.
        """
        super().__init__(config)
        # SDK client handles; stay None when the urllib fallback is active.
        self._client = None
        self._container_client = None
        # Shared Key / SAS credential material for the urllib fallback.
        self._connection_string: str | None = None
        self._account_name: str | None = None
        self._account_key: str | None = None
        self._sas_token: str | None = None
        # Picks the SDK or urllib transport based on installed packages.
        self._initialize_client()
75
+
76
+ def _initialize_client(self) -> None:
77
+ """Initialize the Azure Blob client."""
78
+ if HAS_AZURE_STORAGE:
79
+ self._init_azure_sdk_client()
80
+ else:
81
+ self._init_urllib_client()
82
+
83
+ def _init_azure_sdk_client(self) -> None:
84
+ """Initialize azure-storage-blob client."""
85
+ # Try connection string first (from env or credentials file)
86
+ connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
87
+
88
+ if self.config.credentials_path:
89
+ creds = self._load_credentials_file(self.config.credentials_path)
90
+ connection_string = creds.get("connection_string", connection_string)
91
+
92
+ if connection_string:
93
+ self._client = BlobServiceClient.from_connection_string(connection_string)
94
+ elif self.config.use_instance_credentials:
95
+ # Use DefaultAzureCredential (managed identity, etc.)
96
+ account_url = self._get_account_url()
97
+ credential = DefaultAzureCredential()
98
+ self._client = BlobServiceClient(account_url, credential=credential)
99
+ else:
100
+ raise ValueError(
101
+ "Azure credentials not configured. Set AZURE_STORAGE_CONNECTION_STRING "
102
+ "or use credentials_path or use_instance_credentials."
103
+ )
104
+
105
+ self._container_client = self._client.get_container_client(
106
+ self.config.bucket_name
107
+ )
108
+
109
+ def _init_urllib_client(self) -> None:
110
+ """Initialize urllib-based client."""
111
+ # Load connection string or credentials
112
+ self._connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
113
+
114
+ if self.config.credentials_path:
115
+ creds = self._load_credentials_file(self.config.credentials_path)
116
+ self._connection_string = creds.get("connection_string", self._connection_string)
117
+ self._account_name = creds.get("account_name")
118
+ self._account_key = creds.get("account_key")
119
+ self._sas_token = creds.get("sas_token")
120
+
121
+ # Parse connection string if provided
122
+ if self._connection_string:
123
+ self._parse_connection_string()
124
+
125
+ def _load_credentials_file(self, path: str) -> dict[str, str]:
126
+ """Load credentials from a JSON file."""
127
+ try:
128
+ with open(path) as f:
129
+ return json.load(f)
130
+ except Exception as e:
131
+ logger.warning(f"Failed to load credentials from {path}: {e}")
132
+ return {}
133
+
134
+ def _parse_connection_string(self) -> None:
135
+ """Parse Azure connection string to extract credentials."""
136
+ if not self._connection_string:
137
+ return
138
+
139
+ parts = {}
140
+ for part in self._connection_string.split(";"):
141
+ if "=" in part:
142
+ key, value = part.split("=", 1)
143
+ parts[key] = value
144
+
145
+ self._account_name = parts.get("AccountName")
146
+ self._account_key = parts.get("AccountKey")
147
+
148
+ def _get_account_url(self) -> str:
149
+ """Get the blob service account URL."""
150
+ # Try to get from endpoint URL config
151
+ if self.config.endpoint_url:
152
+ return self.config.endpoint_url
153
+
154
+ # Try to get account name from environment or credentials
155
+ account_name = self._account_name or os.environ.get("AZURE_STORAGE_ACCOUNT")
156
+
157
+ if not account_name:
158
+ raise ValueError(
159
+ "Azure storage account name not configured. Set endpoint_url, "
160
+ "AZURE_STORAGE_ACCOUNT, or provide in credentials."
161
+ )
162
+
163
+ return f"https://{account_name}.blob.core.windows.net"
164
+
165
+ def export_batch(self, batch: ExportBatch) -> ExportResult:
166
+ """
167
+ Export a batch to Azure Blob Storage.
168
+
169
+ Args:
170
+ batch: The batch to export.
171
+
172
+ Returns:
173
+ ExportResult with success/failure information.
174
+ """
175
+ start_time = time.time()
176
+
177
+ try:
178
+ # Prepare data
179
+ data = batch.to_bytes(self.config.compression)
180
+ key = self.generate_key(batch.created_at, batch.batch_id)
181
+ checksum = self.compute_checksum(data)
182
+
183
+ # Upload with retry
184
+ self.with_retry(self._upload_blob, key, data)
185
+
186
+ duration_ms = (time.time() - start_time) * 1000
187
+
188
+ account_url = self._get_account_url()
189
+ destination = f"{account_url}/{self.config.bucket_name}/{key}"
190
+
191
+ logger.info(
192
+ f"Exported {batch.event_count} events to {destination}"
193
+ )
194
+
195
+ return ExportResult(
196
+ success=True,
197
+ events_exported=batch.event_count,
198
+ batch_id=batch.batch_id,
199
+ destination=destination,
200
+ duration_ms=duration_ms,
201
+ bytes_written=len(data),
202
+ checksum=checksum,
203
+ )
204
+
205
+ except Exception as e:
206
+ duration_ms = (time.time() - start_time) * 1000
207
+ logger.error(f"Failed to export batch {batch.batch_id}: {e}")
208
+
209
+ return ExportResult(
210
+ success=False,
211
+ events_exported=0,
212
+ batch_id=batch.batch_id,
213
+ error=str(e),
214
+ duration_ms=duration_ms,
215
+ )
216
+
217
+ def _upload_blob(self, blob_name: str, data: bytes) -> None:
218
+ """
219
+ Upload a blob to Azure Storage.
220
+
221
+ Args:
222
+ blob_name: Blob name (path within container).
223
+ data: Blob data.
224
+ """
225
+ if HAS_AZURE_STORAGE:
226
+ self._upload_azure_sdk(blob_name, data)
227
+ else:
228
+ self._upload_urllib(blob_name, data)
229
+
230
+ def _upload_azure_sdk(self, blob_name: str, data: bytes) -> None:
231
+ """Upload using azure-storage-blob."""
232
+ blob_client = self._container_client.get_blob_client(blob_name)
233
+ blob_client.upload_blob(
234
+ data,
235
+ overwrite=True,
236
+ content_settings={
237
+ "content_type": self.get_content_type(),
238
+ },
239
+ )
240
+
241
+ def _upload_urllib(self, blob_name: str, data: bytes) -> None:
242
+ """Upload using urllib with Shared Key or SAS token."""
243
+ if not self._account_name:
244
+ raise ValueError(
245
+ "Azure credentials not configured. Set AZURE_STORAGE_CONNECTION_STRING "
246
+ "or provide account credentials."
247
+ )
248
+
249
+ # Build URL
250
+ url = (
251
+ f"https://{self._account_name}.blob.core.windows.net/"
252
+ f"{self.config.bucket_name}/{blob_name}"
253
+ )
254
+
255
+ if self._sas_token:
256
+ # Use SAS token
257
+ url = f"{url}?{self._sas_token}"
258
+ headers = self._get_blob_headers(data)
259
+ elif self._account_key:
260
+ # Use Shared Key authentication
261
+ headers = self._sign_request(blob_name, data)
262
+ else:
263
+ raise ValueError("No authentication method available")
264
+
265
+ request = urllib.request.Request(url, data=data, headers=headers, method="PUT")
266
+
267
+ try:
268
+ with urllib.request.urlopen(
269
+ request, timeout=self.config.read_timeout
270
+ ) as response:
271
+ if response.status not in (200, 201):
272
+ raise ValueError(f"Azure upload failed with status {response.status}")
273
+ except urllib.error.HTTPError as e:
274
+ raise ValueError(f"Azure upload failed: {e.code} {e.reason}") from e
275
+
276
+ def _get_blob_headers(self, data: bytes) -> dict[str, str]:
277
+ """Get basic headers for blob upload."""
278
+ return {
279
+ "Content-Type": self.get_content_type(),
280
+ "Content-Length": str(len(data)),
281
+ "x-ms-blob-type": "BlockBlob",
282
+ "x-ms-version": "2020-10-02",
283
+ }
284
+
285
    def _sign_request(self, blob_name: str, data: bytes) -> dict[str, str]:
        """
        Sign a request using Azure Shared Key.

        Builds the canonical string-to-sign for a PUT Blob request per the
        Azure "Authorize with Shared Key" scheme, then signs it with
        HMAC-SHA256 using the base64-decoded account key.

        Args:
            blob_name: Blob name.
            data: Request data.

        Returns:
            Dict of headers including Authorization.
        """
        # Current time in RFC 1123 format (required by x-ms-date).
        now = datetime.now(timezone.utc)
        x_ms_date = now.strftime("%a, %d %b %Y %H:%M:%S GMT")
        x_ms_version = "2020-10-02"

        # Build canonical headers: all x-ms-* headers, lexicographic order.
        content_length = str(len(data))
        content_type = self.get_content_type()

        canonical_headers = (
            f"x-ms-blob-type:BlockBlob\n"
            f"x-ms-date:{x_ms_date}\n"
            f"x-ms-version:{x_ms_version}"
        )

        # Build canonical resource: /<account>/<container>/<blob>.
        canonical_resource = f"/{self._account_name}/{self.config.bucket_name}/{blob_name}"

        # Build string to sign: VERB, then 11 fixed standard-header slots
        # (most empty here), then canonical headers and resource. The slot
        # order is fixed by the Shared Key spec and must not be changed.
        # NOTE(review): for service versions >= 2015-02-21 Content-Length
        # must be the empty string when zero; batches here are non-empty,
        # so this should not trigger — confirm if empty payloads are possible.
        string_to_sign = (
            f"PUT\n"
            f"\n"  # Content-Encoding
            f"\n"  # Content-Language
            f"{content_length}\n"
            f"\n"  # Content-MD5
            f"{content_type}\n"
            f"\n"  # Date
            f"\n"  # If-Modified-Since
            f"\n"  # If-Match
            f"\n"  # If-None-Match
            f"\n"  # If-Unmodified-Since
            f"\n"  # Range
            f"{canonical_headers}\n"
            f"{canonical_resource}"
        )

        # Sign with HMAC-SHA256; the account key is stored base64-encoded.
        key = b64decode(self._account_key)
        signature = b64encode(
            hmac.new(key, string_to_sign.encode(), hashlib.sha256).digest()
        ).decode()

        return {
            "Authorization": f"SharedKey {self._account_name}:{signature}",
            "Content-Type": content_type,
            "Content-Length": content_length,
            "x-ms-blob-type": "BlockBlob",
            "x-ms-date": x_ms_date,
            "x-ms-version": x_ms_version,
        }
346
+
347
    def health_check(self) -> bool:
        """
        Check if we can connect to Azure Blob Storage.

        With the SDK, fetches container properties; in the urllib fallback,
        issues a signed (or SAS-authenticated) GET container request. Any
        exception is treated as unhealthy.

        Returns:
            True if healthy.
        """
        try:
            if HAS_AZURE_STORAGE:
                # Raises if the container is missing or auth fails.
                self._container_client.get_container_properties()
            else:
                # Try to get container properties via the REST API.
                # NOTE(review): unlike _get_account_url(), this path
                # hard-codes the public-cloud host and ignores
                # config.endpoint_url — confirm whether custom endpoints
                # need support here.
                url = (
                    f"https://{self._account_name}.blob.core.windows.net/"
                    f"{self.config.bucket_name}?restype=container"
                )

                if self._sas_token:
                    # SAS auth: token appended as extra query parameters
                    # (the URL already has ?restype=container).
                    url = f"{url}&{self._sas_token}"
                    headers = {"x-ms-version": "2020-10-02"}
                else:
                    # Shared Key auth; the signer raises if no account key
                    # is configured, which the except below reports as
                    # unhealthy.
                    headers = self._sign_container_request()

                request = urllib.request.Request(url, headers=headers)
                with urllib.request.urlopen(request, timeout=10) as response:
                    return response.status == 200

            return True
        except Exception as e:
            logger.warning(f"Azure health check failed: {e}")
            return False
378
+
379
    def _sign_container_request(self) -> dict[str, str]:
        """
        Sign a GET container request.

        Builds the Shared Key string-to-sign for the health check's
        GET <container>?restype=container request.

        Returns:
            Headers (Authorization, x-ms-date, x-ms-version) for the request.
        """
        # RFC 1123 timestamp required by the x-ms-date header.
        now = datetime.now(timezone.utc)
        x_ms_date = now.strftime("%a, %d %b %Y %H:%M:%S GMT")
        x_ms_version = "2020-10-02"

        # Canonicalized x-ms-* headers, lexicographic order.
        canonical_headers = (
            f"x-ms-date:{x_ms_date}\n"
            f"x-ms-version:{x_ms_version}"
        )

        # Canonicalized resource includes the restype query parameter.
        canonical_resource = (
            f"/{self._account_name}/{self.config.bucket_name}\n"
            f"restype:container"
        )

        # Build string to sign (GET + 11 empty headers + canonical headers + resource)
        empty_headers = "\n" * 11
        string_to_sign = (
            f"GET\n"
            f"{empty_headers}"
            f"{canonical_headers}\n"
            f"{canonical_resource}"
        )

        # HMAC-SHA256 over the string-to-sign with the decoded account key.
        # NOTE(review): assumes _account_key is set; b64decode(None) raises
        # TypeError, which health_check() catches and reports as unhealthy.
        key = b64decode(self._account_key)
        signature = b64encode(
            hmac.new(key, string_to_sign.encode(), hashlib.sha256).digest()
        ).decode()

        return {
            "Authorization": f"SharedKey {self._account_name}:{signature}",
            "x-ms-date": x_ms_date,
            "x-ms-version": x_ms_version,
        }
414
+
415
    def list_exports(
        self,
        start_date: datetime | None = None,
        end_date: datetime | None = None,
        max_results: int = 1000,
    ) -> list[str]:
        """
        List exported blobs in the container.

        Args:
            start_date: Filter to exports after this date (used only to
                narrow the listing prefix by year).
            end_date: Filter to exports before this date.
            max_results: Maximum number of results to return.

        Returns:
            List of blob names.

        Raises:
            NotImplementedError: If azure-storage-blob is not installed.
        """
        if not HAS_AZURE_STORAGE:
            raise NotImplementedError("list_exports requires azure-storage-blob")

        # Narrow the server-side listing by year when a start date is given.
        prefix = self.config.prefix
        if start_date:
            prefix += f"{start_date.year:04d}/"

        # results_per_page is only a page size; max_results is enforced by
        # the early break in the loop below.
        blobs = self._container_client.list_blobs(
            name_starts_with=prefix,
            results_per_page=max_results,
        )

        names = []
        for blob in blobs:
            # Filter by date if needed
            # NOTE(review): this assumes a .../YYYY/MM/DD/<file> layout
            # (date parts at indices -4..-2) — confirm against the base
            # class generate_key(); an extra hour directory would shift
            # these indices and make the parsed date wrong.
            if end_date:
                parts = blob.name.split("/")
                if len(parts) >= 4:
                    try:
                        year = int(parts[-4])
                        month = int(parts[-3])
                        day = int(parts[-2])
                        blob_date = datetime(year, month, day, tzinfo=timezone.utc)
                        if blob_date > end_date:
                            continue
                    except (ValueError, IndexError):
                        # Unparseable path segments: keep the blob.
                        pass

            names.append(blob.name)

            if len(names) >= max_results:
                break

        return names
466
+
467
+
468
class AzureDataLakeExporter(AzureBlobExporter):
    """
    Export audit logs to Azure Data Lake Storage Gen2.

    Extends AzureBlobExporter with:
    - Hierarchical namespace support
    - Hive-style partition naming
    - Integration hints for Azure Synapse Analytics

    Example:
        >>> config = CloudExporterConfig(
        ...     provider="azure",
        ...     bucket_name="audit-filesystem",  # ADLS filesystem
        ...     prefix="proxilion/prod/",
        ... )
        >>> exporter = AzureDataLakeExporter(config)
    """
485
+
486
    def __init__(
        self,
        config: CloudExporterConfig,
        use_hive_partitions: bool = True,
    ) -> None:
        """
        Initialize the Data Lake exporter.

        Args:
            config: Exporter configuration.
            use_hive_partitions: Use Hive-style (key=value) partition naming,
                which analytics engines can auto-discover.
        """
        super().__init__(config)
        # Controls generate_key(): Hive layout vs the base class's flat layout.
        self.use_hive_partitions = use_hive_partitions
500
+
501
+ def generate_key(
502
+ self,
503
+ timestamp: datetime | None = None,
504
+ batch_id: str | None = None,
505
+ ) -> str:
506
+ """
507
+ Generate a path with Hive-style partitioning.
508
+
509
+ Format: {prefix}/year=YYYY/month=MM/day=DD/hour=HH/{batch_id}.{ext}
510
+
511
+ Args:
512
+ timestamp: Timestamp for partitioning.
513
+ batch_id: Unique batch identifier.
514
+
515
+ Returns:
516
+ The generated path.
517
+ """
518
+ if timestamp is None:
519
+ timestamp = datetime.now(timezone.utc)
520
+
521
+ if batch_id is None:
522
+ with self._lock:
523
+ self._batch_counter += 1
524
+ batch_id = f"{timestamp.strftime('%Y%m%d%H%M%S')}_{self._batch_counter:06d}"
525
+
526
+ # Determine file extension
527
+ ext = self.config.format.value
528
+ if self.config.compression.value != "none":
529
+ ext += f".{self.config.compression.value}"
530
+
531
+ # Build partitioned path
532
+ if self.use_hive_partitions:
533
+ key = (
534
+ f"{self.config.prefix}"
535
+ f"year={timestamp.year:04d}/"
536
+ f"month={timestamp.month:02d}/"
537
+ f"day={timestamp.day:02d}/"
538
+ f"hour={timestamp.hour:02d}/"
539
+ f"{batch_id}.{ext}"
540
+ )
541
+ else:
542
+ key = super().generate_key(timestamp, batch_id)
543
+
544
+ return key
545
+
546
    def get_synapse_table_sql(
        self,
        table_name: str,
        schema: str = "dbo",
    ) -> str:
        """
        Generate Azure Synapse Analytics CREATE EXTERNAL TABLE SQL.

        Args:
            table_name: Name for the external table.
            schema: SQL schema name.

        Returns:
            CREATE EXTERNAL TABLE SQL statement.
        """
        # abfss:// targets the ADLS Gen2 (dfs) endpoint, not the blob endpoint.
        # NOTE(review): _account_name is only populated by the urllib fallback
        # or a parsed connection string — confirm it is set before generating
        # this SQL, otherwise the location embeds "None".
        location = f"abfss://{self.config.bucket_name}@{self._account_name}.dfs.core.windows.net/{self.config.prefix}"

        # Template output: the credential placeholder and empty LOCATION are
        # intended to be filled in by the operator before running.
        sql = f"""
        -- Create data source (run once)
        CREATE EXTERNAL DATA SOURCE AuditDataLake
        WITH (
            LOCATION = '{location}',
            CREDENTIAL = [YourCredential]
        );

        -- Create file format
        CREATE EXTERNAL FILE FORMAT AuditJsonFormat
        WITH (
            FORMAT_TYPE = DELIMITEDTEXT,
            FORMAT_OPTIONS (
                FIELD_TERMINATOR = '|', -- Not used for JSON but required
                STRING_DELIMITER = '',
                FIRST_ROW = 1
            )
        );

        -- Create external table
        CREATE EXTERNAL TABLE [{schema}].[{table_name}] (
            [event_id] NVARCHAR(100),
            [timestamp] DATETIME2,
            [sequence_number] BIGINT,
            [event_type] NVARCHAR(100),
            [user_id] NVARCHAR(200),
            [user_roles] NVARCHAR(1000),
            [session_id] NVARCHAR(200),
            [agent_id] NVARCHAR(200),
            [tool_name] NVARCHAR(200),
            [tool_arguments] NVARCHAR(MAX),
            [authorization_allowed] BIT,
            [authorization_reason] NVARCHAR(500),
            [policies_evaluated] NVARCHAR(1000),
            [event_hash] NVARCHAR(100),
            [previous_hash] NVARCHAR(100)
        )
        WITH (
            LOCATION = '',
            DATA_SOURCE = AuditDataLake,
            FILE_FORMAT = AuditJsonFormat,
            REJECT_TYPE = VALUE,
            REJECT_VALUE = 0
        );
        """
        return sql.strip()