pangea-sdk 3.8.0b1__py3-none-any.whl → 5.4.0b1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. pangea/__init__.py +1 -1
  2. pangea/asyncio/file_uploader.py +1 -1
  3. pangea/asyncio/request.py +56 -34
  4. pangea/asyncio/services/__init__.py +4 -0
  5. pangea/asyncio/services/ai_guard.py +75 -0
  6. pangea/asyncio/services/audit.py +192 -31
  7. pangea/asyncio/services/authn.py +187 -109
  8. pangea/asyncio/services/authz.py +285 -0
  9. pangea/asyncio/services/base.py +21 -2
  10. pangea/asyncio/services/embargo.py +2 -2
  11. pangea/asyncio/services/file_scan.py +24 -9
  12. pangea/asyncio/services/intel.py +108 -34
  13. pangea/asyncio/services/prompt_guard.py +73 -0
  14. pangea/asyncio/services/redact.py +72 -4
  15. pangea/asyncio/services/sanitize.py +217 -0
  16. pangea/asyncio/services/share.py +246 -73
  17. pangea/asyncio/services/vault.py +1710 -750
  18. pangea/crypto/rsa.py +135 -0
  19. pangea/deep_verify.py +7 -1
  20. pangea/dump_audit.py +9 -8
  21. pangea/request.py +87 -59
  22. pangea/response.py +49 -31
  23. pangea/services/__init__.py +4 -0
  24. pangea/services/ai_guard.py +128 -0
  25. pangea/services/audit/audit.py +205 -42
  26. pangea/services/audit/models.py +56 -8
  27. pangea/services/audit/signing.py +6 -5
  28. pangea/services/audit/util.py +3 -3
  29. pangea/services/authn/authn.py +140 -70
  30. pangea/services/authn/models.py +167 -11
  31. pangea/services/authz.py +400 -0
  32. pangea/services/base.py +39 -8
  33. pangea/services/embargo.py +2 -2
  34. pangea/services/file_scan.py +32 -15
  35. pangea/services/intel.py +157 -32
  36. pangea/services/prompt_guard.py +83 -0
  37. pangea/services/redact.py +152 -4
  38. pangea/services/sanitize.py +371 -0
  39. pangea/services/share/share.py +683 -107
  40. pangea/services/vault/models/asymmetric.py +120 -18
  41. pangea/services/vault/models/common.py +439 -141
  42. pangea/services/vault/models/keys.py +94 -0
  43. pangea/services/vault/models/secret.py +27 -3
  44. pangea/services/vault/models/symmetric.py +68 -22
  45. pangea/services/vault/vault.py +1690 -749
  46. pangea/tools.py +6 -7
  47. pangea/utils.py +16 -27
  48. pangea/verify_audit.py +270 -83
  49. {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.4.0b1.dist-info}/METADATA +43 -35
  50. pangea_sdk-5.4.0b1.dist-info/RECORD +60 -0
  51. {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.4.0b1.dist-info}/WHEEL +1 -1
  52. pangea_sdk-3.8.0b1.dist-info/RECORD +0 -50
@@ -1,11 +1,14 @@
1
1
  # Copyright 2022 Pangea Cyber Corporation
2
2
  # Author: Pangea Cyber Corporation
3
+ from __future__ import annotations
4
+
3
5
  import datetime
4
- from typing import Any, Dict, List, Optional, Union
6
+ from typing import Any, Dict, Iterable, List, Optional, Sequence, Union
5
7
 
6
8
  import pangea.exceptions as pexc
7
9
  from pangea.asyncio.services.base import ServiceBaseAsync
8
- from pangea.response import PangeaResponse
10
+ from pangea.config import PangeaConfig
11
+ from pangea.response import PangeaResponse, PangeaResponseResult
9
12
  from pangea.services.audit.audit import AuditBase
10
13
  from pangea.services.audit.exceptions import AuditException
11
14
  from pangea.services.audit.models import (
@@ -13,6 +16,7 @@ from pangea.services.audit.models import (
13
16
  DownloadRequest,
14
17
  DownloadResult,
15
18
  Event,
19
+ ExportRequest,
16
20
  LogBulkResult,
17
21
  LogResult,
18
22
  PublishedRoot,
@@ -56,14 +60,33 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
56
60
 
57
61
  def __init__(
58
62
  self,
59
- token,
60
- config=None,
63
+ token: str,
64
+ config: PangeaConfig | None = None,
61
65
  private_key_file: str = "",
62
- public_key_info: Dict[str, str] = {},
63
- tenant_id: Optional[str] = None,
64
- logger_name="pangea",
65
- config_id: Optional[str] = None,
66
- ):
66
+ public_key_info: dict[str, str] = {},
67
+ tenant_id: str | None = None,
68
+ logger_name: str = "pangea",
69
+ config_id: str | None = None,
70
+ ) -> None:
71
+ """
72
+ Audit client
73
+
74
+ Initializes a new Audit client.
75
+
76
+ Args:
77
+ token: Pangea API token.
78
+ config: Configuration.
79
+ private_key_file: Private key filepath.
80
+ public_key_info: Public key information.
81
+ tenant_id: Tenant ID.
82
+ logger_name: Logger name.
83
+ config_id: Configuration ID.
84
+
85
+ Examples:
86
+ config = PangeaConfig(domain="pangea_domain")
87
+ audit = AuditAsync(token="pangea_token", config=config)
88
+ """
89
+
67
90
  # FIXME: Temporary check to deprecate config_id from PangeaConfig.
68
91
  # Delete it when deprecate PangeaConfig.config_id
69
92
  if config_id and config is not None and config.config_id is not None:
@@ -151,14 +174,16 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
151
174
  verbose: Optional[bool] = None,
152
175
  ) -> PangeaResponse[LogResult]:
153
176
  """
154
- Log an entry
177
+ Log an event
155
178
 
156
179
  Create a log entry in the Secure Audit Log.
180
+
157
181
  Args:
158
182
  event (dict[str, Any]): event to be logged
159
183
  verify (bool, optional): True to verify logs consistency after response.
160
184
  sign_local (bool, optional): True to sign event with local key.
161
185
  verbose (bool, optional): True to get a more verbose response.
186
+
162
187
  Raises:
163
188
  AuditException: If an audit based api exception happens
164
189
  PangeaAPIException: If an API Error happens
@@ -169,18 +194,12 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
169
194
  Available response fields can be found in our [API documentation](https://pangea.cloud/docs/api/audit#log-an-entry).
170
195
 
171
196
  Examples:
172
- try:
173
- log_response = audit.log({"message"="Hello world"}, verbose=False)
174
- print(f"Response. Hash: {log_response.result.hash}")
175
- except pe.PangeaAPIException as e:
176
- print(f"Request Error: {e.response.summary}")
177
- for err in e.errors:
178
- print(f"\\t{err.detail} \\n")
197
+ response = await audit.log_event({"message": "hello world"}, verbose=True)
179
198
  """
180
199
 
181
200
  input = self._get_log_request(event, sign_local=sign_local, verify=verify, verbose=verbose)
182
201
  response: PangeaResponse[LogResult] = await self.request.post(
183
- "v1/log", LogResult, data=input.dict(exclude_none=True)
202
+ "v1/log", LogResult, data=input.model_dump(exclude_none=True)
184
203
  )
185
204
  if response.success and response.result is not None:
186
205
  self._process_log_result(response.result, verify=verify)
@@ -216,7 +235,7 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
216
235
 
217
236
  input = self._get_log_request(events, sign_local=sign_local, verify=False, verbose=verbose)
218
237
  response: PangeaResponse[LogBulkResult] = await self.request.post(
219
- "v2/log", LogBulkResult, data=input.dict(exclude_none=True)
238
+ "v2/log", LogBulkResult, data=input.model_dump(exclude_none=True)
220
239
  )
221
240
  if response.success and response.result is not None:
222
241
  for result in response.result.results:
@@ -254,7 +273,7 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
254
273
  input = self._get_log_request(events, sign_local=sign_local, verify=False, verbose=verbose)
255
274
  try:
256
275
  response: PangeaResponse[LogBulkResult] = await self.request.post(
257
- "v2/log_async", LogBulkResult, data=input.dict(exclude_none=True), poll_result=False
276
+ "v2/log_async", LogBulkResult, data=input.model_dump(exclude_none=True), poll_result=False
258
277
  )
259
278
  except pexc.AcceptedRequestException as e:
260
279
  return e.response
@@ -272,10 +291,11 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
272
291
  end: Optional[Union[datetime.datetime, str]] = None,
273
292
  limit: Optional[int] = None,
274
293
  max_results: Optional[int] = None,
275
- search_restriction: Optional[dict] = None,
294
+ search_restriction: Optional[Dict[str, Sequence[str]]] = None,
276
295
  verbose: Optional[bool] = None,
277
296
  verify_consistency: bool = False,
278
297
  verify_events: bool = True,
298
+ return_context: Optional[bool] = None,
279
299
  ) -> PangeaResponse[SearchOutput]:
280
300
  """
281
301
  Search the log
@@ -301,10 +321,11 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
301
321
  end (datetime, optional): An RFC-3339 formatted timestamp, or relative time adjustment from the current time.
302
322
  limit (int, optional): Optional[int] = None,
303
323
  max_results (int, optional): Maximum number of results to return.
304
- search_restriction (dict, optional): A list of keys to restrict the search results to. Useful for partitioning data available to the query string.
324
+ search_restriction (Dict[str, Sequence[str]], optional): A list of keys to restrict the search results to. Useful for partitioning data available to the query string.
305
325
  verbose (bool, optional): If true, response include root and membership and consistency proofs.
306
326
  verify_consistency (bool): True to verify logs consistency
307
327
  verify_events (bool): True to verify hash events and signatures
328
+ return_context (bool): Return the context data needed to decrypt secure audit events that have been redacted with format preserving encryption.
308
329
 
309
330
  Raises:
310
331
  AuditException: If an audit based api exception happens
@@ -332,10 +353,11 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
332
353
  max_results=max_results,
333
354
  search_restriction=search_restriction,
334
355
  verbose=verbose,
356
+ return_context=return_context,
335
357
  )
336
358
 
337
359
  response: PangeaResponse[SearchOutput] = await self.request.post(
338
- "v1/search", SearchOutput, data=input.dict(exclude_none=True)
360
+ "v1/search", SearchOutput, data=input.model_dump(exclude_none=True)
339
361
  )
340
362
  if verify_consistency:
341
363
  await self.update_published_roots(response.result) # type: ignore[arg-type]
@@ -347,8 +369,10 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
347
369
  id: str,
348
370
  limit: Optional[int] = 20,
349
371
  offset: Optional[int] = 0,
372
+ assert_search_restriction: Optional[Dict[str, Sequence[str]]] = None,
350
373
  verify_consistency: bool = False,
351
374
  verify_events: bool = True,
375
+ return_context: Optional[bool] = None,
352
376
  ) -> PangeaResponse[SearchResultOutput]:
353
377
  """
354
378
  Results of a search
@@ -361,8 +385,11 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
361
385
  id (string): the id of a search action, found in `response.result.id`
362
386
  limit (integer, optional): the maximum number of results to return, default is 20
363
387
  offset (integer, optional): the position of the first result to return, default is 0
388
+ assert_search_restriction (Dict[str, Sequence[str]], optional): Assert the requested search results were queried with the exact same search restrictions, to ensure the results comply to the expected restrictions.
364
389
  verify_consistency (bool): True to verify logs consistency
365
390
  verify_events (bool): True to verify hash events and signatures
391
+ return_context (bool): Return the context data needed to decrypt secure audit events that have been redacted with format preserving encryption.
392
+
366
393
  Raises:
367
394
  AuditException: If an audit based api exception happens
368
395
  PangeaAPIException: If an API Error happens
@@ -378,7 +405,8 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
378
405
  result_res: PangeaResponse[SearchResultsOutput] = audit.results(
379
406
  id=search_res.result.id,
380
407
  limit=10,
381
- offset=0)
408
+ offset=0,
409
+ assert_search_restriction={'source': ["monitor"]})
382
410
  """
383
411
 
384
412
  if limit <= 0: # type: ignore[operator]
@@ -391,13 +419,116 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
391
419
  id=id,
392
420
  limit=limit,
393
421
  offset=offset,
422
+ assert_search_restriction=assert_search_restriction,
423
+ return_context=return_context,
394
424
  )
395
- response = await self.request.post("v1/results", SearchResultOutput, data=input.dict(exclude_none=True))
425
+ response = await self.request.post("v1/results", SearchResultOutput, data=input.model_dump(exclude_none=True))
396
426
  if verify_consistency and response.result is not None:
397
427
  await self.update_published_roots(response.result)
398
428
 
399
429
  return self.handle_results_response(response, verify_consistency, verify_events)
400
430
 
431
+ async def export(
432
+ self,
433
+ *,
434
+ format: DownloadFormat = DownloadFormat.CSV,
435
+ start: Optional[datetime.datetime] = None,
436
+ end: Optional[datetime.datetime] = None,
437
+ order: Optional[SearchOrder] = None,
438
+ order_by: Optional[str] = None,
439
+ verbose: bool = True,
440
+ ) -> PangeaResponse[PangeaResponseResult]:
441
+ """
442
+ Export from the audit log
443
+
444
+ Bulk export of data from the Secure Audit Log, with optional filtering.
445
+
446
+ OperationId: audit_post_v1_export
447
+
448
+ Args:
449
+ format: Format for the records.
450
+ start: The start of the time range to perform the search on.
451
+ end: The end of the time range to perform the search on. If omitted,
452
+ then all records up to the latest will be searched.
453
+ order: Specify the sort order of the response.
454
+ order_by: Name of column to sort the results by.
455
+ verbose: Whether or not to include the root hash of the tree and the
456
+ membership proof for each record.
457
+
458
+ Raises:
459
+ AuditException: If an audit based api exception happens
460
+ PangeaAPIException: If an API Error happens
461
+
462
+ Examples:
463
+ export_res = await audit.export(verbose=False)
464
+
465
+ # Export may take several dozens of minutes, so polling for the result
466
+ # should be done in a loop. That is omitted here for brevity's sake.
467
+ try:
468
+ await audit.poll_result(request_id=export_res.request_id)
469
+ except AcceptedRequestException:
470
+ # Retry later.
471
+
472
+ # Download the result when it's ready.
473
+ download_res = await audit.download_results(request_id=export_res.request_id)
474
+ download_res.result.dest_url
475
+ # => https://pangea-runtime.s3.amazonaws.com/audit/xxxxx/search_results_[...]
476
+ """
477
+ input = ExportRequest(
478
+ format=format,
479
+ start=start,
480
+ end=end,
481
+ order=order,
482
+ order_by=order_by,
483
+ verbose=verbose,
484
+ )
485
+ try:
486
+ return await self.request.post(
487
+ "v1/export", PangeaResponseResult, data=input.model_dump(exclude_none=True), poll_result=False
488
+ )
489
+ except pexc.AcceptedRequestException as e:
490
+ return e.response
491
+
492
+ async def log_stream(self, data: dict) -> PangeaResponse[PangeaResponseResult]:
493
+ """
494
+ Log streaming endpoint
495
+
496
+ This API allows 3rd party vendors (like Auth0) to stream events to this
497
+ endpoint where the structure of the payload varies across different
498
+ vendors.
499
+
500
+ OperationId: audit_post_v1_log_stream
501
+
502
+ Args:
503
+ data: Event data. The exact schema of this will vary by vendor.
504
+
505
+ Raises:
506
+ AuditException: If an audit based api exception happens
507
+ PangeaAPIException: If an API Error happens
508
+
509
+ Examples:
510
+ data = {
511
+ "logs": [
512
+ {
513
+ "log_id": "some log ID",
514
+ "data": {
515
+ "date": "2024-03-29T17:26:50.193Z",
516
+ "type": "sapi",
517
+ "description": "Create a log stream",
518
+ "client_id": "some client ID",
519
+ "ip": "127.0.0.1",
520
+ "user_agent": "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0",
521
+ "user_id": "some user ID",
522
+ },
523
+ }
524
+ # ...
525
+ ]
526
+ }
527
+
528
+ response = await audit.log_stream(data)
529
+ """
530
+ return await self.request.post("v1/log_stream", PangeaResponseResult, data=data)
531
+
401
532
  async def root(self, tree_size: Optional[int] = None) -> PangeaResponse[RootResult]:
402
533
  """
403
534
  Tamperproof verification
@@ -420,10 +551,14 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
420
551
  response = audit.root(tree_size=7)
421
552
  """
422
553
  input = RootRequest(tree_size=tree_size)
423
- return await self.request.post("v1/root", RootResult, data=input.dict(exclude_none=True))
554
+ return await self.request.post("v1/root", RootResult, data=input.model_dump(exclude_none=True))
424
555
 
425
556
  async def download_results(
426
- self, result_id: str, format: Optional[DownloadFormat] = None
557
+ self,
558
+ result_id: Optional[str] = None,
559
+ format: DownloadFormat = DownloadFormat.CSV,
560
+ request_id: Optional[str] = None,
561
+ return_context: Optional[bool] = None,
427
562
  ) -> PangeaResponse[DownloadResult]:
428
563
  """
429
564
  Download search results
@@ -435,6 +570,8 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
435
570
  Args:
436
571
  result_id: ID returned by the search API.
437
572
  format: Format for the records.
573
+ request_id: ID returned by the export API.
574
+ return_context (bool): Return the context data needed to decrypt secure audit events that have been redacted with format preserving encryption.
438
575
 
439
576
  Returns:
440
577
  URL where search results can be downloaded.
@@ -450,8 +587,13 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
450
587
  )
451
588
  """
452
589
 
453
- input = DownloadRequest(result_id=result_id, format=format)
454
- return await self.request.post("v1/download_results", DownloadResult, data=input.dict(exclude_none=True))
590
+ if request_id is None and result_id is None:
591
+ raise ValueError("must pass one of `request_id` or `result_id`")
592
+
593
+ input = DownloadRequest(
594
+ request_id=request_id, result_id=result_id, format=format, return_context=return_context
595
+ )
596
+ return await self.request.post("v1/download_results", DownloadResult, data=input.model_dump(exclude_none=True))
455
597
 
456
598
  async def update_published_roots(self, result: SearchResultOutput):
457
599
  """Fetches series of published root hashes from Arweave
@@ -476,12 +618,31 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
476
618
  for tree_size in tree_sizes:
477
619
  pub_root = None
478
620
  if tree_size in arweave_roots:
479
- pub_root = PublishedRoot(**arweave_roots[tree_size].dict(exclude_none=True))
621
+ pub_root = PublishedRoot(**arweave_roots[tree_size].model_dump(exclude_none=True))
480
622
  pub_root.source = RootSource.ARWEAVE
481
623
  elif self.allow_server_roots:
482
624
  resp = await self.root(tree_size=tree_size)
483
625
  if resp.success and resp.result is not None:
484
- pub_root = PublishedRoot(**resp.result.data.dict(exclude_none=True))
626
+ pub_root = PublishedRoot(**resp.result.data.model_dump(exclude_none=True))
485
627
  pub_root.source = RootSource.PANGEA
486
628
  if pub_root is not None:
487
629
  self.pub_roots[tree_size] = pub_root
630
+
631
+ await self._fix_consistency_proofs(tree_sizes)
632
+
633
+ async def _fix_consistency_proofs(self, tree_sizes: Iterable[int]) -> None:
634
+ # on very rare occasions, the consistency proof in Arweave may be wrong
635
+ # override it with the proof from pangea (not the root hash, just the proof)
636
+ for tree_size in tree_sizes:
637
+ if tree_size not in self.pub_roots or tree_size - 1 not in self.pub_roots:
638
+ continue
639
+
640
+ if self.pub_roots[tree_size].source == RootSource.PANGEA:
641
+ continue
642
+
643
+ if self.verify_consistency_proof(tree_size):
644
+ continue
645
+
646
+ resp = await self.root(tree_size=tree_size)
647
+ if resp.success and resp.result is not None and resp.result.data is not None:
648
+ self.pub_roots[tree_size].consistency_proof = resp.result.data.consistency_proof