pangea-sdk 3.8.0b4__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. pangea/__init__.py +1 -2
  2. pangea/asyncio/request.py +17 -22
  3. pangea/asyncio/services/__init__.py +0 -2
  4. pangea/asyncio/services/audit.py +188 -23
  5. pangea/asyncio/services/authn.py +167 -108
  6. pangea/asyncio/services/authz.py +36 -45
  7. pangea/asyncio/services/embargo.py +2 -2
  8. pangea/asyncio/services/file_scan.py +3 -3
  9. pangea/asyncio/services/intel.py +44 -26
  10. pangea/asyncio/services/redact.py +60 -4
  11. pangea/asyncio/services/vault.py +145 -30
  12. pangea/dump_audit.py +1 -1
  13. pangea/request.py +30 -24
  14. pangea/response.py +34 -42
  15. pangea/services/__init__.py +0 -2
  16. pangea/services/audit/audit.py +202 -34
  17. pangea/services/audit/models.py +56 -8
  18. pangea/services/audit/util.py +3 -3
  19. pangea/services/authn/authn.py +116 -65
  20. pangea/services/authn/models.py +88 -4
  21. pangea/services/authz.py +51 -56
  22. pangea/services/base.py +23 -6
  23. pangea/services/embargo.py +2 -2
  24. pangea/services/file_scan.py +3 -2
  25. pangea/services/intel.py +25 -23
  26. pangea/services/redact.py +124 -4
  27. pangea/services/vault/models/common.py +121 -6
  28. pangea/services/vault/models/symmetric.py +2 -2
  29. pangea/services/vault/vault.py +143 -32
  30. pangea/utils.py +20 -109
  31. pangea/verify_audit.py +267 -83
  32. {pangea_sdk-3.8.0b4.dist-info → pangea_sdk-4.0.0.dist-info}/METADATA +12 -20
  33. pangea_sdk-4.0.0.dist-info/RECORD +46 -0
  34. {pangea_sdk-3.8.0b4.dist-info → pangea_sdk-4.0.0.dist-info}/WHEEL +1 -1
  35. pangea/asyncio/__init__.py +0 -1
  36. pangea/asyncio/file_uploader.py +0 -39
  37. pangea/asyncio/services/sanitize.py +0 -185
  38. pangea/asyncio/services/share.py +0 -573
  39. pangea/file_uploader.py +0 -35
  40. pangea/services/sanitize.py +0 -275
  41. pangea/services/share/file_format.py +0 -170
  42. pangea/services/share/share.py +0 -877
  43. pangea_sdk-3.8.0b4.dist-info/RECORD +0 -54
pangea/__init__.py CHANGED
@@ -1,7 +1,6 @@
1
- __version__ = "3.8.0beta4"
1
+ __version__ = "4.0.0"
2
2
 
3
3
  from pangea.asyncio.request import PangeaRequestAsync
4
4
  from pangea.config import PangeaConfig
5
- from pangea.file_uploader import FileUploader
6
5
  from pangea.request import PangeaRequest
7
6
  from pangea.response import PangeaResponse
pangea/asyncio/request.py CHANGED
@@ -3,18 +3,20 @@
3
3
 
4
4
  import asyncio
5
5
  import json
6
- import os
7
6
  import time
8
- from typing import Dict, List, Optional, Tuple, Type, Union
7
+ from typing import Dict, List, Optional, Sequence, Tuple, Type, Union
9
8
 
10
9
  import aiohttp
11
10
  from aiohttp import FormData
11
+ from typing_extensions import TypeVar
12
12
 
13
13
  import pangea.exceptions as pe
14
14
  from pangea.request import MultipartResponse, PangeaRequestBase
15
15
  from pangea.response import AttachedFile, PangeaResponse, PangeaResponseResult, ResponseStatus, TransferMethod
16
16
  from pangea.utils import default_encoder
17
17
 
18
+ TResult = TypeVar("TResult", bound=PangeaResponseResult)
19
+
18
20
 
19
21
  class PangeaRequestAsync(PangeaRequestBase):
20
22
  """An object that makes direct calls to Pangea Service APIs.
@@ -28,12 +30,12 @@ class PangeaRequestAsync(PangeaRequestBase):
28
30
  async def post(
29
31
  self,
30
32
  endpoint: str,
31
- result_class: Type[PangeaResponseResult],
33
+ result_class: Type[TResult],
32
34
  data: Union[str, Dict] = {},
33
35
  files: List[Tuple] = [],
34
36
  poll_result: bool = True,
35
37
  url: Optional[str] = None,
36
- ) -> PangeaResponse:
38
+ ) -> PangeaResponse[TResult]:
37
39
  """Makes the POST call to a Pangea Service endpoint.
38
40
 
39
41
  Args:
@@ -91,9 +93,7 @@ class PangeaRequestAsync(PangeaRequestBase):
91
93
 
92
94
  return self._check_response(pangea_response)
93
95
 
94
- async def get(
95
- self, path: str, result_class: Type[PangeaResponseResult], check_response: bool = True
96
- ) -> PangeaResponse[Type[PangeaResponseResult]]:
96
+ async def get(self, path: str, result_class: Type[TResult], check_response: bool = True) -> PangeaResponse[TResult]:
97
97
  """Makes the GET call to a Pangea Service endpoint.
98
98
 
99
99
  Args:
@@ -110,7 +110,7 @@ class PangeaRequestAsync(PangeaRequestBase):
110
110
 
111
111
  async with self.session.get(url, headers=self._headers()) as requests_response:
112
112
  await self._check_http_errors(requests_response)
113
- pangea_response = PangeaResponse( # type: ignore[var-annotated]
113
+ pangea_response = PangeaResponse(
114
114
  requests_response, result_class=result_class, json=await requests_response.json()
115
115
  )
116
116
 
@@ -131,11 +131,11 @@ class PangeaRequestAsync(PangeaRequestBase):
131
131
  raise pe.ServiceTemporarilyUnavailable(await resp.json())
132
132
 
133
133
  async def poll_result_by_id(
134
- self, request_id: str, result_class: Union[Type[PangeaResponseResult], Type[dict]], check_response: bool = True
135
- ):
134
+ self, request_id: str, result_class: Type[TResult], check_response: bool = True
135
+ ) -> PangeaResponse[TResult]:
136
136
  path = self._get_poll_path(request_id)
137
137
  self.logger.debug(json.dumps({"service": self.service, "action": "poll_result_once", "url": path}))
138
- return await self.get(path, result_class, check_response=check_response) # type: ignore[arg-type]
138
+ return await self.get(path, result_class, check_response=check_response)
139
139
 
140
140
  async def poll_result_once(self, response: PangeaResponse, check_response: bool = True):
141
141
  request_id = response.request_id
@@ -160,7 +160,7 @@ class PangeaRequestAsync(PangeaRequestBase):
160
160
  if resp.status < 200 or resp.status >= 300:
161
161
  raise pe.PresignedUploadError(f"presigned POST failure: {resp.status}", await resp.text())
162
162
 
163
- async def put_presigned_url(self, url: str, files: List[Tuple]):
163
+ async def put_presigned_url(self, url: str, files: Sequence[Tuple]):
164
164
  # Send put request with file as body
165
165
  resp = await self._http_put(url=url, files=files)
166
166
  self.logger.debug(
@@ -180,7 +180,6 @@ class PangeaRequestAsync(PangeaRequestBase):
180
180
  "service": self.service,
181
181
  "action": "download_file",
182
182
  "url": url,
183
- "filename": filename,
184
183
  "status": "start",
185
184
  }
186
185
  )
@@ -276,7 +275,7 @@ class PangeaRequestAsync(PangeaRequestBase):
276
275
  async def _http_put(
277
276
  self,
278
277
  url: str,
279
- files: List[Tuple],
278
+ files: Sequence[Tuple],
280
279
  headers: Dict = {},
281
280
  ) -> aiohttp.ClientResponse:
282
281
  self.logger.debug(
@@ -296,9 +295,6 @@ class PangeaRequestAsync(PangeaRequestBase):
296
295
  raise AttributeError("files attribute should have at least 1 file")
297
296
 
298
297
  response = await self.request_presigned_url(endpoint=endpoint, result_class=result_class, data=data)
299
- if response.success: # This should only happen when uploading a zero bytes file
300
- return response.raw_response
301
-
302
298
  if response.accepted_result is None:
303
299
  raise pe.PangeaException("No accepted_result field when requesting presigned url")
304
300
  if response.accepted_result.post_url is None:
@@ -318,8 +314,9 @@ class PangeaRequestAsync(PangeaRequestBase):
318
314
  ) -> PangeaResponse:
319
315
  # Send request
320
316
  try:
321
- # This should return 202 (AcceptedRequestException) at least zero size file is sent
322
- return await self.post(endpoint=endpoint, result_class=result_class, data=data, poll_result=False)
317
+ # This should return 202 (AcceptedRequestException)
318
+ resp = await self.post(endpoint=endpoint, result_class=result_class, data=data, poll_result=False)
319
+ raise pe.PresignedURLException("Should return 202", resp)
323
320
  except pe.AcceptedRequestException as e:
324
321
  accepted_exception = e
325
322
  except Exception as e:
@@ -328,9 +325,7 @@ class PangeaRequestAsync(PangeaRequestBase):
328
325
  # Receive 202
329
326
  return await self._poll_presigned_url(accepted_exception.response)
330
327
 
331
- async def _poll_presigned_url(
332
- self, response: PangeaResponse[Type[PangeaResponseResult]]
333
- ) -> PangeaResponse[Type[PangeaResponseResult]]:
328
+ async def _poll_presigned_url(self, response: PangeaResponse[TResult]) -> PangeaResponse[TResult]:
334
329
  if response.http_status != 202:
335
330
  raise AttributeError("Response should be 202")
336
331
 
@@ -5,6 +5,4 @@ from .embargo import EmbargoAsync
5
5
  from .file_scan import FileScanAsync
6
6
  from .intel import DomainIntelAsync, FileIntelAsync, IpIntelAsync, UrlIntelAsync, UserIntelAsync
7
7
  from .redact import RedactAsync
8
- from .sanitize import SanitizeAsync
9
- from .share import ShareAsync
10
8
  from .vault import VaultAsync
@@ -1,11 +1,14 @@
1
1
  # Copyright 2022 Pangea Cyber Corporation
2
2
  # Author: Pangea Cyber Corporation
3
+ from __future__ import annotations
4
+
3
5
  import datetime
4
- from typing import Any, Dict, List, Optional, Union
6
+ from typing import Any, Dict, Iterable, List, Optional, Sequence, Union
5
7
 
6
8
  import pangea.exceptions as pexc
7
9
  from pangea.asyncio.services.base import ServiceBaseAsync
8
- from pangea.response import PangeaResponse
10
+ from pangea.config import PangeaConfig
11
+ from pangea.response import PangeaResponse, PangeaResponseResult
9
12
  from pangea.services.audit.audit import AuditBase
10
13
  from pangea.services.audit.exceptions import AuditException
11
14
  from pangea.services.audit.models import (
@@ -13,6 +16,7 @@ from pangea.services.audit.models import (
13
16
  DownloadRequest,
14
17
  DownloadResult,
15
18
  Event,
19
+ ExportRequest,
16
20
  LogBulkResult,
17
21
  LogResult,
18
22
  PublishedRoot,
@@ -56,14 +60,33 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
56
60
 
57
61
  def __init__(
58
62
  self,
59
- token,
60
- config=None,
63
+ token: str,
64
+ config: PangeaConfig | None = None,
61
65
  private_key_file: str = "",
62
- public_key_info: Dict[str, str] = {},
63
- tenant_id: Optional[str] = None,
64
- logger_name="pangea",
65
- config_id: Optional[str] = None,
66
- ):
66
+ public_key_info: dict[str, str] = {},
67
+ tenant_id: str | None = None,
68
+ logger_name: str = "pangea",
69
+ config_id: str | None = None,
70
+ ) -> None:
71
+ """
72
+ Audit client
73
+
74
+ Initializes a new Audit client.
75
+
76
+ Args:
77
+ token: Pangea API token.
78
+ config: Configuration.
79
+ private_key_file: Private key filepath.
80
+ public_key_info: Public key information.
81
+ tenant_id: Tenant ID.
82
+ logger_name: Logger name.
83
+ config_id: Configuration ID.
84
+
85
+ Examples:
86
+ config = PangeaConfig(domain="pangea_domain")
87
+ audit = AuditAsync(token="pangea_token", config=config)
88
+ """
89
+
67
90
  # FIXME: Temporary check to deprecate config_id from PangeaConfig.
68
91
  # Delete it when deprecate PangeaConfig.config_id
69
92
  if config_id and config is not None and config.config_id is not None:
@@ -180,7 +203,7 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
180
203
 
181
204
  input = self._get_log_request(event, sign_local=sign_local, verify=verify, verbose=verbose)
182
205
  response: PangeaResponse[LogResult] = await self.request.post(
183
- "v1/log", LogResult, data=input.dict(exclude_none=True)
206
+ "v1/log", LogResult, data=input.model_dump(exclude_none=True)
184
207
  )
185
208
  if response.success and response.result is not None:
186
209
  self._process_log_result(response.result, verify=verify)
@@ -216,7 +239,7 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
216
239
 
217
240
  input = self._get_log_request(events, sign_local=sign_local, verify=False, verbose=verbose)
218
241
  response: PangeaResponse[LogBulkResult] = await self.request.post(
219
- "v2/log", LogBulkResult, data=input.dict(exclude_none=True)
242
+ "v2/log", LogBulkResult, data=input.model_dump(exclude_none=True)
220
243
  )
221
244
  if response.success and response.result is not None:
222
245
  for result in response.result.results:
@@ -254,7 +277,7 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
254
277
  input = self._get_log_request(events, sign_local=sign_local, verify=False, verbose=verbose)
255
278
  try:
256
279
  response: PangeaResponse[LogBulkResult] = await self.request.post(
257
- "v2/log_async", LogBulkResult, data=input.dict(exclude_none=True), poll_result=False
280
+ "v2/log_async", LogBulkResult, data=input.model_dump(exclude_none=True), poll_result=False
258
281
  )
259
282
  except pexc.AcceptedRequestException as e:
260
283
  return e.response
@@ -272,10 +295,11 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
272
295
  end: Optional[Union[datetime.datetime, str]] = None,
273
296
  limit: Optional[int] = None,
274
297
  max_results: Optional[int] = None,
275
- search_restriction: Optional[dict] = None,
298
+ search_restriction: Optional[Dict[str, Sequence[str]]] = None,
276
299
  verbose: Optional[bool] = None,
277
300
  verify_consistency: bool = False,
278
301
  verify_events: bool = True,
302
+ return_context: Optional[bool] = None,
279
303
  ) -> PangeaResponse[SearchOutput]:
280
304
  """
281
305
  Search the log
@@ -301,10 +325,11 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
301
325
  end (datetime, optional): An RFC-3339 formatted timestamp, or relative time adjustment from the current time.
302
326
  limit (int, optional): Optional[int] = None,
303
327
  max_results (int, optional): Maximum number of results to return.
304
- search_restriction (dict, optional): A list of keys to restrict the search results to. Useful for partitioning data available to the query string.
328
+ search_restriction (Dict[str, Sequence[str]], optional): A list of keys to restrict the search results to. Useful for partitioning data available to the query string.
305
329
  verbose (bool, optional): If true, response include root and membership and consistency proofs.
306
330
  verify_consistency (bool): True to verify logs consistency
307
331
  verify_events (bool): True to verify hash events and signatures
332
+ return_context (bool): Return the context data needed to decrypt secure audit events that have been redacted with format preserving encryption.
308
333
 
309
334
  Raises:
310
335
  AuditException: If an audit based api exception happens
@@ -332,10 +357,11 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
332
357
  max_results=max_results,
333
358
  search_restriction=search_restriction,
334
359
  verbose=verbose,
360
+ return_context=return_context,
335
361
  )
336
362
 
337
363
  response: PangeaResponse[SearchOutput] = await self.request.post(
338
- "v1/search", SearchOutput, data=input.dict(exclude_none=True)
364
+ "v1/search", SearchOutput, data=input.model_dump(exclude_none=True)
339
365
  )
340
366
  if verify_consistency:
341
367
  await self.update_published_roots(response.result) # type: ignore[arg-type]
@@ -347,8 +373,10 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
347
373
  id: str,
348
374
  limit: Optional[int] = 20,
349
375
  offset: Optional[int] = 0,
376
+ assert_search_restriction: Optional[Dict[str, Sequence[str]]] = None,
350
377
  verify_consistency: bool = False,
351
378
  verify_events: bool = True,
379
+ return_context: Optional[bool] = None,
352
380
  ) -> PangeaResponse[SearchResultOutput]:
353
381
  """
354
382
  Results of a search
@@ -361,8 +389,11 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
361
389
  id (string): the id of a search action, found in `response.result.id`
362
390
  limit (integer, optional): the maximum number of results to return, default is 20
363
391
  offset (integer, optional): the position of the first result to return, default is 0
392
+ assert_search_restriction (Dict[str, Sequence[str]], optional): Assert the requested search results were queried with the exact same search restrictions, to ensure the results comply to the expected restrictions.
364
393
  verify_consistency (bool): True to verify logs consistency
365
394
  verify_events (bool): True to verify hash events and signatures
395
+ return_context (bool): Return the context data needed to decrypt secure audit events that have been redacted with format preserving encryption.
396
+
366
397
  Raises:
367
398
  AuditException: If an audit based api exception happens
368
399
  PangeaAPIException: If an API Error happens
@@ -378,7 +409,8 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
378
409
  result_res: PangeaResponse[SearchResultsOutput] = audit.results(
379
410
  id=search_res.result.id,
380
411
  limit=10,
381
- offset=0)
412
+ offset=0,
413
+ assert_search_restriction={'source': ["monitor"]})
382
414
  """
383
415
 
384
416
  if limit <= 0: # type: ignore[operator]
@@ -391,13 +423,116 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
391
423
  id=id,
392
424
  limit=limit,
393
425
  offset=offset,
426
+ assert_search_restriction=assert_search_restriction,
427
+ return_context=return_context,
394
428
  )
395
- response = await self.request.post("v1/results", SearchResultOutput, data=input.dict(exclude_none=True))
429
+ response = await self.request.post("v1/results", SearchResultOutput, data=input.model_dump(exclude_none=True))
396
430
  if verify_consistency and response.result is not None:
397
431
  await self.update_published_roots(response.result)
398
432
 
399
433
  return self.handle_results_response(response, verify_consistency, verify_events)
400
434
 
435
+ async def export(
436
+ self,
437
+ *,
438
+ format: DownloadFormat = DownloadFormat.CSV,
439
+ start: Optional[datetime.datetime] = None,
440
+ end: Optional[datetime.datetime] = None,
441
+ order: Optional[SearchOrder] = None,
442
+ order_by: Optional[str] = None,
443
+ verbose: bool = True,
444
+ ) -> PangeaResponse[PangeaResponseResult]:
445
+ """
446
+ Export from the audit log
447
+
448
+ Bulk export of data from the Secure Audit Log, with optional filtering.
449
+
450
+ OperationId: audit_post_v1_export
451
+
452
+ Args:
453
+ format: Format for the records.
454
+ start: The start of the time range to perform the search on.
455
+ end: The end of the time range to perform the search on. If omitted,
456
+ then all records up to the latest will be searched.
457
+ order: Specify the sort order of the response.
458
+ order_by: Name of column to sort the results by.
459
+ verbose: Whether or not to include the root hash of the tree and the
460
+ membership proof for each record.
461
+
462
+ Raises:
463
+ AuditException: If an audit based api exception happens
464
+ PangeaAPIException: If an API Error happens
465
+
466
+ Examples:
467
+ export_res = await audit.export(verbose=False)
468
+
469
+ # Export may take several dozens of minutes, so polling for the result
470
+ # should be done in a loop. That is omitted here for brevity's sake.
471
+ try:
472
+ await audit.poll_result(request_id=export_res.request_id)
473
+ except AcceptedRequestException:
474
+ # Retry later.
475
+
476
+ # Download the result when it's ready.
477
+ download_res = await audit.download_results(request_id=export_res.request_id)
478
+ download_res.result.dest_url
479
+ # => https://pangea-runtime.s3.amazonaws.com/audit/xxxxx/search_results_[...]
480
+ """
481
+ input = ExportRequest(
482
+ format=format,
483
+ start=start,
484
+ end=end,
485
+ order=order,
486
+ order_by=order_by,
487
+ verbose=verbose,
488
+ )
489
+ try:
490
+ return await self.request.post(
491
+ "v1/export", PangeaResponseResult, data=input.model_dump(exclude_none=True), poll_result=False
492
+ )
493
+ except pexc.AcceptedRequestException as e:
494
+ return e.response
495
+
496
+ async def log_stream(self, data: dict) -> PangeaResponse[PangeaResponseResult]:
497
+ """
498
+ Log streaming endpoint
499
+
500
+ This API allows 3rd party vendors (like Auth0) to stream events to this
501
+ endpoint where the structure of the payload varies across different
502
+ vendors.
503
+
504
+ OperationId: audit_post_v1_log_stream
505
+
506
+ Args:
507
+ data: Event data. The exact schema of this will vary by vendor.
508
+
509
+ Raises:
510
+ AuditException: If an audit based api exception happens
511
+ PangeaAPIException: If an API Error happens
512
+
513
+ Examples:
514
+ data = {
515
+ "logs": [
516
+ {
517
+ "log_id": "some log ID",
518
+ "data": {
519
+ "date": "2024-03-29T17:26:50.193Z",
520
+ "type": "sapi",
521
+ "description": "Create a log stream",
522
+ "client_id": "some client ID",
523
+ "ip": "127.0.0.1",
524
+ "user_agent": "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0",
525
+ "user_id": "some user ID",
526
+ },
527
+ }
528
+ # ...
529
+ ]
530
+ }
531
+
532
+ response = await audit.log_stream(data)
533
+ """
534
+ return await self.request.post("v1/log_stream", PangeaResponseResult, data=data)
535
+
401
536
  async def root(self, tree_size: Optional[int] = None) -> PangeaResponse[RootResult]:
402
537
  """
403
538
  Tamperproof verification
@@ -420,10 +555,14 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
420
555
  response = audit.root(tree_size=7)
421
556
  """
422
557
  input = RootRequest(tree_size=tree_size)
423
- return await self.request.post("v1/root", RootResult, data=input.dict(exclude_none=True))
558
+ return await self.request.post("v1/root", RootResult, data=input.model_dump(exclude_none=True))
424
559
 
425
560
  async def download_results(
426
- self, result_id: str, format: Optional[DownloadFormat] = None
561
+ self,
562
+ result_id: Optional[str] = None,
563
+ format: DownloadFormat = DownloadFormat.CSV,
564
+ request_id: Optional[str] = None,
565
+ return_context: Optional[bool] = None,
427
566
  ) -> PangeaResponse[DownloadResult]:
428
567
  """
429
568
  Download search results
@@ -435,6 +574,8 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
435
574
  Args:
436
575
  result_id: ID returned by the search API.
437
576
  format: Format for the records.
577
+ request_id: ID returned by the export API.
578
+ return_context (bool): Return the context data needed to decrypt secure audit events that have been redacted with format preserving encryption.
438
579
 
439
580
  Returns:
440
581
  URL where search results can be downloaded.
@@ -450,8 +591,13 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
450
591
  )
451
592
  """
452
593
 
453
- input = DownloadRequest(result_id=result_id, format=format)
454
- return await self.request.post("v1/download_results", DownloadResult, data=input.dict(exclude_none=True))
594
+ if request_id is None and result_id is None:
595
+ raise ValueError("must pass one of `request_id` or `result_id`")
596
+
597
+ input = DownloadRequest(
598
+ request_id=request_id, result_id=result_id, format=format, return_context=return_context
599
+ )
600
+ return await self.request.post("v1/download_results", DownloadResult, data=input.model_dump(exclude_none=True))
455
601
 
456
602
  async def update_published_roots(self, result: SearchResultOutput):
457
603
  """Fetches series of published root hashes from Arweave
@@ -476,12 +622,31 @@ class AuditAsync(ServiceBaseAsync, AuditBase):
476
622
  for tree_size in tree_sizes:
477
623
  pub_root = None
478
624
  if tree_size in arweave_roots:
479
- pub_root = PublishedRoot(**arweave_roots[tree_size].dict(exclude_none=True))
625
+ pub_root = PublishedRoot(**arweave_roots[tree_size].model_dump(exclude_none=True))
480
626
  pub_root.source = RootSource.ARWEAVE
481
627
  elif self.allow_server_roots:
482
628
  resp = await self.root(tree_size=tree_size)
483
629
  if resp.success and resp.result is not None:
484
- pub_root = PublishedRoot(**resp.result.data.dict(exclude_none=True))
630
+ pub_root = PublishedRoot(**resp.result.data.model_dump(exclude_none=True))
485
631
  pub_root.source = RootSource.PANGEA
486
632
  if pub_root is not None:
487
633
  self.pub_roots[tree_size] = pub_root
634
+
635
+ await self.fix_consistency_proofs(tree_sizes)
636
+
637
+ async def fix_consistency_proofs(self, tree_sizes: Iterable[int]):
638
+ # on very rare occasions, the consistency proof in Arweave may be wrong
639
+ # override it with the proof from pangea (not the root hash, just the proof)
640
+ for tree_size in tree_sizes:
641
+ if tree_size not in self.pub_roots or tree_size - 1 not in self.pub_roots:
642
+ continue
643
+
644
+ if self.pub_roots[tree_size].source == RootSource.PANGEA:
645
+ continue
646
+
647
+ if self.verify_consistency_proof(tree_size):
648
+ continue
649
+
650
+ resp = await self.root(tree_size=tree_size)
651
+ if resp.success and resp.result is not None and resp.result.data is not None:
652
+ self.pub_roots[tree_size].consistency_proof = resp.result.data.consistency_proof