pangea-sdk 3.6.1__py3-none-any.whl → 3.8.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
pangea/response.py CHANGED
@@ -2,14 +2,38 @@
2
2
  # Author: Pangea Cyber Corporation
3
3
  import datetime
4
4
  import enum
5
- from typing import Any, Dict, Generic, List, Optional, Type, TypeVar, Union
5
+ import os
6
+ from typing import Any, Dict, Generic, List, Optional, Type, Union
6
7
 
7
8
  import aiohttp
8
9
  import requests
9
- from pangea.utils import format_datetime
10
10
  from pydantic import BaseModel
11
+ from typing_extensions import TypeVar
12
+
13
+ from pangea.utils import format_datetime
14
+
15
+
16
+ class AttachedFile(object):
17
+ filename: str
18
+ file: bytes
19
+ content_type: str
20
+
21
+ def __init__(self, filename: str, file: bytes, content_type: str):
22
+ self.filename = filename
23
+ self.file = file
24
+ self.content_type = content_type
25
+
26
+ def save(self, dest_folder: str = "./", filename: Optional[str] = None):
27
+ if filename is None:
28
+ filename = self.filename if self.filename else "default_save_filename"
11
29
 
12
- T = TypeVar("T")
30
+ filepath = os.path.join(dest_folder, filename)
31
+ directory = os.path.dirname(filepath)
32
+ if not os.path.exists(directory):
33
+ os.makedirs(directory)
34
+
35
+ with open(filepath, "wb") as file:
36
+ file.write(self.file)
13
37
 
14
38
 
15
39
  class TransferMethod(str, enum.Enum):
@@ -17,6 +41,7 @@ class TransferMethod(str, enum.Enum):
17
41
  POST_URL = "post-url"
18
42
  PUT_URL = "put-url"
19
43
  SOURCE_URL = "source-url"
44
+ DEST_URL = "dest-url"
20
45
 
21
46
  def __str__(self):
22
47
  return str(self.value)
@@ -117,22 +142,34 @@ class ResponseStatus(str, enum.Enum):
117
142
 
118
143
 
119
144
  class ResponseHeader(APIResponseModel):
120
- """
121
- Pangea response API header.
122
-
123
- Arguments:
124
- request_id -- The request ID.
125
- request_time -- The time the request was issued, ISO8601.
126
- response_time -- The time the response was issued, ISO8601.
127
- status -- Pangea response status
128
- summary -- The summary of the response.
129
- """
145
+ """Pangea response API header."""
130
146
 
131
147
  request_id: str
148
+ """A unique identifier assigned to each request made to the API."""
149
+
132
150
  request_time: str
151
+ """
152
+ Timestamp indicating the exact moment when a request is made to the API.
153
+ """
154
+
133
155
  response_time: str
156
+ """
157
+ Timestamp indicating when the API issued the response, in ISO 8601 format.
158
+ """
159
+
134
160
  status: str
161
+ """
162
+ Represents the status or outcome of the API request.
163
+ """
164
+
135
165
  summary: str
166
+ """
167
+ Provides a brief summary of the outcome of the API request, as reported in
168
+ the response.
169
+ """
170
+
171
+
172
+ T = TypeVar("T", bound=PangeaResponseResult, default=PangeaResponseResult)
136
173
 
137
174
 
138
175
  class PangeaResponse(Generic[T], ResponseHeader):
@@ -141,17 +178,26 @@ class PangeaResponse(Generic[T], ResponseHeader):
141
178
  result: Optional[T] = None
142
179
  pangea_error: Optional[PangeaError] = None
143
180
  accepted_result: Optional[AcceptedResult] = None
144
- result_class: Type[PangeaResponseResult] = PangeaResponseResult
181
+ result_class: Type[T] = PangeaResponseResult # type: ignore[assignment]
145
182
  _json: Any
146
-
147
- def __init__(self, response: requests.Response, result_class: Type[PangeaResponseResult], json: dict):
183
+ attached_files: List[AttachedFile] = []
184
+
185
+ def __init__(
186
+ self,
187
+ response: requests.Response,
188
+ result_class: Type[T],
189
+ json: dict,
190
+ attached_files: List[AttachedFile] = [],
191
+ ):
148
192
  super(PangeaResponse, self).__init__(**json)
149
193
  self._json = json
150
194
  self.raw_response = response
151
195
  self.raw_result = self._json["result"]
152
196
  self.result_class = result_class
197
+ self.attached_files = attached_files
198
+
153
199
  self.result = (
154
- self.result_class(**self.raw_result) # type: ignore[assignment]
200
+ self.result_class(**self.raw_result)
155
201
  if self.raw_result is not None and issubclass(self.result_class, PangeaResponseResult) and self.success
156
202
  else None
157
203
  )
@@ -1,5 +1,6 @@
1
1
  from .audit.audit import Audit
2
2
  from .authn.authn import AuthN
3
+ from .authz import AuthZ
3
4
  from .embargo import Embargo
4
5
  from .file_scan import FileScan
5
6
  from .intel import DomainIntel, FileIntel, IpIntel, UrlIntel, UserIntel
@@ -1,16 +1,23 @@
1
1
  # Copyright 2022 Pangea Cyber Corporation
2
2
  # Author: Pangea Cyber Corporation
3
+ from __future__ import annotations
4
+
3
5
  import datetime
4
6
  import json
5
- from typing import Any, Dict, List, Optional, Union
7
+ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
6
8
 
7
9
  import pangea.exceptions as pexc
8
- from pangea.response import PangeaResponse
10
+ from pangea.config import PangeaConfig
11
+ from pangea.response import PangeaResponse, PangeaResponseResult
9
12
  from pangea.services.audit.exceptions import AuditException, EventCorruption
10
13
  from pangea.services.audit.models import (
14
+ DownloadFormat,
15
+ DownloadRequest,
16
+ DownloadResult,
11
17
  Event,
12
18
  EventEnvelope,
13
19
  EventVerification,
20
+ ExportRequest,
14
21
  LogBulkRequest,
15
22
  LogBulkResult,
16
23
  LogEvent,
@@ -48,9 +55,9 @@ from pangea.utils import canonicalize_nested_json
48
55
 
49
56
  class AuditBase:
50
57
  def __init__(
51
- self, private_key_file: str = "", public_key_info: Dict[str, str] = {}, tenant_id: Optional[str] = None
52
- ):
53
- self.pub_roots: Dict[int, Root] = {}
58
+ self, private_key_file: str = "", public_key_info: dict[str, str] = {}, tenant_id: str | None = None
59
+ ) -> None:
60
+ self.pub_roots: Dict[int, PublishedRoot] = {}
54
61
  self.buffer_data: Optional[str] = None
55
62
  self.signer: Optional[Signer] = Signer(private_key_file) if private_key_file else None
56
63
  self.public_key_info = public_key_info
@@ -182,8 +189,6 @@ class AuditBase:
182
189
  unpublished_root = response.result.unpublished_root # type: ignore[union-attr]
183
190
 
184
191
  if verify_consistency:
185
- self.update_published_roots(response.result) # type: ignore[arg-type]
186
-
187
192
  for search_event in response.result.events: # type: ignore[union-attr]
188
193
  # verify membership proofs
189
194
  if self.can_verify_membership_proof(search_event):
@@ -201,22 +206,9 @@ class AuditBase:
201
206
 
202
207
  return response
203
208
 
204
- def update_published_roots(self, result: SearchOutput):
205
- """Fetches series of published root hashes from Arweave
206
-
207
- This is used for subsequent calls to verify_consistency_proof(). Root hashes
208
- are published on [Arweave](https://arweave.net).
209
-
210
- Args:
211
- result (SearchOutput): PangeaResponse object from previous call to audit.search()
212
-
213
- Raises:
214
- AuditException: If an audit based api exception happens
215
- PangeaAPIException: If an API Error happens
216
- """
217
-
209
+ def _get_tree_sizes_and_roots(self, result: SearchResultOutput) -> Tuple[Set[int], Dict[int, PublishedRoot]]:
218
210
  if not result.root:
219
- return
211
+ return set(), {}
220
212
 
221
213
  tree_sizes = set()
222
214
  for search_event in result.events:
@@ -230,22 +222,11 @@ class AuditBase:
230
222
  tree_sizes.difference_update(self.pub_roots.keys())
231
223
 
232
224
  if tree_sizes:
233
- arweave_roots = get_arweave_published_roots(result.root.tree_name, list(tree_sizes)) # + [result.count])
225
+ arweave_roots = get_arweave_published_roots(result.root.tree_name, list(tree_sizes))
234
226
  else:
235
227
  arweave_roots = {}
236
228
 
237
- # fill the missing roots from the server (if allowed)
238
- for tree_size in tree_sizes:
239
- pub_root = None
240
- if tree_size in arweave_roots:
241
- pub_root = PublishedRoot(**arweave_roots[tree_size].dict(exclude_none=True))
242
- pub_root.source = RootSource.ARWEAVE
243
- elif self.allow_server_roots:
244
- resp = self.root(tree_size=tree_size) # type: ignore[attr-defined]
245
- if resp.success:
246
- pub_root = PublishedRoot(**resp.result.data.dict(exclude_none=True))
247
- pub_root.source = RootSource.PANGEA
248
- self.pub_roots[tree_size] = pub_root # type: ignore[assignment]
229
+ return tree_sizes, arweave_roots
249
230
 
250
231
  def can_verify_membership_proof(self, event: SearchEvent) -> bool:
251
232
  """
@@ -303,7 +284,7 @@ class AuditBase:
303
284
  """
304
285
  return event.published and event.leaf_index is not None and event.leaf_index >= 0 # type: ignore[return-value]
305
286
 
306
- def verify_consistency_proof(self, pub_roots: Dict[int, Root], event: SearchEvent) -> bool:
287
+ def verify_consistency_proof(self, pub_roots: Dict[int, PublishedRoot], event: SearchEvent) -> bool:
307
288
  """
308
289
  Verify consistency proof
309
290
 
@@ -329,7 +310,7 @@ class AuditBase:
329
310
  return False
330
311
 
331
312
  if not self.allow_server_roots and (
332
- curr_root.source != RootSource.ARWEAVE or prev_root.source != RootSource.ARWEAVE # type: ignore[attr-defined]
313
+ curr_root.source != RootSource.ARWEAVE or prev_root.source != RootSource.ARWEAVE
333
314
  ):
334
315
  return False
335
316
 
@@ -355,7 +336,9 @@ class AuditBase:
355
336
  if audit_envelope and audit_envelope.signature and public_key:
356
337
  v = Verifier()
357
338
  verification = v.verify_signature(
358
- audit_envelope.signature, canonicalize_event(audit_envelope.event), public_key # type: ignore[arg-type]
339
+ audit_envelope.signature,
340
+ canonicalize_event(Event(**audit_envelope.event)),
341
+ public_key,
359
342
  )
360
343
  if verification is not None:
361
344
  return EventVerification.PASS if verification else EventVerification.FAIL
@@ -397,14 +380,32 @@ class Audit(ServiceBase, AuditBase):
397
380
 
398
381
  def __init__(
399
382
  self,
400
- token,
401
- config=None,
383
+ token: str,
384
+ config: PangeaConfig | None = None,
402
385
  private_key_file: str = "",
403
- public_key_info: Dict[str, str] = {},
404
- tenant_id: Optional[str] = None,
405
- logger_name="pangea",
406
- config_id: Optional[str] = None,
407
- ):
386
+ public_key_info: dict[str, str] = {},
387
+ tenant_id: str | None = None,
388
+ logger_name: str = "pangea",
389
+ config_id: str | None = None,
390
+ ) -> None:
391
+ """
392
+ Audit client
393
+
394
+ Initializes a new Audit client.
395
+
396
+ Args:
397
+ token: Pangea API token.
398
+ config: Configuration.
399
+ private_key_file: Private key filepath.
400
+ public_key_info: Public key information.
401
+ tenant_id: Tenant ID.
402
+ logger_name: Logger name.
403
+ config_id: Configuration ID.
404
+
405
+ Examples:
406
+ config = PangeaConfig(domain="pangea_domain")
407
+ audit = Audit(token="pangea_token", config=config)
408
+ """
408
409
  # FIXME: Temporary check to deprecate config_id from PangeaConfig.
409
410
  # Delete it once PangeaConfig.config_id has been deprecated.
410
411
  if config_id and config is not None and config.config_id is not None:
@@ -518,8 +519,8 @@ class Audit(ServiceBase, AuditBase):
518
519
  """
519
520
 
520
521
  input = self._get_log_request(event, sign_local=sign_local, verify=verify, verbose=verbose)
521
- response = self.request.post("v1/log", LogResult, data=input.dict(exclude_none=True))
522
- if response.success:
522
+ response: PangeaResponse[LogResult] = self.request.post("v1/log", LogResult, data=input.dict(exclude_none=True))
523
+ if response.success and response.result is not None:
523
524
  self._process_log_result(response.result, verify=verify)
524
525
  return response
525
526
 
@@ -557,9 +558,11 @@ class Audit(ServiceBase, AuditBase):
557
558
  """
558
559
 
559
560
  input = self._get_log_request(events, sign_local=sign_local, verify=False, verbose=verbose)
560
- response = self.request.post("v2/log", LogBulkResult, data=input.dict(exclude_none=True))
561
+ response: PangeaResponse[LogBulkResult] = self.request.post(
562
+ "v2/log", LogBulkResult, data=input.dict(exclude_none=True)
563
+ )
561
564
 
562
- if response.success:
565
+ if response.success and response.result is not None:
563
566
  for result in response.result.results:
564
567
  self._process_log_result(result, verify=True)
565
568
  return response
@@ -599,13 +602,13 @@ class Audit(ServiceBase, AuditBase):
599
602
 
600
603
  try:
601
604
  # Calls to v2 methods will always return a 202.
602
- response = self.request.post(
605
+ response: PangeaResponse[LogBulkResult] = self.request.post(
603
606
  "v2/log_async", LogBulkResult, data=input.dict(exclude_none=True), poll_result=False
604
607
  )
605
608
  except pexc.AcceptedRequestException as e:
606
609
  return e.response
607
610
 
608
- if response.success:
611
+ if response.success and response.result is not None:
609
612
  for result in response.result.results:
610
613
  self._process_log_result(result, verify=True)
611
614
  return response
@@ -619,7 +622,7 @@ class Audit(ServiceBase, AuditBase):
619
622
  end: Optional[Union[datetime.datetime, str]] = None,
620
623
  limit: Optional[int] = None,
621
624
  max_results: Optional[int] = None,
622
- search_restriction: Optional[dict] = None,
625
+ search_restriction: Optional[Dict[str, Sequence[str]]] = None,
623
626
  verbose: Optional[bool] = None,
624
627
  verify_consistency: bool = False,
625
628
  verify_events: bool = True,
@@ -648,7 +651,7 @@ class Audit(ServiceBase, AuditBase):
648
651
  end (datetime, optional): An RFC-3339 formatted timestamp, or relative time adjustment from the current time.
649
652
  limit (int, optional): Maximum number of results to return in a single page of results.
650
653
  max_results (int, optional): Maximum number of results to return.
651
- search_restriction (dict, optional): A list of keys to restrict the search results to. Useful for partitioning data available to the query string.
654
+ search_restriction (Dict[str, Sequence[str]], optional): A mapping of keys to the lists of values that the search results are restricted to. Useful for partitioning data available to the query string.
652
655
  verbose (bool, optional): If true, response include root and membership and consistency proofs.
653
656
  verify_consistency (bool): True to verify logs consistency
654
657
  verify_events (bool): True to verify hash events and signatures
@@ -687,7 +690,11 @@ class Audit(ServiceBase, AuditBase):
687
690
  verbose=verbose,
688
691
  )
689
692
 
690
- response = self.request.post("v1/search", SearchOutput, data=input.dict(exclude_none=True))
693
+ response: PangeaResponse[SearchOutput] = self.request.post(
694
+ "v1/search", SearchOutput, data=input.dict(exclude_none=True)
695
+ )
696
+ if verify_consistency and response.result is not None:
697
+ self.update_published_roots(response.result)
691
698
  return self.handle_search_response(response, verify_consistency, verify_events)
692
699
 
693
700
  def results(
@@ -695,6 +702,7 @@ class Audit(ServiceBase, AuditBase):
695
702
  id: str,
696
703
  limit: Optional[int] = 20,
697
704
  offset: Optional[int] = 0,
705
+ assert_search_restriction: Optional[Dict[str, Sequence[str]]] = None,
698
706
  verify_consistency: bool = False,
699
707
  verify_events: bool = True,
700
708
  ) -> PangeaResponse[SearchResultOutput]:
@@ -709,6 +717,7 @@ class Audit(ServiceBase, AuditBase):
709
717
  id (string): the id of a search action, found in `response.result.id`
710
718
  limit (integer, optional): the maximum number of results to return, default is 20
711
719
  offset (integer, optional): the position of the first result to return, default is 0
720
+ assert_search_restriction (Dict[str, Sequence[str]], optional): Assert the requested search results were queried with the exact same search restrictions, to ensure the results comply to the expected restrictions.
712
721
  verify_consistency (bool): True to verify logs consistency
713
722
  verify_events (bool): True to verify hash events and signatures
714
723
  Raises:
@@ -733,10 +742,116 @@ class Audit(ServiceBase, AuditBase):
733
742
  id=id,
734
743
  limit=limit,
735
744
  offset=offset,
745
+ assert_search_restriction=assert_search_restriction,
736
746
  )
737
- response = self.request.post("v1/results", SearchResultOutput, data=input.dict(exclude_none=True))
747
+ response: PangeaResponse[SearchResultOutput] = self.request.post(
748
+ "v1/results", SearchResultOutput, data=input.dict(exclude_none=True)
749
+ )
750
+ if verify_consistency and response.result is not None:
751
+ self.update_published_roots(response.result)
738
752
  return self.handle_results_response(response, verify_consistency, verify_events)
739
753
 
754
+ def export(
755
+ self,
756
+ *,
757
+ format: DownloadFormat = DownloadFormat.CSV,
758
+ start: Optional[datetime.datetime] = None,
759
+ end: Optional[datetime.datetime] = None,
760
+ order: Optional[SearchOrder] = None,
761
+ order_by: Optional[str] = None,
762
+ verbose: bool = True,
763
+ ) -> PangeaResponse[PangeaResponseResult]:
764
+ """
765
+ Export from the audit log
766
+
767
+ Bulk export of data from the Secure Audit Log, with optional filtering.
768
+
769
+ OperationId: audit_post_v1_export
770
+
771
+ Args:
772
+ format: Format for the records.
773
+ start: The start of the time range to perform the search on.
774
+ end: The end of the time range to perform the search on. If omitted,
775
+ then all records up to the latest will be searched.
776
+ order: Specify the sort order of the response.
777
+ order_by: Name of column to sort the results by.
778
+ verbose: Whether or not to include the root hash of the tree and the
779
+ membership proof for each record.
780
+
781
+ Raises:
782
+ AuditException: If an audit based api exception happens
783
+ PangeaAPIException: If an API Error happens
784
+
785
+ Examples:
786
+ export_res = audit.export(verbose=False)
787
+
788
+ # Export may take tens of minutes, so polling for the result
789
+ # should be done in a loop. That is omitted here for brevity's sake.
790
+ try:
791
+ audit.poll_result(request_id=export_res.request_id)
792
+ except AcceptedRequestException:
793
+ # Retry later.
794
+
795
+ # Download the result when it's ready.
796
+ download_res = audit.download_results(request_id=export_res.request_id)
797
+ download_res.result.dest_url
798
+ # => https://pangea-runtime.s3.amazonaws.com/audit/xxxxx/search_results_[...]
799
+ """
800
+ input = ExportRequest(
801
+ format=format,
802
+ start=start,
803
+ end=end,
804
+ order=order,
805
+ order_by=order_by,
806
+ verbose=verbose,
807
+ )
808
+ try:
809
+ return self.request.post(
810
+ "v1/export", PangeaResponseResult, data=input.dict(exclude_none=True), poll_result=False
811
+ )
812
+ except pexc.AcceptedRequestException as e:
813
+ return e.response
814
+
815
+ def log_stream(self, data: dict) -> PangeaResponse[PangeaResponseResult]:
816
+ """
817
+ Log streaming endpoint
818
+
819
+ This API allows 3rd party vendors (like Auth0) to stream events to this
820
+ endpoint where the structure of the payload varies across different
821
+ vendors.
822
+
823
+ OperationId: audit_post_v1_log_stream
824
+
825
+ Args:
826
+ data: Event data. The exact schema of this will vary by vendor.
827
+
828
+ Raises:
829
+ AuditException: If an audit based api exception happens
830
+ PangeaAPIException: If an API Error happens
831
+
832
+ Examples:
833
+ data = {
834
+ "logs": [
835
+ {
836
+ "log_id": "some log ID",
837
+ "data": {
838
+ "date": "2024-03-29T17:26:50.193Z",
839
+ "type": "sapi",
840
+ "description": "Create a log stream",
841
+ "client_id": "some client ID",
842
+ "ip": "127.0.0.1",
843
+ "user_agent": "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0",
844
+ "user_id": "some user ID",
845
+ },
846
+ }
847
+ # ...
848
+ ]
849
+ }
850
+
851
+ response = audit.log_stream(data)
852
+ """
853
+ return self.request.post("v1/log_stream", PangeaResponseResult, data=data)
854
+
740
855
  def root(self, tree_size: Optional[int] = None) -> PangeaResponse[RootResult]:
741
856
  """
742
857
  Tamperproof verification
@@ -760,3 +875,74 @@ class Audit(ServiceBase, AuditBase):
760
875
  """
761
876
  input = RootRequest(tree_size=tree_size)
762
877
  return self.request.post("v1/root", RootResult, data=input.dict(exclude_none=True))
878
+
879
+ def download_results(
880
+ self,
881
+ result_id: Optional[str] = None,
882
+ format: DownloadFormat = DownloadFormat.CSV,
883
+ request_id: Optional[str] = None,
884
+ ) -> PangeaResponse[DownloadResult]:
885
+ """
886
+ Download search results
887
+
888
+ Get all search results as a compressed (gzip) CSV file.
889
+
890
+ OperationId: audit_post_v1_download_results
891
+
892
+ Args:
893
+ result_id: ID returned by the search API.
894
+ format: Format for the records.
895
+ request_id: ID returned by the export API.
896
+
897
+ Returns:
898
+ URL where search results can be downloaded.
899
+
900
+ Raises:
901
+ AuditException: If an Audit-based API exception occurs.
902
+ PangeaAPIException: If an API exception occurs.
903
+
904
+ Examples:
905
+ response = audit.download_results(
906
+ result_id="pas_[...]",
907
+ format=DownloadFormat.JSON,
908
+ )
909
+ """
910
+
911
+ if request_id is None and result_id is None:
912
+ raise ValueError("must pass one of `request_id` or `result_id`")
913
+
914
+ input = DownloadRequest(request_id=request_id, result_id=result_id, format=format)
915
+ return self.request.post("v1/download_results", DownloadResult, data=input.dict(exclude_none=True))
916
+
917
+ def update_published_roots(self, result: SearchResultOutput):
918
+ """Fetches series of published root hashes from Arweave
919
+
920
+ This is used for subsequent calls to verify_consistency_proof(). Root hashes
921
+ are published on [Arweave](https://arweave.net).
922
+
923
+ Args:
924
+ result (SearchResultOutput): Result object from previous call to Audit.search() or Audit.results()
925
+
926
+ Raises:
927
+ AuditException: If an audit based api exception happens
928
+ PangeaAPIException: If an API Error happens
929
+ """
930
+
931
+ if not result.root:
932
+ return
933
+
934
+ tree_sizes, arweave_roots = self._get_tree_sizes_and_roots(result)
935
+
936
+ # fill the missing roots from the server (if allowed)
937
+ for tree_size in tree_sizes:
938
+ pub_root = None
939
+ if tree_size in arweave_roots:
940
+ pub_root = PublishedRoot(**arweave_roots[tree_size].dict(exclude_none=True))
941
+ pub_root.source = RootSource.ARWEAVE
942
+ elif self.allow_server_roots:
943
+ resp = self.root(tree_size=tree_size)
944
+ if resp.success and resp.result is not None:
945
+ pub_root = PublishedRoot(**resp.result.data.dict(exclude_none=True))
946
+ pub_root.source = RootSource.PANGEA
947
+ if pub_root is not None:
948
+ self.pub_roots[tree_size] = pub_root
@@ -1,6 +1,5 @@
1
1
  from pangea.exceptions import PangeaException
2
-
3
- from .models import EventEnvelope
2
+ from pangea.services.audit.models import EventEnvelope
4
3
 
5
4
 
6
5
  # Audit SDK Specific Exceptions