pangea-sdk 3.7.0__py3-none-any.whl → 3.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pangea/config.py CHANGED
@@ -9,65 +9,57 @@ from typing import Optional
9
9
  class PangeaConfig:
10
10
  """Holds run time configuration information used by SDK components."""
11
11
 
12
+ domain: str = "aws.us.pangea.cloud"
12
13
  """
13
14
  Used to set Pangea domain (and port if needed), it should not include service subdomain
14
15
  just for particular use cases when environment = "local", domain could be set to an url including:
15
16
  scheme (http:// or https://), subdomain, domain and port.
16
-
17
17
  """
18
- domain: str = "aws.us.pangea.cloud"
19
18
 
19
+ environment: str = "production"
20
20
  """
21
21
  Used to generate service url.
22
22
  It should be only 'production' or 'local' in cases of particular services that can run locally as Redact.
23
-
24
23
  """
25
- environment: str = "production"
26
24
 
25
+ config_id: Optional[str] = None
27
26
  """
28
27
  Only used for services that support multiconfig (e.g.: Audit service)
29
28
 
30
29
  @deprecated("config_id will be deprecated from PangeaConfig. Set it on service initialization instead")
31
30
  """
32
- config_id: Optional[str] = None
33
31
 
32
+ insecure: bool = False
34
33
  """
35
34
  Set to true to use plain http
36
-
37
35
  """
38
- insecure: bool = False
39
36
 
37
+ request_retries: int = 3
40
38
  """
41
39
  Number of retries on the initial request
42
-
43
40
  """
44
- request_retries: int = 3
45
41
 
42
+ request_backoff: float = 0.5
46
43
  """
47
44
  Backoff strategy passed to 'requests'
48
-
49
45
  """
50
- request_backoff: float = 0.5
51
46
 
47
+ request_timeout: int = 5
52
48
  """
53
49
  Timeout used on initial request attempts
54
-
55
50
  """
56
- request_timeout: int = 5
57
51
 
52
+ poll_result_timeout: int = 30
58
53
  """
59
54
  Timeout used to poll results after 202 (in secs)
60
-
61
55
  """
62
- poll_result_timeout: int = 30
63
56
 
57
+ queued_retry_enabled: bool = True
64
58
  """
65
59
  Enable queued request retry support
66
60
  """
67
- queued_retry_enabled: bool = True
68
61
 
62
+ custom_user_agent: Optional[str] = None
69
63
  """
70
64
  Extra user agent to be added to request user agent
71
-
72
65
  """
73
- custom_user_agent: Optional[str] = None
pangea/dump_audit.py CHANGED
@@ -10,6 +10,7 @@ from datetime import datetime
10
10
  from typing import Tuple
11
11
 
12
12
  import dateutil.parser
13
+
13
14
  from pangea.response import PangeaResponse
14
15
  from pangea.services import Audit
15
16
  from pangea.services.audit.models import SearchEvent, SearchOrder, SearchOrderBy, SearchOutput, SearchResultOutput
pangea/exceptions.py CHANGED
@@ -30,6 +30,14 @@ class PresignedUploadError(PangeaException):
30
30
  self.body = body
31
31
 
32
32
 
33
+ class DownloadFileError(PangeaException):
34
+ body: str
35
+
36
+ def __init__(self, message: str, body: str):
37
+ super().__init__(message)
38
+ self.body = body
39
+
40
+
33
41
  class PangeaAPIException(PangeaException):
34
42
  """Exceptions raised during API calls"""
35
43
 
pangea/request.py CHANGED
@@ -8,13 +8,25 @@ import time
8
8
  from typing import Dict, List, Optional, Tuple, Type, Union
9
9
 
10
10
  import aiohttp
11
+ import requests
12
+ from requests.adapters import HTTPAdapter, Retry
13
+ from requests.structures import CaseInsensitiveDict
14
+ from requests_toolbelt import MultipartDecoder # type: ignore
15
+
11
16
  import pangea
12
17
  import pangea.exceptions as pe
13
- import requests
14
18
  from pangea.config import PangeaConfig
15
- from pangea.response import PangeaResponse, PangeaResponseResult, ResponseStatus, TransferMethod
19
+ from pangea.response import AttachedFile, PangeaResponse, PangeaResponseResult, ResponseStatus, TransferMethod
16
20
  from pangea.utils import default_encoder
17
- from requests.adapters import HTTPAdapter, Retry
21
+
22
+
23
+ class MultipartResponse(object):
24
+ pangea_json: Dict[str, str]
25
+ attached_files: List = []
26
+
27
+ def __init__(self, pangea_json: Dict[str, str], attached_files: List = []):
28
+ self.pangea_json = pangea_json
29
+ self.attached_files = attached_files
18
30
 
19
31
 
20
32
  class PangeaRequestBase(object):
@@ -30,9 +42,10 @@ class PangeaRequestBase(object):
30
42
  self._queued_retry_enabled = config.queued_retry_enabled
31
43
 
32
44
  # Custom headers
33
- self._extra_headers = {} # type: ignore[var-annotated]
45
+ self._extra_headers: Dict = {}
34
46
  self._user_agent = ""
35
- self.set_custom_user_agent(config.custom_user_agent) # type: ignore[arg-type]
47
+
48
+ self.set_custom_user_agent(config.custom_user_agent)
36
49
  self._session: Optional[Union[requests.Session, aiohttp.ClientSession]] = None
37
50
 
38
51
  self.logger = logger
@@ -57,7 +70,7 @@ class PangeaRequestBase(object):
57
70
  if isinstance(headers, dict):
58
71
  self._extra_headers = headers
59
72
 
60
- def set_custom_user_agent(self, user_agent: str):
73
+ def set_custom_user_agent(self, user_agent: Optional[str]):
61
74
  self.config.custom_user_agent = user_agent
62
75
  self._user_agent = f"pangea-python/{pangea.__version__}"
63
76
  if self.config.custom_user_agent:
@@ -110,6 +123,16 @@ class PangeaRequestBase(object):
110
123
  self._extra_headers.update(headers)
111
124
  return self._extra_headers
112
125
 
126
+ def _get_filename_from_content_disposition(self, content_disposition: str) -> Optional[str]:
127
+ filename_parts = content_disposition.split("name=")
128
+ if len(filename_parts) > 1:
129
+ return filename_parts[1].split(";")[0].strip('"')
130
+ else:
131
+ return None
132
+
133
+ def _get_filename_from_url(self, url: str) -> Optional[str]:
134
+ return url.split("/")[-1].split("?")[0]
135
+
113
136
  def _check_response(self, response: PangeaResponse) -> PangeaResponse:
114
137
  status = response.status
115
138
  summary = response.summary
@@ -149,7 +172,7 @@ class PangeaRequestBase(object):
149
172
  elif status == ResponseStatus.TREE_NOT_FOUND.value:
150
173
  raise pe.TreeNotFoundException(summary, response)
151
174
  elif status == ResponseStatus.IP_NOT_FOUND.value:
152
- raise pe.IPNotFoundException(summary) # type: ignore[call-arg]
175
+ raise pe.IPNotFoundException(summary, response)
153
176
  elif status == ResponseStatus.BAD_OFFSET.value:
154
177
  raise pe.BadOffsetException(summary, response)
155
178
  elif status == ResponseStatus.FORBIDDEN_VAULT_OPERATION.value:
@@ -157,7 +180,7 @@ class PangeaRequestBase(object):
157
180
  elif status == ResponseStatus.VAULT_ITEM_NOT_FOUND.value:
158
181
  raise pe.VaultItemNotFound(summary, response)
159
182
  elif status == ResponseStatus.NOT_FOUND.value:
160
- raise pe.NotFound(response.raw_response.url if response.raw_response is not None else "", response) # type: ignore[arg-type]
183
+ raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response) # type: ignore[arg-type]
161
184
  elif status == ResponseStatus.INTERNAL_SERVER_ERROR.value:
162
185
  raise pe.InternalServerError(response)
163
186
  elif status == ResponseStatus.ACCEPTED.value:
@@ -200,13 +223,13 @@ class PangeaRequest(PangeaRequestBase):
200
223
  url = self._url(endpoint)
201
224
 
202
225
  # Set config ID if available
203
- if self.config_id and data.get("config_id", None) is None: # type: ignore[union-attr]
204
- data["config_id"] = self.config_id # type: ignore[index]
226
+ if self.config_id and isinstance(data, dict) and data.get("config_id", None) is None:
227
+ data["config_id"] = self.config_id
205
228
 
206
229
  self.logger.debug(
207
230
  json.dumps({"service": self.service, "action": "post", "url": url, "data": data}, default=default_encoder)
208
231
  )
209
- transfer_method = data.get("transfer_method", None) # type: ignore[union-attr]
232
+ transfer_method = data.get("transfer_method", None) if isinstance(data, dict) else None
210
233
 
211
234
  if files is not None and type(data) is dict and (transfer_method == TransferMethod.POST_URL.value):
212
235
  requests_response = self._full_post_presigned_url(
@@ -218,15 +241,68 @@ class PangeaRequest(PangeaRequestBase):
218
241
  )
219
242
 
220
243
  self._check_http_errors(requests_response)
221
- json_resp = requests_response.json()
222
- self.logger.debug(json.dumps({"service": self.service, "action": "post", "url": url, "response": json_resp}))
223
244
 
224
- pangea_response = PangeaResponse(requests_response, result_class=result_class, json=json_resp) # type: ignore[var-annotated]
245
+ if "multipart/form-data" in requests_response.headers.get("content-type", ""):
246
+ multipart_response = self._process_multipart_response(requests_response)
247
+ pangea_response: PangeaResponse = PangeaResponse(
248
+ requests_response,
249
+ result_class=result_class,
250
+ json=multipart_response.pangea_json,
251
+ attached_files=multipart_response.attached_files,
252
+ )
253
+ else:
254
+ try:
255
+ json_resp = requests_response.json()
256
+ self.logger.debug(
257
+ json.dumps({"service": self.service, "action": "post", "url": url, "response": json_resp})
258
+ )
259
+
260
+ pangea_response = PangeaResponse(requests_response, result_class=result_class, json=json_resp)
261
+ except requests.exceptions.JSONDecodeError as e:
262
+ raise pe.PangeaException(f"Failed to decode json response. {e}. Body: {requests_response.text}")
263
+
225
264
  if poll_result:
226
265
  pangea_response = self._handle_queued_result(pangea_response)
227
266
 
228
267
  return self._check_response(pangea_response)
229
268
 
269
+ def _get_pangea_json(self, decoder: MultipartDecoder) -> Optional[Dict]:
270
+ # Iterate through parts
271
+ for i, part in enumerate(decoder.parts):
272
+ if i == 0:
273
+ json_str = part.content.decode("utf-8")
274
+ return json.loads(json_str)
275
+
276
+ return None
277
+
278
+ def _get_attached_files(self, decoder: MultipartDecoder) -> List[AttachedFile]:
279
+ files = []
280
+
281
+ for i, part in enumerate(decoder.parts):
282
+ content_type = part.headers.get(b"Content-Type", b"").decode("utf-8")
283
+ # if "application/octet-stream" in content_type:
284
+ if i > 0:
285
+ content_disposition = part.headers.get(b"Content-Disposition", b"").decode("utf-8")
286
+ name = self._get_filename_from_content_disposition(content_disposition)
287
+ if name is None:
288
+ name = f"default_file_name_{i}"
289
+
290
+ files.append(AttachedFile(name, part.content, content_type))
291
+
292
+ return files
293
+
294
+ def _process_multipart_response(self, resp: requests.Response) -> MultipartResponse:
295
+ # Parse the multipart response
296
+ decoder = MultipartDecoder.from_response(resp)
297
+
298
+ pangea_json = self._get_pangea_json(decoder)
299
+ self.logger.debug(
300
+ json.dumps({"service": self.service, "action": "multipart response", "response": pangea_json})
301
+ )
302
+
303
+ attached_files = self._get_attached_files(decoder)
304
+ return MultipartResponse(pangea_json, attached_files) # type: ignore
305
+
230
306
  def _check_http_errors(self, resp: requests.Response):
231
307
  if resp.status_code == 503:
232
308
  raise pe.ServiceTemporarilyUnavailable(resp.json())
@@ -268,48 +344,8 @@ class PangeaRequest(PangeaRequestBase):
268
344
 
269
345
  return data, files
270
346
 
271
- def _post_presigned_url(
272
- self,
273
- endpoint: str,
274
- result_class: Type[PangeaResponseResult],
275
- data: Union[str, Dict] = {},
276
- files: Optional[List[Tuple]] = None,
277
- ):
278
- if len(files) == 0: # type: ignore[arg-type]
279
- raise AttributeError("files attribute should have at least 1 file")
280
-
281
- # Send request
282
- try:
283
- # This should return 202 (AcceptedRequestException)
284
- resp = self.post(endpoint=endpoint, result_class=result_class, data=data, poll_result=False)
285
- raise pe.PresignedURLException("Should return 202", resp)
286
-
287
- except pe.AcceptedRequestException as e:
288
- accepted_exception = e
289
- except Exception as e:
290
- raise e
291
-
292
- # Receive 202 with accepted_status
293
- result = self._poll_presigned_url(accepted_exception) # type: ignore[arg-type]
294
- data_to_presigned = result.accepted_status.upload_details # type: ignore[attr-defined]
295
- presigned_url = result.accepted_status.upload_url # type: ignore[attr-defined]
296
-
297
- # Send multipart request with file and upload_details as body
298
- resp = self._http_post(url=presigned_url, data=data_to_presigned, files=files, multipart_post=False) # type: ignore[assignment]
299
- self.logger.debug(
300
- json.dumps(
301
- {"service": self.service, "action": "post presigned", "url": presigned_url, "response": resp.text}, # type: ignore[attr-defined]
302
- default=default_encoder,
303
- )
304
- )
305
-
306
- if resp.status_code < 200 or resp.status_code >= 300: # type: ignore[attr-defined]
307
- raise pe.PresignedUploadError(f"presigned POST failure: {resp.status_code}", resp.text) # type: ignore[attr-defined]
308
-
309
- return accepted_exception.response.raw_response
310
-
311
- def _handle_queued_result(self, response: PangeaResponse) -> PangeaResponse:
312
- if self._queued_retry_enabled and response.raw_response.status_code == 202: # type: ignore[union-attr]
347
+ def _handle_queued_result(self, response: PangeaResponse) -> PangeaResponse[Type[PangeaResponseResult]]:
348
+ if self._queued_retry_enabled and response.http_status == 202:
313
349
  self.logger.debug(
314
350
  json.dumps(
315
351
  {"service": self.service, "action": "poll_result", "response": response.json},
@@ -336,7 +372,9 @@ class PangeaRequest(PangeaRequestBase):
336
372
  self.logger.debug(json.dumps({"service": self.service, "action": "get", "url": url}))
337
373
  requests_response = self.session.get(url, headers=self._headers())
338
374
  self._check_http_errors(requests_response)
339
- pangea_response = PangeaResponse(requests_response, result_class=result_class, json=requests_response.json()) # type: ignore[var-annotated]
375
+ pangea_response: PangeaResponse = PangeaResponse(
376
+ requests_response, result_class=result_class, json=requests_response.json()
377
+ )
340
378
 
341
379
  self.logger.debug(
342
380
  json.dumps(
@@ -350,14 +388,55 @@ class PangeaRequest(PangeaRequestBase):
350
388
 
351
389
  return self._check_response(pangea_response)
352
390
 
391
+ def download_file(self, url: str, filename: Optional[str] = None) -> AttachedFile:
392
+ self.logger.debug(
393
+ json.dumps(
394
+ {
395
+ "service": self.service,
396
+ "action": "download_file",
397
+ "url": url,
398
+ "filename": filename,
399
+ "status": "start",
400
+ }
401
+ )
402
+ )
403
+ response = self.session.get(url, headers={})
404
+ if response.status_code == 200:
405
+ if filename is None:
406
+ content_disposition = response.headers.get(b"Content-Disposition", b"").decode("utf-8")
407
+ filename = self._get_filename_from_content_disposition(content_disposition)
408
+ if filename is None:
409
+ filename = self._get_filename_from_url(url)
410
+ if filename is None:
411
+ filename = "default_filename"
412
+
413
+ content_type = response.headers.get(b"Content-Type", b"").decode("utf-8")
414
+
415
+ self.logger.debug(
416
+ json.dumps(
417
+ {
418
+ "service": self.service,
419
+ "action": "download_file",
420
+ "url": url,
421
+ "filename": filename,
422
+ "status": "success",
423
+ }
424
+ )
425
+ )
426
+ return AttachedFile(filename=filename, file=response.content, content_type=content_type)
427
+ else:
428
+ raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
429
+
353
430
  def poll_result_by_id(
354
- self, request_id: str, result_class: Union[Type[PangeaResponseResult], dict], check_response: bool = True
431
+ self, request_id: str, result_class: Union[Type[PangeaResponseResult], Type[dict]], check_response: bool = True
355
432
  ):
356
433
  path = self._get_poll_path(request_id)
357
434
  self.logger.debug(json.dumps({"service": self.service, "action": "poll_result_once", "url": path}))
358
435
  return self.get(path, result_class, check_response=check_response) # type: ignore[arg-type]
359
436
 
360
- def poll_result_once(self, response: PangeaResponse, check_response: bool = True):
437
+ def poll_result_once(
438
+ self, response: PangeaResponse, check_response: bool = True
439
+ ) -> PangeaResponse[Type[PangeaResponseResult]]:
361
440
  request_id = response.request_id
362
441
  if not request_id:
363
442
  raise pe.PangeaException("Poll result error: response did not include a 'request_id'")
@@ -422,7 +501,8 @@ class PangeaRequest(PangeaRequestBase):
422
501
  self.logger.debug(
423
502
  json.dumps({"service": self.service, "action": "http_put", "url": url}, default=default_encoder)
424
503
  )
425
- return self.session.put(url, headers=headers, files=files)
504
+ _, value = files[0]
505
+ return self.session.put(url, headers=headers, data=value[1])
426
506
 
427
507
  def _full_post_presigned_url(
428
508
  self,
@@ -431,17 +511,22 @@ class PangeaRequest(PangeaRequestBase):
431
511
  data: Union[str, Dict] = {},
432
512
  files: Optional[List[Tuple]] = None,
433
513
  ):
434
- if len(files) == 0: # type: ignore[arg-type]
514
+ if files is None or len(files) == 0:
435
515
  raise AttributeError("files attribute should have at least 1 file")
436
516
 
437
517
  response = self.request_presigned_url(endpoint=endpoint, result_class=result_class, data=data)
438
- data_to_presigned = response.accepted_result.post_form_data # type: ignore[union-attr]
439
- presigned_url = response.accepted_result.post_url # type: ignore[union-attr]
518
+ if response.accepted_result is None:
519
+ raise pe.PangeaException("No accepted_result field when requesting presigned url")
520
+ if response.accepted_result.post_url is None:
521
+ raise pe.PresignedURLException("No presigned url", response)
522
+
523
+ data_to_presigned = response.accepted_result.post_form_data
524
+ presigned_url = response.accepted_result.post_url
440
525
 
441
- self.post_presigned_url(url=presigned_url, data=data_to_presigned, files=files) # type: ignore[arg-type]
526
+ self.post_presigned_url(url=presigned_url, data=data_to_presigned, files=files)
442
527
  return response.raw_response
443
528
 
444
- def _poll_result_retry(self, response: PangeaResponse) -> PangeaResponse:
529
+ def _poll_result_retry(self, response: PangeaResponse) -> PangeaResponse[Type[PangeaResponseResult]]:
445
530
  retry_count = 1
446
531
  start = time.time()
447
532
 
@@ -453,7 +538,9 @@ class PangeaRequest(PangeaRequestBase):
453
538
  self.logger.debug(json.dumps({"service": self.service, "action": "poll_result_retry", "step": "exit"}))
454
539
  return self._check_response(response)
455
540
 
456
- def _poll_presigned_url(self, response: PangeaResponse) -> PangeaResponse:
541
+ def _poll_presigned_url(
542
+ self, response: PangeaResponse[Type[PangeaResponseResult]]
543
+ ) -> PangeaResponse[Type[PangeaResponseResult]]:
457
544
  if response.http_status != 202:
458
545
  raise AttributeError("Response should be 202")
459
546
 
pangea/response.py CHANGED
@@ -2,21 +2,47 @@
2
2
  # Author: Pangea Cyber Corporation
3
3
  import datetime
4
4
  import enum
5
+ import os
5
6
  from typing import Any, Dict, Generic, List, Optional, Type, TypeVar, Union
6
7
 
7
8
  import aiohttp
8
9
  import requests
9
- from pangea.utils import format_datetime
10
10
  from pydantic import BaseModel
11
11
 
12
+ from pangea.utils import format_datetime
13
+
12
14
  T = TypeVar("T")
13
15
 
14
16
 
17
+ class AttachedFile(object):
18
+ filename: str
19
+ file: bytes
20
+ content_type: str
21
+
22
+ def __init__(self, filename: str, file: bytes, content_type: str):
23
+ self.filename = filename
24
+ self.file = file
25
+ self.content_type = content_type
26
+
27
+ def save(self, dest_folder: str = "./", filename: Optional[str] = None):
28
+ if filename is None:
29
+ filename = self.filename if self.filename else "default_save_filename"
30
+
31
+ filepath = os.path.join(dest_folder, filename)
32
+ directory = os.path.dirname(filepath)
33
+ if not os.path.exists(directory):
34
+ os.makedirs(directory)
35
+
36
+ with open(filepath, "wb") as file:
37
+ file.write(self.file)
38
+
39
+
15
40
  class TransferMethod(str, enum.Enum):
16
41
  MULTIPART = "multipart"
17
42
  POST_URL = "post-url"
18
43
  PUT_URL = "put-url"
19
44
  SOURCE_URL = "source-url"
45
+ DEST_URL = "dest-url"
20
46
 
21
47
  def __str__(self):
22
48
  return str(self.value)
@@ -141,15 +167,24 @@ class PangeaResponse(Generic[T], ResponseHeader):
141
167
  result: Optional[T] = None
142
168
  pangea_error: Optional[PangeaError] = None
143
169
  accepted_result: Optional[AcceptedResult] = None
144
- result_class: Type[PangeaResponseResult] = PangeaResponseResult
170
+ result_class: Union[Type[PangeaResponseResult], Type[dict]] = PangeaResponseResult
145
171
  _json: Any
146
-
147
- def __init__(self, response: requests.Response, result_class: Type[PangeaResponseResult], json: dict):
172
+ attached_files: List[AttachedFile] = []
173
+
174
+ def __init__(
175
+ self,
176
+ response: requests.Response,
177
+ result_class: Union[Type[PangeaResponseResult], Type[dict]],
178
+ json: dict,
179
+ attached_files: List[AttachedFile] = [],
180
+ ):
148
181
  super(PangeaResponse, self).__init__(**json)
149
182
  self._json = json
150
183
  self.raw_response = response
151
184
  self.raw_result = self._json["result"]
152
185
  self.result_class = result_class
186
+ self.attached_files = attached_files
187
+
153
188
  self.result = (
154
189
  self.result_class(**self.raw_result) # type: ignore[assignment]
155
190
  if self.raw_result is not None and issubclass(self.result_class, PangeaResponseResult) and self.success