pangea-sdk 6.5.0b1__py3-none-any.whl → 6.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pangea/request.py CHANGED
@@ -11,18 +11,20 @@ import json
11
11
  import logging
12
12
  import time
13
13
  from collections.abc import Iterable, Mapping
14
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast, overload
14
+ from random import random
15
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast
15
16
 
16
17
  import requests
17
- from pydantic import BaseModel, TypeAdapter
18
+ from pydantic import BaseModel
18
19
  from pydantic_core import to_jsonable_python
19
- from requests.adapters import HTTPAdapter, Retry
20
+ from requests.adapters import HTTPAdapter
20
21
  from requests_toolbelt import MultipartDecoder # type: ignore[import-untyped]
21
- from typing_extensions import Literal, TypeAlias, TypeVar, override
22
+ from typing_extensions import TypeAlias, TypeVar, override
22
23
  from yarl import URL
23
24
 
24
25
  import pangea
25
26
  import pangea.exceptions as pe
27
+ from pangea._constants import MAX_RETRY_DELAY, RETRYABLE_HTTP_CODES
26
28
  from pangea.config import PangeaConfig
27
29
  from pangea.response import AttachedFile, PangeaResponse, PangeaResponseResult, ResponseStatus, TransferMethod
28
30
  from pangea.utils import default_encoder
@@ -46,6 +48,70 @@ _Files: TypeAlias = Union[Mapping[str, _FileSpec], Iterable[tuple[str, _FileSpec
46
48
  _HeadersUpdateMapping: TypeAlias = Mapping[str, str]
47
49
 
48
50
 
51
class PangeaHTTPAdapter(HTTPAdapter):
    """Custom HTTP adapter that keeps track of retried request IDs.

    Requests whose response status is listed in ``RETRYABLE_HTTP_CODES`` are
    re-sent up to ``config.request_retries`` times, sleeping with exponential
    backoff and jitter between attempts. The ``x-request-id`` values of the
    earlier attempts are forwarded on each retry in the
    ``X-Pangea-Retried-Request-Ids`` header.
    """

    @override
    def __init__(self, config: PangeaConfig, *args, **kwargs):
        """Create the adapter.

        Args:
            config: Configuration supplying ``request_retries`` and
                ``request_backoff``.
            *args: Forwarded unchanged to ``HTTPAdapter.__init__``.
            **kwargs: Forwarded unchanged to ``HTTPAdapter.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.config = config

    @override
    def send(
        self,
        request: requests.PreparedRequest,
        stream: bool = False,
        timeout: None | float | tuple[float, float] | tuple[float, None] = None,
        verify: bool | str = True,
        cert: None | bytes | str | tuple[bytes | str, bytes | str] = None,
        proxies: Mapping[str, str] | None = None,
    ) -> requests.Response:
        """Send ``request``, retrying on retryable HTTP error statuses.

        Args:
            request: The prepared request. Its headers are mutated in place to
                add ``X-Pangea-Retried-Request-Ids`` on retries.
            stream: Forwarded to ``HTTPAdapter.send``.
            timeout: Forwarded to ``HTTPAdapter.send``.
            verify: Forwarded to ``HTTPAdapter.send``.
            cert: Forwarded to ``HTTPAdapter.send``.
            proxies: Forwarded to ``HTTPAdapter.send``.

        Returns:
            The response of the last attempt, whether or not it succeeded.
        """
        # Clamp so that a negative setting still performs exactly one attempt;
        # otherwise no request would be sent and there would be no response
        # to return.
        max_retries = max(self.config.request_retries, 0)

        # A dict is used as an insertion-ordered set so the header lists the
        # request IDs in the order the attempts were made.
        request_ids: dict[str, None] = {}

        retries_taken = 0
        while True:
            if request_ids:
                request.headers["X-Pangea-Retried-Request-Ids"] = ",".join(request_ids)

            response = super().send(request, stream, timeout, verify, cert, proxies)

            request_id = response.headers.get("x-request-id")
            if request_id:
                request_ids[request_id] = None

            try:
                response.raise_for_status()
            except requests.HTTPError:
                if retries_taken < max_retries and self._should_retry(response):
                    # Release the connection before waiting for the next attempt.
                    response.close()
                    self._sleep_for_retry(retries_taken=retries_taken, max_retries=max_retries, config=self.config)
                    retries_taken += 1
                    continue

            # Success, a non-retryable error, or retries exhausted.
            return response

    def _calculate_retry_timeout(self, remaining_retries: int, config: PangeaConfig) -> float:
        """Return the number of seconds to wait before the next retry.

        The delay is ``config.request_backoff * 2 ** retries_already_taken``,
        capped at ``MAX_RETRY_DELAY``, then scaled by a random factor in
        ``(0.75, 1.0]`` and floored at zero.

        Args:
            remaining_retries: Number of retries still available.
            config: Configuration supplying ``request_retries`` and
                ``request_backoff``.
        """
        max_retries = config.request_retries
        # Bound the exponent so pow() cannot overflow a float.
        nb_retries = min(max_retries - remaining_retries, 1000)
        sleep_seconds = min(config.request_backoff * pow(2.0, nb_retries), MAX_RETRY_DELAY)
        jitter = 1 - 0.25 * random()
        timeout = sleep_seconds * jitter
        return max(timeout, 0)

    def _sleep_for_retry(self, *, retries_taken: int, max_retries: int, config: PangeaConfig) -> None:
        """Block the current thread for the backoff delay of the next retry.

        Args:
            retries_taken: Number of retries already performed.
            max_retries: Maximum number of retries allowed.
            config: Configuration passed on to ``_calculate_retry_timeout``.
        """
        remaining_retries = max_retries - retries_taken
        timeout = self._calculate_retry_timeout(remaining_retries, config)
        time.sleep(timeout)

    def _should_retry(self, response: requests.Response) -> bool:
        """Return whether ``response``'s status code is in ``RETRYABLE_HTTP_CODES``."""
        return response.status_code in RETRYABLE_HTTP_CODES
113
+
114
+
49
115
  class MultipartResponse:
50
116
  pangea_json: Dict[str, str]
51
117
  attached_files: List = []
@@ -111,8 +177,8 @@ class PangeaRequestBase:
111
177
 
112
178
  return self._queued_retry_enabled
113
179
 
114
- def _get_delay(self, retry_count, start):
115
- delay = retry_count * retry_count
180
+ def _get_delay(self, retry_count: int, start: float) -> float:
181
+ delay: float = retry_count * retry_count
116
182
  now = time.time()
117
183
  # if with this delay exceed timeout, reduce delay
118
184
  if now - start + delay >= self.config.poll_result_timeout:
@@ -120,10 +186,10 @@ class PangeaRequestBase:
120
186
 
121
187
  return delay
122
188
 
123
- def _reach_timeout(self, start):
189
+ def _reach_timeout(self, start: float) -> bool:
124
190
  return time.time() - start >= self.config.poll_result_timeout
125
191
 
126
- def _get_poll_path(self, request_id: str):
192
+ def _get_poll_path(self, request_id: str) -> str:
127
193
  return f"request/{request_id}"
128
194
 
129
195
  def _url(self, path: str) -> str:
@@ -220,24 +286,6 @@ class PangeaRequest(PangeaRequestBase):
220
286
  def __del__(self) -> None:
221
287
  self.session.close()
222
288
 
223
- def delete(self, endpoint: str) -> None:
224
- """
225
- Makes a DELETE call to a Pangea endpoint.
226
-
227
- Args:
228
- endpoint: The Pangea API endpoint.
229
- """
230
-
231
- url = self._url(endpoint)
232
-
233
- self.logger.debug(
234
- json.dumps({"service": self.service, "action": "delete", "url": url}, default=default_encoder)
235
- )
236
-
237
- requests_response = self._http_delete(url, headers=self._headers())
238
- self._check_http_errors(requests_response)
239
-
240
- @overload
241
289
  def post(
242
290
  self,
243
291
  endpoint: str,
@@ -246,62 +294,18 @@ class PangeaRequest(PangeaRequestBase):
246
294
  files: Optional[list[Tuple]] = None,
247
295
  poll_result: bool = True,
248
296
  url: Optional[str] = None,
249
- *,
250
- pangea_response: Literal[True] = True,
251
297
  ) -> PangeaResponse[TResult]:
252
- """
253
- Makes the POST call to a Pangea Service endpoint.
298
+ """Makes the POST call to a Pangea Service endpoint.
254
299
 
255
300
  Args:
256
- endpoint: The Pangea Service API endpoint.
257
- data: The POST body payload object
301
+ endpoint(str): The Pangea Service API endpoint.
302
+ data(dict): The POST body payload object
258
303
 
259
304
  Returns:
260
305
  PangeaResponse which contains the response in its entirety and
261
306
  various properties to retrieve individual fields
262
307
  """
263
308
 
264
- @overload
265
- def post(
266
- self,
267
- endpoint: str,
268
- result_class: Type[TResult],
269
- data: str | BaseModel | Mapping[str, Any] | None = None,
270
- files: Optional[list[Tuple]] = None,
271
- poll_result: bool = True,
272
- url: Optional[str] = None,
273
- *,
274
- pangea_response: Literal[False],
275
- ) -> TResult:
276
- """
277
- Makes the POST call to a Pangea Service endpoint.
278
-
279
- Args:
280
- endpoint: The Pangea Service API endpoint.
281
- data: The POST body payload object
282
- """
283
-
284
- def post(
285
- self,
286
- endpoint: str,
287
- result_class: Type[TResult],
288
- data: str | BaseModel | Mapping[str, Any] | None = None,
289
- files: Optional[list[Tuple]] = None,
290
- poll_result: bool = True,
291
- url: Optional[str] = None,
292
- *,
293
- pangea_response: bool = True,
294
- ) -> PangeaResponse[TResult] | TResult:
295
- """
296
- Makes a POST call to a Pangea Service endpoint.
297
-
298
- Args:
299
- endpoint: The Pangea Service API endpoint.
300
- data: The POST body payload object
301
- pangea_response: Whether or not the response body follows Pangea's
302
- standard response schema
303
- """
304
-
305
309
  if isinstance(data, BaseModel):
306
310
  data = data.model_dump(exclude_none=True)
307
311
 
@@ -339,13 +343,9 @@ class PangeaRequest(PangeaRequestBase):
339
343
 
340
344
  self._check_http_errors(requests_response)
341
345
 
342
- if not pangea_response:
343
- type_adapter = TypeAdapter(result_class)
344
- return type_adapter.validate_python(requests_response.json())
345
-
346
346
  if "multipart/form-data" in requests_response.headers.get("content-type", ""):
347
347
  multipart_response = self._process_multipart_response(requests_response)
348
- pangea_response_obj: PangeaResponse = PangeaResponse(
348
+ pangea_response: PangeaResponse = PangeaResponse(
349
349
  requests_response,
350
350
  result_class=result_class,
351
351
  json=multipart_response.pangea_json,
@@ -358,14 +358,14 @@ class PangeaRequest(PangeaRequestBase):
358
358
  json.dumps({"service": self.service, "action": "post", "url": url, "response": json_resp})
359
359
  )
360
360
 
361
- pangea_response_obj = PangeaResponse(requests_response, result_class=result_class, json=json_resp)
361
+ pangea_response = PangeaResponse(requests_response, result_class=result_class, json=json_resp)
362
362
  except requests.exceptions.JSONDecodeError as e:
363
363
  raise pe.PangeaException(f"Failed to decode json response. {e}. Body: {requests_response.text}") from e
364
364
 
365
365
  if poll_result:
366
- pangea_response_obj = self._handle_queued_result(pangea_response_obj)
366
+ pangea_response = self._handle_queued_result(pangea_response)
367
367
 
368
- return self._check_response(pangea_response_obj)
368
+ return self._check_response(pangea_response)
369
369
 
370
370
  def _get_pangea_json(self, decoder: MultipartDecoder) -> Optional[Dict]:
371
371
  # Iterate through parts
@@ -408,14 +408,6 @@ class PangeaRequest(PangeaRequestBase):
408
408
  if resp.status_code == 503:
409
409
  raise pe.ServiceTemporarilyUnavailable(resp.json())
410
410
 
411
- def _http_delete(
412
- self,
413
- url: str,
414
- *,
415
- headers: Mapping[str, str | bytes | None] = {},
416
- ) -> requests.Response:
417
- return self.session.delete(url, headers=headers)
418
-
419
411
  def _http_post(
420
412
  self,
421
413
  url: str,
@@ -460,98 +452,37 @@ class PangeaRequest(PangeaRequestBase):
460
452
 
461
453
  return response
462
454
 
463
- @overload
464
- def get(
465
- self,
466
- path: str,
467
- result_class: Type[TResult],
468
- check_response: bool = True,
469
- *,
470
- params: (
471
- Mapping[str | bytes | int | float, str | bytes | int | float | Iterable[str | bytes | int | float] | None]
472
- | None
473
- ) = None,
474
- pangea_response: Literal[True] = True,
475
- ) -> PangeaResponse[TResult]:
476
- """
477
- Makes the GET call to a Pangea Service endpoint.
455
+ def get(self, path: str, result_class: Type[TResult], check_response: bool = True) -> PangeaResponse[TResult]:
456
+ """Makes the GET call to a Pangea Service endpoint.
478
457
 
479
458
  Args:
480
- path: Additional URL path
481
- params: Dictionary of querystring data to attach to the request
459
+ endpoint(str): The Pangea Service API endpoint.
460
+ path(str): Additional URL path
482
461
 
483
462
  Returns:
484
463
  PangeaResponse which contains the response in its entirety and
485
464
  various properties to retrieve individual fields
486
465
  """
487
466
 
488
- @overload
489
- def get(
490
- self,
491
- path: str,
492
- result_class: Type[TResult],
493
- check_response: bool = True,
494
- *,
495
- params: (
496
- Mapping[str | bytes | int | float, str | bytes | int | float | Iterable[str | bytes | int | float] | None]
497
- | None
498
- ) = None,
499
- pangea_response: Literal[False] = False,
500
- ) -> TResult:
501
- """
502
- Makes the GET call to a Pangea Service endpoint.
503
-
504
- Args:
505
- path: Additional URL path
506
- params: Dictionary of querystring data to attach to the request
507
- """
508
-
509
- def get(
510
- self,
511
- path: str,
512
- result_class: Type[TResult],
513
- check_response: bool = True,
514
- *,
515
- params: (
516
- Mapping[str | bytes | int | float, str | bytes | int | float | Iterable[str | bytes | int | float] | None]
517
- | None
518
- ) = None,
519
- pangea_response: bool = True,
520
- ) -> PangeaResponse[TResult] | TResult:
521
- """
522
- Makes the GET call to a Pangea Service endpoint.
523
-
524
- Args:
525
- path: Additional URL path
526
- params: Dictionary of querystring data to attach to the request
527
- pangea_response: Whether or not the response body follows Pangea's
528
- standard response schema
529
- """
530
-
531
467
  url = self._url(path)
532
468
  self.logger.debug(json.dumps({"service": self.service, "action": "get", "url": url}))
533
- requests_response = self.session.get(url, params=params, headers=self._headers())
469
+ requests_response = self.session.get(url, headers=self._headers())
534
470
  self._check_http_errors(requests_response)
535
-
536
- if not pangea_response:
537
- type_adapter = TypeAdapter(result_class)
538
- return type_adapter.validate_python(requests_response.json())
539
-
540
- pangea_response_obj: PangeaResponse = PangeaResponse(
471
+ pangea_response: PangeaResponse = PangeaResponse(
541
472
  requests_response, result_class=result_class, json=requests_response.json()
542
473
  )
543
474
 
544
475
  self.logger.debug(
545
476
  json.dumps(
546
- {"service": self.service, "action": "get", "url": url, "response": pangea_response_obj.json},
477
+ {"service": self.service, "action": "get", "url": url, "response": pangea_response.json},
547
478
  default=default_encoder,
548
479
  )
549
480
  )
550
481
 
551
482
  if check_response is False:
552
- return pangea_response_obj
483
+ return pangea_response
553
484
 
554
- return self._check_response(pangea_response_obj)
485
+ return self._check_response(pangea_response)
555
486
 
556
487
  def download_file(self, url: str, filename: str | None = None) -> AttachedFile:
557
488
  """
@@ -763,13 +694,7 @@ class PangeaRequest(PangeaRequestBase):
763
694
 
764
695
  @override
765
696
  def _init_session(self) -> requests.Session:
766
- retry_config = Retry(
767
- total=self.config.request_retries,
768
- backoff_factor=self.config.request_backoff,
769
- status_forcelist=[500, 502, 503, 504],
770
- )
771
-
772
- adapter = HTTPAdapter(max_retries=retry_config)
697
+ adapter = PangeaHTTPAdapter(config=self.config)
773
698
  session = requests.Session()
774
699
 
775
700
  session.mount("http://", adapter)
pangea/response.py CHANGED
@@ -18,6 +18,8 @@ from typing_extensions import TypeVar
18
18
 
19
19
  from pangea.utils import format_datetime
20
20
 
21
+ __all__ = ("PangeaResponse", "PangeaResponseResult", "TransferMethod")
22
+
21
23
 
22
24
  class AttachedFile:
23
25
  filename: str
@@ -243,7 +245,9 @@ class PangeaResponse(ResponseHeader, Generic[T]):
243
245
 
244
246
  @property
245
247
  def http_status(self) -> int: # type: ignore[return]
246
- if self.raw_response:
248
+ # Must be an explicit None check because Response's boolean
249
+ # representation is equal to whether or not the response is OK.
250
+ if self.raw_response is not None:
247
251
  if isinstance(self.raw_response, aiohttp.ClientResponse):
248
252
  return self.raw_response.status
249
253
  else:
@@ -1,5 +1,3 @@
1
- # ruff: noqa: F401
2
-
3
1
  from .ai_guard import AIGuard
4
2
  from .audit.audit import Audit
5
3
  from .authn.authn import AuthN
@@ -7,9 +5,27 @@ from .authz import AuthZ
7
5
  from .embargo import Embargo
8
6
  from .file_scan import FileScan
9
7
  from .intel import DomainIntel, FileIntel, IpIntel, UrlIntel, UserIntel
10
- from .management import Management
11
8
  from .prompt_guard import PromptGuard
12
9
  from .redact import Redact
13
10
  from .sanitize import Sanitize
14
11
  from .share.share import Share
15
12
  from .vault.vault import Vault
13
+
14
+ __all__ = (
15
+ "AIGuard",
16
+ "Audit",
17
+ "AuthN",
18
+ "AuthZ",
19
+ "DomainIntel",
20
+ "Embargo",
21
+ "FileIntel",
22
+ "FileScan",
23
+ "IpIntel",
24
+ "PromptGuard",
25
+ "Redact",
26
+ "Sanitize",
27
+ "Share",
28
+ "UrlIntel",
29
+ "UserIntel",
30
+ "Vault",
31
+ )