pangea-sdk 6.5.0b1__py3-none-any.whl → 6.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pangea/__init__.py +3 -11
- pangea/_constants.py +4 -0
- pangea/_typing.py +30 -0
- pangea/asyncio/__init__.py +2 -1
- pangea/asyncio/file_uploader.py +3 -2
- pangea/asyncio/request.py +66 -162
- pangea/asyncio/services/__init__.py +19 -3
- pangea/asyncio/services/ai_guard.py +23 -169
- pangea/asyncio/services/audit.py +1 -301
- pangea/asyncio/services/authn.py +25 -8
- pangea/asyncio/services/base.py +21 -6
- pangea/asyncio/services/file_scan.py +1 -1
- pangea/asyncio/services/intel.py +160 -95
- pangea/asyncio/services/prompt_guard.py +5 -112
- pangea/asyncio/services/redact.py +4 -265
- pangea/config.py +4 -2
- pangea/file_uploader.py +4 -1
- pangea/request.py +91 -166
- pangea/response.py +5 -1
- pangea/services/__init__.py +19 -3
- pangea/services/ai_guard.py +84 -694
- pangea/services/audit/audit.py +3 -301
- pangea/services/audit/models.py +1 -273
- pangea/services/audit/util.py +2 -0
- pangea/services/authn/authn.py +4 -5
- pangea/services/base.py +3 -0
- pangea/services/file_scan.py +3 -2
- pangea/services/intel.py +187 -252
- pangea/services/prompt_guard.py +5 -193
- pangea/services/redact.py +7 -473
- pangea/services/vault/vault.py +3 -0
- {pangea_sdk-6.5.0b1.dist-info → pangea_sdk-6.6.0.dist-info}/METADATA +17 -18
- pangea_sdk-6.6.0.dist-info/RECORD +62 -0
- pangea_sdk-6.6.0.dist-info/WHEEL +4 -0
- pangea/asyncio/services/management.py +0 -576
- pangea/services/management.py +0 -720
- pangea_sdk-6.5.0b1.dist-info/RECORD +0 -62
- pangea_sdk-6.5.0b1.dist-info/WHEEL +0 -4
pangea/request.py
CHANGED
@@ -11,18 +11,20 @@ import json
|
|
11
11
|
import logging
|
12
12
|
import time
|
13
13
|
from collections.abc import Iterable, Mapping
|
14
|
-
from
|
14
|
+
from random import random
|
15
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast
|
15
16
|
|
16
17
|
import requests
|
17
|
-
from pydantic import BaseModel
|
18
|
+
from pydantic import BaseModel
|
18
19
|
from pydantic_core import to_jsonable_python
|
19
|
-
from requests.adapters import HTTPAdapter
|
20
|
+
from requests.adapters import HTTPAdapter
|
20
21
|
from requests_toolbelt import MultipartDecoder # type: ignore[import-untyped]
|
21
|
-
from typing_extensions import
|
22
|
+
from typing_extensions import TypeAlias, TypeVar, override
|
22
23
|
from yarl import URL
|
23
24
|
|
24
25
|
import pangea
|
25
26
|
import pangea.exceptions as pe
|
27
|
+
from pangea._constants import MAX_RETRY_DELAY, RETRYABLE_HTTP_CODES
|
26
28
|
from pangea.config import PangeaConfig
|
27
29
|
from pangea.response import AttachedFile, PangeaResponse, PangeaResponseResult, ResponseStatus, TransferMethod
|
28
30
|
from pangea.utils import default_encoder
|
@@ -46,6 +48,70 @@ _Files: TypeAlias = Union[Mapping[str, _FileSpec], Iterable[tuple[str, _FileSpec
|
|
46
48
|
_HeadersUpdateMapping: TypeAlias = Mapping[str, str]
|
47
49
|
|
48
50
|
|
51
|
+
class PangeaHTTPAdapter(HTTPAdapter):
    """Custom HTTP adapter that keeps track of retried request IDs.

    Responses whose status code is listed in ``RETRYABLE_HTTP_CODES`` are
    re-sent up to ``config.request_retries`` times. The ``x-request-id`` of
    every attempt made so far is reported on each following attempt through
    the ``X-Pangea-Retried-Request-Ids`` request header.
    """

    @override
    def __init__(self, config: PangeaConfig, *args, **kwargs):
        """Create the adapter.

        Args:
            config: Configuration object; ``request_retries`` and
                ``request_backoff`` are read from it when retrying.
            *args: Forwarded unchanged to ``HTTPAdapter.__init__``.
            **kwargs: Forwarded unchanged to ``HTTPAdapter.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.config = config

    @override
    def send(
        self,
        request: requests.PreparedRequest,
        stream: bool = False,
        timeout: None | float | tuple[float, float] | tuple[float, None] = None,
        verify: bool | str = True,
        cert: None | bytes | str | tuple[bytes | str, bytes | str] = None,
        proxies: Mapping[str, str] | None = None,
    ) -> requests.Response:
        """Send ``request``, retrying while the response status is retryable.

        All arguments are passed through to ``HTTPAdapter.send`` on every
        attempt.

        Returns:
            The response of the last attempt made. It may still carry an
            error status; this method never raises for HTTP status itself.
        """
        # A negative retry setting would make the loop body never run and
        # leave `response` unbound, so always allow at least one attempt.
        max_retries = max(self.config.request_retries, 0)

        # A list (rather than a set) keeps the IDs in attempt order, which
        # makes the header value deterministic.
        request_ids: list[str] = []

        for retries_taken in range(max_retries + 1):
            if request_ids:
                request.headers["X-Pangea-Retried-Request-Ids"] = ",".join(request_ids)

            response = super().send(request, stream, timeout, verify, cert, proxies)

            request_id = response.headers.get("x-request-id")
            if request_id and request_id not in request_ids:
                request_ids.append(request_id)

            # `Response.ok` is False exactly when `raise_for_status()` would
            # raise, so no exception round-trip is needed to test the status.
            out_of_retries = retries_taken >= max_retries
            if out_of_retries or response.ok or not self._should_retry(response):
                break

            # Release the connection of the failed attempt before waiting.
            response.close()
            self._sleep_for_retry(retries_taken=retries_taken, max_retries=max_retries, config=self.config)

        return response

    def _calculate_retry_timeout(self, remaining_retries: int, config: PangeaConfig) -> float:
        """Compute the delay to wait before the next attempt.

        The delay is ``config.request_backoff * 2 ** retries_done``, capped at
        ``MAX_RETRY_DELAY`` and then scaled by a random factor in (0.75, 1].

        Args:
            remaining_retries: Number of retries still available.
            config: Source of ``request_retries`` and ``request_backoff``.

        Returns:
            A non-negative delay, passed to ``time.sleep`` by the caller.
        """
        max_retries = config.request_retries
        # Bound the exponent so `pow` cannot overflow a float.
        nb_retries = min(max_retries - remaining_retries, 1000)
        sleep_seconds = min(config.request_backoff * pow(2.0, nb_retries), MAX_RETRY_DELAY)
        jitter = 1 - 0.25 * random()
        timeout = sleep_seconds * jitter
        return max(timeout, 0)

    def _sleep_for_retry(self, *, retries_taken: int, max_retries: int, config: PangeaConfig) -> None:
        """Block the calling thread for the backoff delay of the next retry.

        Args:
            retries_taken: Number of retries already performed.
            max_retries: Total number of retries allowed.
            config: Forwarded to ``_calculate_retry_timeout``.
        """
        remaining_retries = max_retries - retries_taken
        timeout = self._calculate_retry_timeout(remaining_retries, config)
        time.sleep(timeout)

    def _should_retry(self, response: requests.Response) -> bool:
        """Return whether ``response`` has a status code listed in ``RETRYABLE_HTTP_CODES``."""
        return response.status_code in RETRYABLE_HTTP_CODES
|
113
|
+
|
114
|
+
|
49
115
|
class MultipartResponse:
|
50
116
|
pangea_json: Dict[str, str]
|
51
117
|
attached_files: List = []
|
@@ -111,8 +177,8 @@ class PangeaRequestBase:
|
|
111
177
|
|
112
178
|
return self._queued_retry_enabled
|
113
179
|
|
114
|
-
def _get_delay(self, retry_count, start):
|
115
|
-
delay = retry_count * retry_count
|
180
|
+
def _get_delay(self, retry_count: int, start: float) -> float:
|
181
|
+
delay: float = retry_count * retry_count
|
116
182
|
now = time.time()
|
117
183
|
# if with this delay exceed timeout, reduce delay
|
118
184
|
if now - start + delay >= self.config.poll_result_timeout:
|
@@ -120,10 +186,10 @@ class PangeaRequestBase:
|
|
120
186
|
|
121
187
|
return delay
|
122
188
|
|
123
|
-
def _reach_timeout(self, start):
|
189
|
+
def _reach_timeout(self, start: float) -> bool:
|
124
190
|
return time.time() - start >= self.config.poll_result_timeout
|
125
191
|
|
126
|
-
def _get_poll_path(self, request_id: str):
|
192
|
+
def _get_poll_path(self, request_id: str) -> str:
|
127
193
|
return f"request/{request_id}"
|
128
194
|
|
129
195
|
def _url(self, path: str) -> str:
|
@@ -220,24 +286,6 @@ class PangeaRequest(PangeaRequestBase):
|
|
220
286
|
def __del__(self) -> None:
|
221
287
|
self.session.close()
|
222
288
|
|
223
|
-
def delete(self, endpoint: str) -> None:
|
224
|
-
"""
|
225
|
-
Makes a DELETE call to a Pangea endpoint.
|
226
|
-
|
227
|
-
Args:
|
228
|
-
endpoint: The Pangea API endpoint.
|
229
|
-
"""
|
230
|
-
|
231
|
-
url = self._url(endpoint)
|
232
|
-
|
233
|
-
self.logger.debug(
|
234
|
-
json.dumps({"service": self.service, "action": "delete", "url": url}, default=default_encoder)
|
235
|
-
)
|
236
|
-
|
237
|
-
requests_response = self._http_delete(url, headers=self._headers())
|
238
|
-
self._check_http_errors(requests_response)
|
239
|
-
|
240
|
-
@overload
|
241
289
|
def post(
|
242
290
|
self,
|
243
291
|
endpoint: str,
|
@@ -246,62 +294,18 @@ class PangeaRequest(PangeaRequestBase):
|
|
246
294
|
files: Optional[list[Tuple]] = None,
|
247
295
|
poll_result: bool = True,
|
248
296
|
url: Optional[str] = None,
|
249
|
-
*,
|
250
|
-
pangea_response: Literal[True] = True,
|
251
297
|
) -> PangeaResponse[TResult]:
|
252
|
-
"""
|
253
|
-
Makes the POST call to a Pangea Service endpoint.
|
298
|
+
"""Makes the POST call to a Pangea Service endpoint.
|
254
299
|
|
255
300
|
Args:
|
256
|
-
endpoint: The Pangea Service API endpoint.
|
257
|
-
data: The POST body payload object
|
301
|
+
endpoint(str): The Pangea Service API endpoint.
|
302
|
+
data(dict): The POST body payload object
|
258
303
|
|
259
304
|
Returns:
|
260
305
|
PangeaResponse which contains the response in its entirety and
|
261
306
|
various properties to retrieve individual fields
|
262
307
|
"""
|
263
308
|
|
264
|
-
@overload
|
265
|
-
def post(
|
266
|
-
self,
|
267
|
-
endpoint: str,
|
268
|
-
result_class: Type[TResult],
|
269
|
-
data: str | BaseModel | Mapping[str, Any] | None = None,
|
270
|
-
files: Optional[list[Tuple]] = None,
|
271
|
-
poll_result: bool = True,
|
272
|
-
url: Optional[str] = None,
|
273
|
-
*,
|
274
|
-
pangea_response: Literal[False],
|
275
|
-
) -> TResult:
|
276
|
-
"""
|
277
|
-
Makes the POST call to a Pangea Service endpoint.
|
278
|
-
|
279
|
-
Args:
|
280
|
-
endpoint: The Pangea Service API endpoint.
|
281
|
-
data: The POST body payload object
|
282
|
-
"""
|
283
|
-
|
284
|
-
def post(
|
285
|
-
self,
|
286
|
-
endpoint: str,
|
287
|
-
result_class: Type[TResult],
|
288
|
-
data: str | BaseModel | Mapping[str, Any] | None = None,
|
289
|
-
files: Optional[list[Tuple]] = None,
|
290
|
-
poll_result: bool = True,
|
291
|
-
url: Optional[str] = None,
|
292
|
-
*,
|
293
|
-
pangea_response: bool = True,
|
294
|
-
) -> PangeaResponse[TResult] | TResult:
|
295
|
-
"""
|
296
|
-
Makes a POST call to a Pangea Service endpoint.
|
297
|
-
|
298
|
-
Args:
|
299
|
-
endpoint: The Pangea Service API endpoint.
|
300
|
-
data: The POST body payload object
|
301
|
-
pangea_response: Whether or not the response body follows Pangea's
|
302
|
-
standard response schema
|
303
|
-
"""
|
304
|
-
|
305
309
|
if isinstance(data, BaseModel):
|
306
310
|
data = data.model_dump(exclude_none=True)
|
307
311
|
|
@@ -339,13 +343,9 @@ class PangeaRequest(PangeaRequestBase):
|
|
339
343
|
|
340
344
|
self._check_http_errors(requests_response)
|
341
345
|
|
342
|
-
if not pangea_response:
|
343
|
-
type_adapter = TypeAdapter(result_class)
|
344
|
-
return type_adapter.validate_python(requests_response.json())
|
345
|
-
|
346
346
|
if "multipart/form-data" in requests_response.headers.get("content-type", ""):
|
347
347
|
multipart_response = self._process_multipart_response(requests_response)
|
348
|
-
|
348
|
+
pangea_response: PangeaResponse = PangeaResponse(
|
349
349
|
requests_response,
|
350
350
|
result_class=result_class,
|
351
351
|
json=multipart_response.pangea_json,
|
@@ -358,14 +358,14 @@ class PangeaRequest(PangeaRequestBase):
|
|
358
358
|
json.dumps({"service": self.service, "action": "post", "url": url, "response": json_resp})
|
359
359
|
)
|
360
360
|
|
361
|
-
|
361
|
+
pangea_response = PangeaResponse(requests_response, result_class=result_class, json=json_resp)
|
362
362
|
except requests.exceptions.JSONDecodeError as e:
|
363
363
|
raise pe.PangeaException(f"Failed to decode json response. {e}. Body: {requests_response.text}") from e
|
364
364
|
|
365
365
|
if poll_result:
|
366
|
-
|
366
|
+
pangea_response = self._handle_queued_result(pangea_response)
|
367
367
|
|
368
|
-
return self._check_response(
|
368
|
+
return self._check_response(pangea_response)
|
369
369
|
|
370
370
|
def _get_pangea_json(self, decoder: MultipartDecoder) -> Optional[Dict]:
|
371
371
|
# Iterate through parts
|
@@ -408,14 +408,6 @@ class PangeaRequest(PangeaRequestBase):
|
|
408
408
|
if resp.status_code == 503:
|
409
409
|
raise pe.ServiceTemporarilyUnavailable(resp.json())
|
410
410
|
|
411
|
-
def _http_delete(
|
412
|
-
self,
|
413
|
-
url: str,
|
414
|
-
*,
|
415
|
-
headers: Mapping[str, str | bytes | None] = {},
|
416
|
-
) -> requests.Response:
|
417
|
-
return self.session.delete(url, headers=headers)
|
418
|
-
|
419
411
|
def _http_post(
|
420
412
|
self,
|
421
413
|
url: str,
|
@@ -460,98 +452,37 @@ class PangeaRequest(PangeaRequestBase):
|
|
460
452
|
|
461
453
|
return response
|
462
454
|
|
463
|
-
|
464
|
-
|
465
|
-
self,
|
466
|
-
path: str,
|
467
|
-
result_class: Type[TResult],
|
468
|
-
check_response: bool = True,
|
469
|
-
*,
|
470
|
-
params: (
|
471
|
-
Mapping[str | bytes | int | float, str | bytes | int | float | Iterable[str | bytes | int | float] | None]
|
472
|
-
| None
|
473
|
-
) = None,
|
474
|
-
pangea_response: Literal[True] = True,
|
475
|
-
) -> PangeaResponse[TResult]:
|
476
|
-
"""
|
477
|
-
Makes the GET call to a Pangea Service endpoint.
|
455
|
+
def get(self, path: str, result_class: Type[TResult], check_response: bool = True) -> PangeaResponse[TResult]:
|
456
|
+
"""Makes the GET call to a Pangea Service endpoint.
|
478
457
|
|
479
458
|
Args:
|
480
|
-
|
481
|
-
|
459
|
+
endpoint(str): The Pangea Service API endpoint.
|
460
|
+
path(str): Additional URL path
|
482
461
|
|
483
462
|
Returns:
|
484
463
|
PangeaResponse which contains the response in its entirety and
|
485
464
|
various properties to retrieve individual fields
|
486
465
|
"""
|
487
466
|
|
488
|
-
@overload
|
489
|
-
def get(
|
490
|
-
self,
|
491
|
-
path: str,
|
492
|
-
result_class: Type[TResult],
|
493
|
-
check_response: bool = True,
|
494
|
-
*,
|
495
|
-
params: (
|
496
|
-
Mapping[str | bytes | int | float, str | bytes | int | float | Iterable[str | bytes | int | float] | None]
|
497
|
-
| None
|
498
|
-
) = None,
|
499
|
-
pangea_response: Literal[False] = False,
|
500
|
-
) -> TResult:
|
501
|
-
"""
|
502
|
-
Makes the GET call to a Pangea Service endpoint.
|
503
|
-
|
504
|
-
Args:
|
505
|
-
path: Additional URL path
|
506
|
-
params: Dictionary of querystring data to attach to the request
|
507
|
-
"""
|
508
|
-
|
509
|
-
def get(
|
510
|
-
self,
|
511
|
-
path: str,
|
512
|
-
result_class: Type[TResult],
|
513
|
-
check_response: bool = True,
|
514
|
-
*,
|
515
|
-
params: (
|
516
|
-
Mapping[str | bytes | int | float, str | bytes | int | float | Iterable[str | bytes | int | float] | None]
|
517
|
-
| None
|
518
|
-
) = None,
|
519
|
-
pangea_response: bool = True,
|
520
|
-
) -> PangeaResponse[TResult] | TResult:
|
521
|
-
"""
|
522
|
-
Makes the GET call to a Pangea Service endpoint.
|
523
|
-
|
524
|
-
Args:
|
525
|
-
path: Additional URL path
|
526
|
-
params: Dictionary of querystring data to attach to the request
|
527
|
-
pangea_response: Whether or not the response body follows Pangea's
|
528
|
-
standard response schema
|
529
|
-
"""
|
530
|
-
|
531
467
|
url = self._url(path)
|
532
468
|
self.logger.debug(json.dumps({"service": self.service, "action": "get", "url": url}))
|
533
|
-
requests_response = self.session.get(url,
|
469
|
+
requests_response = self.session.get(url, headers=self._headers())
|
534
470
|
self._check_http_errors(requests_response)
|
535
|
-
|
536
|
-
if not pangea_response:
|
537
|
-
type_adapter = TypeAdapter(result_class)
|
538
|
-
return type_adapter.validate_python(requests_response.json())
|
539
|
-
|
540
|
-
pangea_response_obj: PangeaResponse = PangeaResponse(
|
471
|
+
pangea_response: PangeaResponse = PangeaResponse(
|
541
472
|
requests_response, result_class=result_class, json=requests_response.json()
|
542
473
|
)
|
543
474
|
|
544
475
|
self.logger.debug(
|
545
476
|
json.dumps(
|
546
|
-
{"service": self.service, "action": "get", "url": url, "response":
|
477
|
+
{"service": self.service, "action": "get", "url": url, "response": pangea_response.json},
|
547
478
|
default=default_encoder,
|
548
479
|
)
|
549
480
|
)
|
550
481
|
|
551
482
|
if check_response is False:
|
552
|
-
return
|
483
|
+
return pangea_response
|
553
484
|
|
554
|
-
return self._check_response(
|
485
|
+
return self._check_response(pangea_response)
|
555
486
|
|
556
487
|
def download_file(self, url: str, filename: str | None = None) -> AttachedFile:
|
557
488
|
"""
|
@@ -763,13 +694,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
763
694
|
|
764
695
|
@override
|
765
696
|
def _init_session(self) -> requests.Session:
|
766
|
-
|
767
|
-
total=self.config.request_retries,
|
768
|
-
backoff_factor=self.config.request_backoff,
|
769
|
-
status_forcelist=[500, 502, 503, 504],
|
770
|
-
)
|
771
|
-
|
772
|
-
adapter = HTTPAdapter(max_retries=retry_config)
|
697
|
+
adapter = PangeaHTTPAdapter(config=self.config)
|
773
698
|
session = requests.Session()
|
774
699
|
|
775
700
|
session.mount("http://", adapter)
|
pangea/response.py
CHANGED
@@ -18,6 +18,8 @@ from typing_extensions import TypeVar
|
|
18
18
|
|
19
19
|
from pangea.utils import format_datetime
|
20
20
|
|
21
|
+
__all__ = ("PangeaResponse", "PangeaResponseResult", "TransferMethod")
|
22
|
+
|
21
23
|
|
22
24
|
class AttachedFile:
|
23
25
|
filename: str
|
@@ -243,7 +245,9 @@ class PangeaResponse(ResponseHeader, Generic[T]):
|
|
243
245
|
|
244
246
|
@property
|
245
247
|
def http_status(self) -> int: # type: ignore[return]
|
246
|
-
|
248
|
+
# Must be an explicit None check because Response's boolean
|
249
|
+
# representation is equal to whether or not the response is OK.
|
250
|
+
if self.raw_response is not None:
|
247
251
|
if isinstance(self.raw_response, aiohttp.ClientResponse):
|
248
252
|
return self.raw_response.status
|
249
253
|
else:
|
pangea/services/__init__.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# ruff: noqa: F401
|
2
|
-
|
3
1
|
from .ai_guard import AIGuard
|
4
2
|
from .audit.audit import Audit
|
5
3
|
from .authn.authn import AuthN
|
@@ -7,9 +5,27 @@ from .authz import AuthZ
|
|
7
5
|
from .embargo import Embargo
|
8
6
|
from .file_scan import FileScan
|
9
7
|
from .intel import DomainIntel, FileIntel, IpIntel, UrlIntel, UserIntel
|
10
|
-
from .management import Management
|
11
8
|
from .prompt_guard import PromptGuard
|
12
9
|
from .redact import Redact
|
13
10
|
from .sanitize import Sanitize
|
14
11
|
from .share.share import Share
|
15
12
|
from .vault.vault import Vault
|
13
|
+
|
14
|
+
__all__ = (
|
15
|
+
"AIGuard",
|
16
|
+
"Audit",
|
17
|
+
"AuthN",
|
18
|
+
"AuthZ",
|
19
|
+
"DomainIntel",
|
20
|
+
"Embargo",
|
21
|
+
"FileIntel",
|
22
|
+
"FileScan",
|
23
|
+
"IpIntel",
|
24
|
+
"PromptGuard",
|
25
|
+
"Redact",
|
26
|
+
"Sanitize",
|
27
|
+
"Share",
|
28
|
+
"UrlIntel",
|
29
|
+
"UserIntel",
|
30
|
+
"Vault",
|
31
|
+
)
|