pangea-sdk 3.8.0b1__py3-none-any.whl → 5.4.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pangea/__init__.py +1 -1
- pangea/asyncio/file_uploader.py +1 -1
- pangea/asyncio/request.py +56 -34
- pangea/asyncio/services/__init__.py +4 -0
- pangea/asyncio/services/ai_guard.py +75 -0
- pangea/asyncio/services/audit.py +192 -31
- pangea/asyncio/services/authn.py +187 -109
- pangea/asyncio/services/authz.py +285 -0
- pangea/asyncio/services/base.py +21 -2
- pangea/asyncio/services/embargo.py +2 -2
- pangea/asyncio/services/file_scan.py +24 -9
- pangea/asyncio/services/intel.py +108 -34
- pangea/asyncio/services/prompt_guard.py +73 -0
- pangea/asyncio/services/redact.py +72 -4
- pangea/asyncio/services/sanitize.py +217 -0
- pangea/asyncio/services/share.py +246 -73
- pangea/asyncio/services/vault.py +1710 -750
- pangea/crypto/rsa.py +135 -0
- pangea/deep_verify.py +7 -1
- pangea/dump_audit.py +9 -8
- pangea/request.py +87 -59
- pangea/response.py +49 -31
- pangea/services/__init__.py +4 -0
- pangea/services/ai_guard.py +128 -0
- pangea/services/audit/audit.py +205 -42
- pangea/services/audit/models.py +56 -8
- pangea/services/audit/signing.py +6 -5
- pangea/services/audit/util.py +3 -3
- pangea/services/authn/authn.py +140 -70
- pangea/services/authn/models.py +167 -11
- pangea/services/authz.py +400 -0
- pangea/services/base.py +39 -8
- pangea/services/embargo.py +2 -2
- pangea/services/file_scan.py +32 -15
- pangea/services/intel.py +157 -32
- pangea/services/prompt_guard.py +83 -0
- pangea/services/redact.py +152 -4
- pangea/services/sanitize.py +371 -0
- pangea/services/share/share.py +683 -107
- pangea/services/vault/models/asymmetric.py +120 -18
- pangea/services/vault/models/common.py +439 -141
- pangea/services/vault/models/keys.py +94 -0
- pangea/services/vault/models/secret.py +27 -3
- pangea/services/vault/models/symmetric.py +68 -22
- pangea/services/vault/vault.py +1690 -749
- pangea/tools.py +6 -7
- pangea/utils.py +16 -27
- pangea/verify_audit.py +270 -83
- {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.4.0b1.dist-info}/METADATA +43 -35
- pangea_sdk-5.4.0b1.dist-info/RECORD +60 -0
- {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.4.0b1.dist-info}/WHEEL +1 -1
- pangea_sdk-3.8.0b1.dist-info/RECORD +0 -50
pangea/crypto/rsa.py
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import base64
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
from cryptography.hazmat.backends import default_backend
|
7
|
+
from cryptography.hazmat.primitives import hashes, serialization
|
8
|
+
from cryptography.hazmat.primitives.asymmetric import padding, rsa
|
9
|
+
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
10
|
+
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
11
|
+
|
12
|
+
from pangea.services.vault.models.common import ExportEncryptionAlgorithm
|
13
|
+
from pangea.services.vault.models.symmetric import SymmetricKeyEncryptionAlgorithm
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from pangea.services.vault.models.common import ExportResult
|
17
|
+
|
18
|
+
|
19
|
+
def generate_key_pair() -> tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]:
|
20
|
+
# Generate a 4096-bit RSA key pair
|
21
|
+
private_key = rsa.generate_private_key(
|
22
|
+
public_exponent=65537,
|
23
|
+
key_size=4096,
|
24
|
+
)
|
25
|
+
|
26
|
+
# Extract the public key from the private key
|
27
|
+
public_key = private_key.public_key()
|
28
|
+
return private_key, public_key
|
29
|
+
|
30
|
+
|
31
|
+
def decrypt_sha512(private_key: rsa.RSAPrivateKey, encrypted_message: bytes) -> bytes:
|
32
|
+
# Decrypt the message using the private key and OAEP padding
|
33
|
+
return private_key.decrypt(
|
34
|
+
encrypted_message,
|
35
|
+
padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA512()), algorithm=hashes.SHA512(), label=None),
|
36
|
+
)
|
37
|
+
|
38
|
+
|
39
|
+
def encrypt_sha512(public_key: rsa.RSAPublicKey, message: bytes) -> bytes:
|
40
|
+
# Encrypt the message using the public key and OAEP padding
|
41
|
+
return public_key.encrypt(
|
42
|
+
message, padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA512()), algorithm=hashes.SHA512(), label=None)
|
43
|
+
)
|
44
|
+
|
45
|
+
|
46
|
+
def private_key_to_pem(private_key: rsa.RSAPrivateKey) -> bytes:
|
47
|
+
# Serialize private key to PEM format
|
48
|
+
return private_key.private_bytes(
|
49
|
+
encoding=serialization.Encoding.PEM,
|
50
|
+
format=serialization.PrivateFormat.TraditionalOpenSSL,
|
51
|
+
encryption_algorithm=serialization.NoEncryption(),
|
52
|
+
)
|
53
|
+
|
54
|
+
|
55
|
+
def public_key_to_pem(public_key: rsa.RSAPublicKey) -> bytes:
|
56
|
+
# Serialize public key to PEM format
|
57
|
+
return public_key.public_bytes(
|
58
|
+
encoding=serialization.Encoding.PEM, format=serialization.PublicFormat.SubjectPublicKeyInfo
|
59
|
+
)
|
60
|
+
|
61
|
+
|
62
|
+
_AES_GCM_IV_SIZE = 12
|
63
|
+
"""Standard nonce size for GCM."""
|
64
|
+
|
65
|
+
_KEY_LENGTH = 32
|
66
|
+
"""AES-256 key length in bytes."""
|
67
|
+
|
68
|
+
|
69
|
+
def kem_decrypt(
|
70
|
+
private_key: rsa.RSAPrivateKey,
|
71
|
+
iv: bytes,
|
72
|
+
ciphertext: bytes,
|
73
|
+
symmetric_algorithm: str,
|
74
|
+
asymmetric_algorithm: str,
|
75
|
+
encrypted_salt: bytes,
|
76
|
+
password: str,
|
77
|
+
iteration_count: int,
|
78
|
+
hash_algorithm: str,
|
79
|
+
) -> str:
|
80
|
+
if symmetric_algorithm.casefold() != SymmetricKeyEncryptionAlgorithm.AES_GCM_256.value.casefold():
|
81
|
+
raise NotImplementedError(f"Unsupported symmetric algorithm: {symmetric_algorithm}")
|
82
|
+
|
83
|
+
if asymmetric_algorithm != ExportEncryptionAlgorithm.RSA_NO_PADDING_4096_KEM:
|
84
|
+
raise NotImplementedError(f"Unsupported asymmetric algorithm: {asymmetric_algorithm}")
|
85
|
+
|
86
|
+
if hash_algorithm.casefold() != "SHA512".casefold():
|
87
|
+
raise NotImplementedError(f"Unsupported hash algorithm: {hash_algorithm}")
|
88
|
+
|
89
|
+
# No-padding RSA decryption.
|
90
|
+
n = private_key.private_numbers().public_numbers.n
|
91
|
+
salt = pow(
|
92
|
+
int.from_bytes(encrypted_salt, byteorder="big"),
|
93
|
+
private_key.private_numbers().d,
|
94
|
+
n,
|
95
|
+
).to_bytes(n.bit_length() // 8, byteorder="big")
|
96
|
+
|
97
|
+
kdf = PBKDF2HMAC(
|
98
|
+
algorithm=hashes.SHA512(), length=_KEY_LENGTH, salt=salt, iterations=iteration_count, backend=default_backend()
|
99
|
+
)
|
100
|
+
symmetric_key = kdf.derive(password.encode("utf-8"))
|
101
|
+
|
102
|
+
decrypted = AESGCM(symmetric_key).decrypt(nonce=iv, data=ciphertext, associated_data=None)
|
103
|
+
|
104
|
+
return decrypted.decode("ascii")
|
105
|
+
|
106
|
+
|
107
|
+
def kem_decrypt_export_result(*, result: ExportResult, password: str, private_key: rsa.RSAPrivateKey) -> str:
|
108
|
+
"""Decrypt the exported result of a KEM operation."""
|
109
|
+
cipher_encoded = result.private_key or result.key
|
110
|
+
if not cipher_encoded:
|
111
|
+
raise TypeError("`private_key` or `key` should be set.")
|
112
|
+
|
113
|
+
assert result.encrypted_salt
|
114
|
+
assert result.symmetric_algorithm
|
115
|
+
assert result.asymmetric_algorithm
|
116
|
+
assert result.iteration_count
|
117
|
+
assert result.hash_algorithm
|
118
|
+
|
119
|
+
cipher_with_iv = base64.b64decode(cipher_encoded)
|
120
|
+
encrypted_salt = base64.b64decode(result.encrypted_salt)
|
121
|
+
|
122
|
+
iv = cipher_with_iv[:_AES_GCM_IV_SIZE]
|
123
|
+
cipher = cipher_with_iv[_AES_GCM_IV_SIZE:]
|
124
|
+
|
125
|
+
return kem_decrypt(
|
126
|
+
private_key=private_key,
|
127
|
+
iv=iv,
|
128
|
+
ciphertext=cipher,
|
129
|
+
password=password,
|
130
|
+
encrypted_salt=encrypted_salt,
|
131
|
+
symmetric_algorithm=result.symmetric_algorithm,
|
132
|
+
asymmetric_algorithm=result.asymmetric_algorithm,
|
133
|
+
iteration_count=result.iteration_count,
|
134
|
+
hash_algorithm=result.hash_algorithm,
|
135
|
+
)
|
pangea/deep_verify.py
CHANGED
@@ -263,8 +263,14 @@ def main():
|
|
263
263
|
audit = init_audit(args.token, args.domain)
|
264
264
|
errors = deep_verify(audit, args.file)
|
265
265
|
|
266
|
-
print("\n\
|
266
|
+
print("\n\nWarnings:")
|
267
|
+
val = errors["not_persisted"]
|
268
|
+
print(f"\tnot_persisted: {val}")
|
269
|
+
|
270
|
+
print("\nTotal errors:")
|
267
271
|
for key, val in errors.items():
|
272
|
+
if key == "not_persisted":
|
273
|
+
continue
|
268
274
|
print(f"\t{key.title()}: {val}")
|
269
275
|
print()
|
270
276
|
|
pangea/dump_audit.py
CHANGED
@@ -19,7 +19,7 @@ from pangea.utils import default_encoder
|
|
19
19
|
|
20
20
|
|
21
21
|
def dump_event(output: io.TextIOWrapper, row: SearchEvent, resp: PangeaResponse[SearchOutput]):
|
22
|
-
row_data = filter_deep_none(row.
|
22
|
+
row_data = filter_deep_none(row.model_dump())
|
23
23
|
if resp.result and resp.result.root:
|
24
24
|
row_data["tree_size"] = resp.result.root.size
|
25
25
|
output.write(json.dumps(row_data, default=default_encoder) + "\n")
|
@@ -63,11 +63,12 @@ def dump_before(audit: Audit, output: io.TextIOWrapper, start: datetime) -> int:
|
|
63
63
|
cnt = 0
|
64
64
|
if search_res.result and search_res.result.count > 0:
|
65
65
|
leaf_index = search_res.result.events[0].leaf_index
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
if leaf_index is not None:
|
67
|
+
for row in reversed(search_res.result.events):
|
68
|
+
if row.leaf_index != leaf_index:
|
69
|
+
break
|
70
|
+
dump_event(output, row, search_res)
|
71
|
+
cnt += 1
|
71
72
|
print(f"Dumping before... {cnt} events")
|
72
73
|
return cnt
|
73
74
|
|
@@ -89,7 +90,7 @@ def dump_after(audit: Audit, output: io.TextIOWrapper, start: datetime, last_eve
|
|
89
90
|
cnt = 0
|
90
91
|
if search_res.result and search_res.result.count > 0:
|
91
92
|
leaf_index = search_res.result.events[0].leaf_index
|
92
|
-
if leaf_index == last_leaf_index:
|
93
|
+
if leaf_index is not None and leaf_index == last_leaf_index:
|
93
94
|
start_idx: int = 1 if last_event_hash == search_res.result.events[0].hash else 0
|
94
95
|
for row in search_res.result.events[start_idx:]:
|
95
96
|
if row.leaf_index != leaf_index:
|
@@ -124,7 +125,7 @@ def dump_page(
|
|
124
125
|
msg = f"Dumping... {search_res.result.count} events"
|
125
126
|
|
126
127
|
if search_res.result.count <= 1:
|
127
|
-
return end, 0
|
128
|
+
return end, 0, True, "", 0
|
128
129
|
|
129
130
|
offset = 0
|
130
131
|
result_id = search_res.result.id
|
pangea/request.py
CHANGED
@@ -1,16 +1,19 @@
|
|
1
1
|
# Copyright 2022 Pangea Cyber Corporation
|
2
2
|
# Author: Pangea Cyber Corporation
|
3
|
+
from __future__ import annotations
|
3
4
|
|
4
5
|
import copy
|
5
6
|
import json
|
6
7
|
import logging
|
7
8
|
import time
|
8
|
-
from typing import Dict, List, Optional, Tuple, Type, Union
|
9
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Type, Union, cast
|
9
10
|
|
10
|
-
import aiohttp
|
11
11
|
import requests
|
12
|
+
from pydantic import BaseModel
|
13
|
+
from pydantic_core import to_jsonable_python
|
12
14
|
from requests.adapters import HTTPAdapter, Retry
|
13
|
-
from requests_toolbelt import MultipartDecoder # type: ignore
|
15
|
+
from requests_toolbelt import MultipartDecoder # type: ignore[import-untyped]
|
16
|
+
from typing_extensions import TypeVar
|
14
17
|
|
15
18
|
import pangea
|
16
19
|
import pangea.exceptions as pe
|
@@ -18,8 +21,11 @@ from pangea.config import PangeaConfig
|
|
18
21
|
from pangea.response import AttachedFile, PangeaResponse, PangeaResponseResult, ResponseStatus, TransferMethod
|
19
22
|
from pangea.utils import default_encoder
|
20
23
|
|
24
|
+
if TYPE_CHECKING:
|
25
|
+
import aiohttp
|
21
26
|
|
22
|
-
|
27
|
+
|
28
|
+
class MultipartResponse:
|
23
29
|
pangea_json: Dict[str, str]
|
24
30
|
attached_files: List = []
|
25
31
|
|
@@ -28,7 +34,7 @@ class MultipartResponse(object):
|
|
28
34
|
self.attached_files = attached_files
|
29
35
|
|
30
36
|
|
31
|
-
class PangeaRequestBase
|
37
|
+
class PangeaRequestBase:
|
32
38
|
def __init__(
|
33
39
|
self, config: PangeaConfig, token: str, service: str, logger: logging.Logger, config_id: Optional[str] = None
|
34
40
|
):
|
@@ -126,8 +132,7 @@ class PangeaRequestBase(object):
|
|
126
132
|
filename_parts = content_disposition.split("name=")
|
127
133
|
if len(filename_parts) > 1:
|
128
134
|
return filename_parts[1].split(";")[0].strip('"')
|
129
|
-
|
130
|
-
return None
|
135
|
+
return None
|
131
136
|
|
132
137
|
def _get_filename_from_url(self, url: str) -> Optional[str]:
|
133
138
|
return url.split("/")[-1].split("?")[0]
|
@@ -154,39 +159,42 @@ class PangeaRequestBase(object):
|
|
154
159
|
|
155
160
|
if status == ResponseStatus.VALIDATION_ERR.value:
|
156
161
|
raise pe.ValidationException(summary, response)
|
157
|
-
|
162
|
+
if status == ResponseStatus.TOO_MANY_REQUESTS.value:
|
158
163
|
raise pe.RateLimitException(summary, response)
|
159
|
-
|
164
|
+
if status == ResponseStatus.NO_CREDIT.value:
|
160
165
|
raise pe.NoCreditException(summary, response)
|
161
|
-
|
166
|
+
if status == ResponseStatus.UNAUTHORIZED.value:
|
162
167
|
raise pe.UnauthorizedException(self.service, response)
|
163
|
-
|
168
|
+
if status == ResponseStatus.SERVICE_NOT_ENABLED.value:
|
164
169
|
raise pe.ServiceNotEnabledException(self.service, response)
|
165
|
-
|
170
|
+
if status == ResponseStatus.PROVIDER_ERR.value:
|
166
171
|
raise pe.ProviderErrorException(summary, response)
|
167
|
-
|
172
|
+
if status in (ResponseStatus.MISSING_CONFIG_ID_SCOPE.value, ResponseStatus.MISSING_CONFIG_ID.value):
|
168
173
|
raise pe.MissingConfigID(self.service, response)
|
169
|
-
|
174
|
+
if status == ResponseStatus.SERVICE_NOT_AVAILABLE.value:
|
170
175
|
raise pe.ServiceNotAvailableException(summary, response)
|
171
|
-
|
176
|
+
if status == ResponseStatus.TREE_NOT_FOUND.value:
|
172
177
|
raise pe.TreeNotFoundException(summary, response)
|
173
|
-
|
178
|
+
if status == ResponseStatus.IP_NOT_FOUND.value:
|
174
179
|
raise pe.IPNotFoundException(summary, response)
|
175
|
-
|
180
|
+
if status == ResponseStatus.BAD_OFFSET.value:
|
176
181
|
raise pe.BadOffsetException(summary, response)
|
177
|
-
|
182
|
+
if status == ResponseStatus.FORBIDDEN_VAULT_OPERATION.value:
|
178
183
|
raise pe.ForbiddenVaultOperation(summary, response)
|
179
|
-
|
184
|
+
if status == ResponseStatus.VAULT_ITEM_NOT_FOUND.value:
|
180
185
|
raise pe.VaultItemNotFound(summary, response)
|
181
|
-
|
182
|
-
raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response)
|
183
|
-
|
186
|
+
if status == ResponseStatus.NOT_FOUND.value:
|
187
|
+
raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response)
|
188
|
+
if status == ResponseStatus.INTERNAL_SERVER_ERROR.value:
|
184
189
|
raise pe.InternalServerError(response)
|
185
|
-
|
190
|
+
if status == ResponseStatus.ACCEPTED.value:
|
186
191
|
raise pe.AcceptedRequestException(response)
|
187
192
|
raise pe.PangeaAPIException(f"{summary} ", response)
|
188
193
|
|
189
194
|
|
195
|
+
TResult = TypeVar("TResult", bound=PangeaResponseResult)
|
196
|
+
|
197
|
+
|
190
198
|
class PangeaRequest(PangeaRequestBase):
|
191
199
|
"""An object that makes direct calls to Pangea Service APIs.
|
192
200
|
|
@@ -202,12 +210,12 @@ class PangeaRequest(PangeaRequestBase):
|
|
202
210
|
def post(
|
203
211
|
self,
|
204
212
|
endpoint: str,
|
205
|
-
result_class: Type[
|
206
|
-
data:
|
213
|
+
result_class: Type[TResult],
|
214
|
+
data: str | BaseModel | dict[str, Any] | None = None,
|
207
215
|
files: Optional[List[Tuple]] = None,
|
208
216
|
poll_result: bool = True,
|
209
217
|
url: Optional[str] = None,
|
210
|
-
) -> PangeaResponse:
|
218
|
+
) -> PangeaResponse[TResult]:
|
211
219
|
"""Makes the POST call to a Pangea Service endpoint.
|
212
220
|
|
213
221
|
Args:
|
@@ -218,6 +226,16 @@ class PangeaRequest(PangeaRequestBase):
|
|
218
226
|
PangeaResponse which contains the response in its entirety and
|
219
227
|
various properties to retrieve individual fields
|
220
228
|
"""
|
229
|
+
|
230
|
+
if isinstance(data, BaseModel):
|
231
|
+
data = data.model_dump(exclude_none=True)
|
232
|
+
|
233
|
+
if data is None:
|
234
|
+
data = {}
|
235
|
+
|
236
|
+
# Normalize.
|
237
|
+
data = cast(dict[str, Any], to_jsonable_python(data))
|
238
|
+
|
221
239
|
if url is None:
|
222
240
|
url = self._url(endpoint)
|
223
241
|
|
@@ -318,32 +336,33 @@ class PangeaRequest(PangeaRequestBase):
|
|
318
336
|
return self.session.post(url, headers=headers, data=data_send, files=files)
|
319
337
|
|
320
338
|
def _http_post_process(
|
321
|
-
self,
|
339
|
+
self,
|
340
|
+
data: Union[str, Dict] = {},
|
341
|
+
files: Optional[Sequence[Tuple[str, Tuple[Any, str, str]]]] = None,
|
342
|
+
multipart_post: bool = True,
|
322
343
|
):
|
323
344
|
if files:
|
324
345
|
if multipart_post is True:
|
325
346
|
data_send: str = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
326
347
|
multi = [("request", (None, data_send, "application/json"))]
|
327
|
-
multi.extend(files)
|
328
|
-
files = multi
|
348
|
+
multi.extend(files)
|
349
|
+
files = multi
|
329
350
|
return None, files
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
data_send = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
342
|
-
return data_send, None
|
351
|
+
# Post to presigned url as form
|
352
|
+
data_send: list = [] # type: ignore[no-redef]
|
353
|
+
for k, v in data.items(): # type: ignore[union-attr]
|
354
|
+
data_send.append((k, v)) # type: ignore[attr-defined]
|
355
|
+
# When posting to presigned url, file key should be 'file'
|
356
|
+
files = { # type: ignore[assignment]
|
357
|
+
"file": files[0][1],
|
358
|
+
}
|
359
|
+
return data_send, files
|
360
|
+
data_send = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
361
|
+
return data_send, None
|
343
362
|
|
344
363
|
return data, files
|
345
364
|
|
346
|
-
def _handle_queued_result(self, response: PangeaResponse) -> PangeaResponse[
|
365
|
+
def _handle_queued_result(self, response: PangeaResponse[TResult]) -> PangeaResponse[TResult]:
|
347
366
|
if self._queued_retry_enabled and response.http_status == 202:
|
348
367
|
self.logger.debug(
|
349
368
|
json.dumps(
|
@@ -355,7 +374,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
355
374
|
|
356
375
|
return response
|
357
376
|
|
358
|
-
def get(self, path: str, result_class: Type[
|
377
|
+
def get(self, path: str, result_class: Type[TResult], check_response: bool = True) -> PangeaResponse[TResult]:
|
359
378
|
"""Makes the GET call to a Pangea Service endpoint.
|
360
379
|
|
361
380
|
Args:
|
@@ -387,7 +406,20 @@ class PangeaRequest(PangeaRequestBase):
|
|
387
406
|
|
388
407
|
return self._check_response(pangea_response)
|
389
408
|
|
390
|
-
def download_file(self, url: str, filename:
|
409
|
+
def download_file(self, url: str, filename: str | None = None) -> AttachedFile:
|
410
|
+
"""
|
411
|
+
Download file
|
412
|
+
|
413
|
+
Download a file from the specified URL and save it with the given
|
414
|
+
filename.
|
415
|
+
|
416
|
+
Args:
|
417
|
+
url: URL of the file to download
|
418
|
+
filename: Name to save the downloaded file as. If not provided, the
|
419
|
+
filename will be determined from the Content-Disposition header or
|
420
|
+
the URL.
|
421
|
+
"""
|
422
|
+
|
391
423
|
self.logger.debug(
|
392
424
|
json.dumps(
|
393
425
|
{
|
@@ -423,25 +455,24 @@ class PangeaRequest(PangeaRequestBase):
|
|
423
455
|
)
|
424
456
|
)
|
425
457
|
return AttachedFile(filename=filename, file=response.content, content_type=content_type)
|
426
|
-
|
427
|
-
raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
|
458
|
+
raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
|
428
459
|
|
429
460
|
def poll_result_by_id(
|
430
|
-
self, request_id: str, result_class:
|
431
|
-
):
|
461
|
+
self, request_id: str, result_class: Type[TResult], check_response: bool = True
|
462
|
+
) -> PangeaResponse[TResult]:
|
432
463
|
path = self._get_poll_path(request_id)
|
433
464
|
self.logger.debug(json.dumps({"service": self.service, "action": "poll_result_once", "url": path}))
|
434
|
-
return self.get(path, result_class, check_response=check_response)
|
465
|
+
return self.get(path, result_class, check_response=check_response)
|
435
466
|
|
436
467
|
def poll_result_once(
|
437
|
-
self, response: PangeaResponse, check_response: bool = True
|
438
|
-
) -> PangeaResponse[
|
468
|
+
self, response: PangeaResponse[TResult], check_response: bool = True
|
469
|
+
) -> PangeaResponse[TResult]:
|
439
470
|
request_id = response.request_id
|
440
471
|
if not request_id:
|
441
472
|
raise pe.PangeaException("Poll result error: response did not include a 'request_id'")
|
442
473
|
|
443
474
|
if response.status != ResponseStatus.ACCEPTED.value:
|
444
|
-
raise pe.PangeaException("Response already
|
475
|
+
raise pe.PangeaException("Response already processed")
|
445
476
|
|
446
477
|
return self.poll_result_by_id(request_id, response.result_class, check_response=check_response)
|
447
478
|
|
@@ -526,7 +557,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
526
557
|
self.post_presigned_url(url=presigned_url, data=data_to_presigned, files=files)
|
527
558
|
return response.raw_response
|
528
559
|
|
529
|
-
def _poll_result_retry(self, response: PangeaResponse) -> PangeaResponse[
|
560
|
+
def _poll_result_retry(self, response: PangeaResponse[TResult]) -> PangeaResponse[TResult]:
|
530
561
|
retry_count = 1
|
531
562
|
start = time.time()
|
532
563
|
|
@@ -538,9 +569,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
538
569
|
self.logger.debug(json.dumps({"service": self.service, "action": "poll_result_retry", "step": "exit"}))
|
539
570
|
return self._check_response(response)
|
540
571
|
|
541
|
-
def _poll_presigned_url(
|
542
|
-
self, response: PangeaResponse[Type[PangeaResponseResult]]
|
543
|
-
) -> PangeaResponse[Type[PangeaResponseResult]]:
|
572
|
+
def _poll_presigned_url(self, response: PangeaResponse[TResult]) -> PangeaResponse[TResult]:
|
544
573
|
if response.http_status != 202:
|
545
574
|
raise AttributeError("Response should be 202")
|
546
575
|
|
@@ -583,8 +612,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
583
612
|
|
584
613
|
if loop_resp.accepted_result is not None and not loop_resp.accepted_result.has_upload_url:
|
585
614
|
return loop_resp
|
586
|
-
|
587
|
-
raise loop_exc
|
615
|
+
raise loop_exc
|
588
616
|
|
589
617
|
def _init_session(self) -> requests.Session:
|
590
618
|
retry_config = Retry(
|
pangea/response.py
CHANGED
@@ -3,16 +3,15 @@
|
|
3
3
|
import datetime
|
4
4
|
import enum
|
5
5
|
import os
|
6
|
-
from typing import Any, Dict, Generic, List, Optional, Type,
|
6
|
+
from typing import Any, Dict, Generic, List, Optional, Type, Union
|
7
7
|
|
8
8
|
import aiohttp
|
9
9
|
import requests
|
10
|
-
from pydantic import BaseModel
|
10
|
+
from pydantic import BaseModel, ConfigDict, PlainSerializer
|
11
|
+
from typing_extensions import Annotated, TypeVar
|
11
12
|
|
12
13
|
from pangea.utils import format_datetime
|
13
14
|
|
14
|
-
T = TypeVar("T")
|
15
|
-
|
16
15
|
|
17
16
|
class AttachedFile(object):
|
18
17
|
filename: str
|
@@ -50,10 +49,24 @@ class AttachedFile(object):
|
|
50
49
|
|
51
50
|
|
52
51
|
class TransferMethod(str, enum.Enum):
|
52
|
+
"""Transfer methods for uploading file data."""
|
53
|
+
|
53
54
|
MULTIPART = "multipart"
|
54
55
|
POST_URL = "post-url"
|
55
56
|
PUT_URL = "put-url"
|
56
57
|
SOURCE_URL = "source-url"
|
58
|
+
"""
|
59
|
+
A `source-url` is a caller-specified URL where the Pangea APIs can fetch the
|
60
|
+
contents of the input file. When calling a Pangea API with a
|
61
|
+
`transfer_method` of `source-url`, you must also specify a `source_url`
|
62
|
+
input parameter that provides a URL to the input file. The source URL can be
|
63
|
+
a presigned URL created by the caller, and it will be used to download the
|
64
|
+
content of the input file. The `source-url` transfer method is useful when
|
65
|
+
you already have a file in your storage and can provide a URL from which
|
66
|
+
Pangea API can fetch the input file—there is no need to transfer it to
|
67
|
+
Pangea with a separate POST or PUT request.
|
68
|
+
"""
|
69
|
+
|
57
70
|
DEST_URL = "dest-url"
|
58
71
|
|
59
72
|
def __str__(self):
|
@@ -63,24 +76,17 @@ class TransferMethod(str, enum.Enum):
|
|
63
76
|
return str(self.value)
|
64
77
|
|
65
78
|
|
79
|
+
PangeaDateTime = Annotated[datetime.datetime, PlainSerializer(format_datetime)]
|
80
|
+
|
81
|
+
|
66
82
|
# API response should accept arbitrary fields to make them accept possible new parameters
|
67
83
|
class APIResponseModel(BaseModel):
|
68
|
-
|
69
|
-
arbitrary_types_allowed = True
|
70
|
-
# allow parameters despite they are not declared in model. Make SDK accept server new parameters
|
71
|
-
extra = "allow"
|
84
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
|
72
85
|
|
73
86
|
|
74
87
|
# API request models doesn't not allow arbitrary fields
|
75
88
|
class APIRequestModel(BaseModel):
|
76
|
-
|
77
|
-
arbitrary_types_allowed = True
|
78
|
-
extra = (
|
79
|
-
"allow" # allow parameters despite they are not declared in model. Make SDK accept server new parameters
|
80
|
-
)
|
81
|
-
json_encoders = {
|
82
|
-
datetime.datetime: format_datetime,
|
83
|
-
}
|
89
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
|
84
90
|
|
85
91
|
|
86
92
|
class PangeaResponseResult(APIResponseModel):
|
@@ -155,38 +161,50 @@ class ResponseStatus(str, enum.Enum):
|
|
155
161
|
|
156
162
|
|
157
163
|
class ResponseHeader(APIResponseModel):
|
158
|
-
"""
|
159
|
-
Pangea response API header.
|
160
|
-
|
161
|
-
Arguments:
|
162
|
-
request_id -- The request ID.
|
163
|
-
request_time -- The time the request was issued, ISO8601.
|
164
|
-
response_time -- The time the response was issued, ISO8601.
|
165
|
-
status -- Pangea response status
|
166
|
-
summary -- The summary of the response.
|
167
|
-
"""
|
164
|
+
"""Pangea response API header."""
|
168
165
|
|
169
166
|
request_id: str
|
167
|
+
"""A unique identifier assigned to each request made to the API."""
|
168
|
+
|
170
169
|
request_time: str
|
170
|
+
"""
|
171
|
+
Timestamp indicating the exact moment when a request is made to the API.
|
172
|
+
"""
|
173
|
+
|
171
174
|
response_time: str
|
175
|
+
"""
|
176
|
+
Duration it takes for the API to process a request and generate a response.
|
177
|
+
"""
|
178
|
+
|
172
179
|
status: str
|
180
|
+
"""
|
181
|
+
Represents the status or outcome of the API request.
|
182
|
+
"""
|
183
|
+
|
173
184
|
summary: str
|
185
|
+
"""
|
186
|
+
Provides a concise and brief overview of the purpose or primary objective of
|
187
|
+
the API endpoint.
|
188
|
+
"""
|
189
|
+
|
190
|
+
|
191
|
+
T = TypeVar("T", bound=PangeaResponseResult)
|
174
192
|
|
175
193
|
|
176
|
-
class PangeaResponse(Generic[T]
|
194
|
+
class PangeaResponse(ResponseHeader, Generic[T]):
|
177
195
|
raw_result: Optional[Dict[str, Any]] = None
|
178
196
|
raw_response: Optional[Union[requests.Response, aiohttp.ClientResponse]] = None
|
179
197
|
result: Optional[T] = None
|
180
198
|
pangea_error: Optional[PangeaError] = None
|
181
199
|
accepted_result: Optional[AcceptedResult] = None
|
182
|
-
result_class:
|
200
|
+
result_class: Type[T] = PangeaResponseResult # type: ignore[assignment]
|
183
201
|
_json: Any
|
184
202
|
attached_files: List[AttachedFile] = []
|
185
203
|
|
186
204
|
def __init__(
|
187
205
|
self,
|
188
206
|
response: requests.Response,
|
189
|
-
result_class:
|
207
|
+
result_class: Type[T],
|
190
208
|
json: dict,
|
191
209
|
attached_files: List[AttachedFile] = [],
|
192
210
|
):
|
@@ -198,7 +216,7 @@ class PangeaResponse(Generic[T], ResponseHeader):
|
|
198
216
|
self.attached_files = attached_files
|
199
217
|
|
200
218
|
self.result = (
|
201
|
-
self.result_class(**self.raw_result)
|
219
|
+
self.result_class(**self.raw_result)
|
202
220
|
if self.raw_result is not None and issubclass(self.result_class, PangeaResponseResult) and self.success
|
203
221
|
else None
|
204
222
|
)
|
@@ -230,4 +248,4 @@ class PangeaResponse(Generic[T], ResponseHeader):
|
|
230
248
|
|
231
249
|
@property
|
232
250
|
def url(self) -> str:
|
233
|
-
return str(self.raw_response.url) # type: ignore[
|
251
|
+
return str(self.raw_response.url) # type: ignore[union-attr]
|
pangea/services/__init__.py
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
from .ai_guard import AIGuard
|
1
2
|
from .audit.audit import Audit
|
2
3
|
from .authn.authn import AuthN
|
4
|
+
from .authz import AuthZ
|
3
5
|
from .embargo import Embargo
|
4
6
|
from .file_scan import FileScan
|
5
7
|
from .intel import DomainIntel, FileIntel, IpIntel, UrlIntel, UserIntel
|
8
|
+
from .prompt_guard import PromptGuard
|
6
9
|
from .redact import Redact
|
10
|
+
from .sanitize import Sanitize
|
7
11
|
from .share.share import Share
|
8
12
|
from .vault.vault import Vault
|