pangea-sdk 3.8.0b1__py3-none-any.whl → 5.4.0b1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pangea/__init__.py +1 -1
- pangea/asyncio/file_uploader.py +1 -1
- pangea/asyncio/request.py +56 -34
- pangea/asyncio/services/__init__.py +4 -0
- pangea/asyncio/services/ai_guard.py +75 -0
- pangea/asyncio/services/audit.py +192 -31
- pangea/asyncio/services/authn.py +187 -109
- pangea/asyncio/services/authz.py +285 -0
- pangea/asyncio/services/base.py +21 -2
- pangea/asyncio/services/embargo.py +2 -2
- pangea/asyncio/services/file_scan.py +24 -9
- pangea/asyncio/services/intel.py +108 -34
- pangea/asyncio/services/prompt_guard.py +73 -0
- pangea/asyncio/services/redact.py +72 -4
- pangea/asyncio/services/sanitize.py +217 -0
- pangea/asyncio/services/share.py +246 -73
- pangea/asyncio/services/vault.py +1710 -750
- pangea/crypto/rsa.py +135 -0
- pangea/deep_verify.py +7 -1
- pangea/dump_audit.py +9 -8
- pangea/request.py +87 -59
- pangea/response.py +49 -31
- pangea/services/__init__.py +4 -0
- pangea/services/ai_guard.py +128 -0
- pangea/services/audit/audit.py +205 -42
- pangea/services/audit/models.py +56 -8
- pangea/services/audit/signing.py +6 -5
- pangea/services/audit/util.py +3 -3
- pangea/services/authn/authn.py +140 -70
- pangea/services/authn/models.py +167 -11
- pangea/services/authz.py +400 -0
- pangea/services/base.py +39 -8
- pangea/services/embargo.py +2 -2
- pangea/services/file_scan.py +32 -15
- pangea/services/intel.py +157 -32
- pangea/services/prompt_guard.py +83 -0
- pangea/services/redact.py +152 -4
- pangea/services/sanitize.py +371 -0
- pangea/services/share/share.py +683 -107
- pangea/services/vault/models/asymmetric.py +120 -18
- pangea/services/vault/models/common.py +439 -141
- pangea/services/vault/models/keys.py +94 -0
- pangea/services/vault/models/secret.py +27 -3
- pangea/services/vault/models/symmetric.py +68 -22
- pangea/services/vault/vault.py +1690 -749
- pangea/tools.py +6 -7
- pangea/utils.py +16 -27
- pangea/verify_audit.py +270 -83
- {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.4.0b1.dist-info}/METADATA +43 -35
- pangea_sdk-5.4.0b1.dist-info/RECORD +60 -0
- {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.4.0b1.dist-info}/WHEEL +1 -1
- pangea_sdk-3.8.0b1.dist-info/RECORD +0 -50
pangea/crypto/rsa.py
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import base64
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
from cryptography.hazmat.backends import default_backend
|
7
|
+
from cryptography.hazmat.primitives import hashes, serialization
|
8
|
+
from cryptography.hazmat.primitives.asymmetric import padding, rsa
|
9
|
+
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
10
|
+
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
11
|
+
|
12
|
+
from pangea.services.vault.models.common import ExportEncryptionAlgorithm
|
13
|
+
from pangea.services.vault.models.symmetric import SymmetricKeyEncryptionAlgorithm
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from pangea.services.vault.models.common import ExportResult
|
17
|
+
|
18
|
+
|
19
|
+
def generate_key_pair() -> tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]:
|
20
|
+
# Generate a 4096-bit RSA key pair
|
21
|
+
private_key = rsa.generate_private_key(
|
22
|
+
public_exponent=65537,
|
23
|
+
key_size=4096,
|
24
|
+
)
|
25
|
+
|
26
|
+
# Extract the public key from the private key
|
27
|
+
public_key = private_key.public_key()
|
28
|
+
return private_key, public_key
|
29
|
+
|
30
|
+
|
31
|
+
def decrypt_sha512(private_key: rsa.RSAPrivateKey, encrypted_message: bytes) -> bytes:
|
32
|
+
# Decrypt the message using the private key and OAEP padding
|
33
|
+
return private_key.decrypt(
|
34
|
+
encrypted_message,
|
35
|
+
padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA512()), algorithm=hashes.SHA512(), label=None),
|
36
|
+
)
|
37
|
+
|
38
|
+
|
39
|
+
def encrypt_sha512(public_key: rsa.RSAPublicKey, message: bytes) -> bytes:
|
40
|
+
# Encrypt the message using the public key and OAEP padding
|
41
|
+
return public_key.encrypt(
|
42
|
+
message, padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA512()), algorithm=hashes.SHA512(), label=None)
|
43
|
+
)
|
44
|
+
|
45
|
+
|
46
|
+
def private_key_to_pem(private_key: rsa.RSAPrivateKey) -> bytes:
|
47
|
+
# Serialize private key to PEM format
|
48
|
+
return private_key.private_bytes(
|
49
|
+
encoding=serialization.Encoding.PEM,
|
50
|
+
format=serialization.PrivateFormat.TraditionalOpenSSL,
|
51
|
+
encryption_algorithm=serialization.NoEncryption(),
|
52
|
+
)
|
53
|
+
|
54
|
+
|
55
|
+
def public_key_to_pem(public_key: rsa.RSAPublicKey) -> bytes:
|
56
|
+
# Serialize public key to PEM format
|
57
|
+
return public_key.public_bytes(
|
58
|
+
encoding=serialization.Encoding.PEM, format=serialization.PublicFormat.SubjectPublicKeyInfo
|
59
|
+
)
|
60
|
+
|
61
|
+
|
62
|
+
_AES_GCM_IV_SIZE = 12
|
63
|
+
"""Standard nonce size for GCM."""
|
64
|
+
|
65
|
+
_KEY_LENGTH = 32
|
66
|
+
"""AES-256 key length in bytes."""
|
67
|
+
|
68
|
+
|
69
|
+
def kem_decrypt(
|
70
|
+
private_key: rsa.RSAPrivateKey,
|
71
|
+
iv: bytes,
|
72
|
+
ciphertext: bytes,
|
73
|
+
symmetric_algorithm: str,
|
74
|
+
asymmetric_algorithm: str,
|
75
|
+
encrypted_salt: bytes,
|
76
|
+
password: str,
|
77
|
+
iteration_count: int,
|
78
|
+
hash_algorithm: str,
|
79
|
+
) -> str:
|
80
|
+
if symmetric_algorithm.casefold() != SymmetricKeyEncryptionAlgorithm.AES_GCM_256.value.casefold():
|
81
|
+
raise NotImplementedError(f"Unsupported symmetric algorithm: {symmetric_algorithm}")
|
82
|
+
|
83
|
+
if asymmetric_algorithm != ExportEncryptionAlgorithm.RSA_NO_PADDING_4096_KEM:
|
84
|
+
raise NotImplementedError(f"Unsupported asymmetric algorithm: {asymmetric_algorithm}")
|
85
|
+
|
86
|
+
if hash_algorithm.casefold() != "SHA512".casefold():
|
87
|
+
raise NotImplementedError(f"Unsupported hash algorithm: {hash_algorithm}")
|
88
|
+
|
89
|
+
# No-padding RSA decryption.
|
90
|
+
n = private_key.private_numbers().public_numbers.n
|
91
|
+
salt = pow(
|
92
|
+
int.from_bytes(encrypted_salt, byteorder="big"),
|
93
|
+
private_key.private_numbers().d,
|
94
|
+
n,
|
95
|
+
).to_bytes(n.bit_length() // 8, byteorder="big")
|
96
|
+
|
97
|
+
kdf = PBKDF2HMAC(
|
98
|
+
algorithm=hashes.SHA512(), length=_KEY_LENGTH, salt=salt, iterations=iteration_count, backend=default_backend()
|
99
|
+
)
|
100
|
+
symmetric_key = kdf.derive(password.encode("utf-8"))
|
101
|
+
|
102
|
+
decrypted = AESGCM(symmetric_key).decrypt(nonce=iv, data=ciphertext, associated_data=None)
|
103
|
+
|
104
|
+
return decrypted.decode("ascii")
|
105
|
+
|
106
|
+
|
107
|
+
def kem_decrypt_export_result(*, result: ExportResult, password: str, private_key: rsa.RSAPrivateKey) -> str:
|
108
|
+
"""Decrypt the exported result of a KEM operation."""
|
109
|
+
cipher_encoded = result.private_key or result.key
|
110
|
+
if not cipher_encoded:
|
111
|
+
raise TypeError("`private_key` or `key` should be set.")
|
112
|
+
|
113
|
+
assert result.encrypted_salt
|
114
|
+
assert result.symmetric_algorithm
|
115
|
+
assert result.asymmetric_algorithm
|
116
|
+
assert result.iteration_count
|
117
|
+
assert result.hash_algorithm
|
118
|
+
|
119
|
+
cipher_with_iv = base64.b64decode(cipher_encoded)
|
120
|
+
encrypted_salt = base64.b64decode(result.encrypted_salt)
|
121
|
+
|
122
|
+
iv = cipher_with_iv[:_AES_GCM_IV_SIZE]
|
123
|
+
cipher = cipher_with_iv[_AES_GCM_IV_SIZE:]
|
124
|
+
|
125
|
+
return kem_decrypt(
|
126
|
+
private_key=private_key,
|
127
|
+
iv=iv,
|
128
|
+
ciphertext=cipher,
|
129
|
+
password=password,
|
130
|
+
encrypted_salt=encrypted_salt,
|
131
|
+
symmetric_algorithm=result.symmetric_algorithm,
|
132
|
+
asymmetric_algorithm=result.asymmetric_algorithm,
|
133
|
+
iteration_count=result.iteration_count,
|
134
|
+
hash_algorithm=result.hash_algorithm,
|
135
|
+
)
|
pangea/deep_verify.py
CHANGED
@@ -263,8 +263,14 @@ def main():
|
|
263
263
|
audit = init_audit(args.token, args.domain)
|
264
264
|
errors = deep_verify(audit, args.file)
|
265
265
|
|
266
|
-
print("\n\
|
266
|
+
print("\n\nWarnings:")
|
267
|
+
val = errors["not_persisted"]
|
268
|
+
print(f"\tnot_persisted: {val}")
|
269
|
+
|
270
|
+
print("\nTotal errors:")
|
267
271
|
for key, val in errors.items():
|
272
|
+
if key == "not_persisted":
|
273
|
+
continue
|
268
274
|
print(f"\t{key.title()}: {val}")
|
269
275
|
print()
|
270
276
|
|
pangea/dump_audit.py
CHANGED
@@ -19,7 +19,7 @@ from pangea.utils import default_encoder
|
|
19
19
|
|
20
20
|
|
21
21
|
def dump_event(output: io.TextIOWrapper, row: SearchEvent, resp: PangeaResponse[SearchOutput]):
|
22
|
-
row_data = filter_deep_none(row.
|
22
|
+
row_data = filter_deep_none(row.model_dump())
|
23
23
|
if resp.result and resp.result.root:
|
24
24
|
row_data["tree_size"] = resp.result.root.size
|
25
25
|
output.write(json.dumps(row_data, default=default_encoder) + "\n")
|
@@ -63,11 +63,12 @@ def dump_before(audit: Audit, output: io.TextIOWrapper, start: datetime) -> int:
|
|
63
63
|
cnt = 0
|
64
64
|
if search_res.result and search_res.result.count > 0:
|
65
65
|
leaf_index = search_res.result.events[0].leaf_index
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
if leaf_index is not None:
|
67
|
+
for row in reversed(search_res.result.events):
|
68
|
+
if row.leaf_index != leaf_index:
|
69
|
+
break
|
70
|
+
dump_event(output, row, search_res)
|
71
|
+
cnt += 1
|
71
72
|
print(f"Dumping before... {cnt} events")
|
72
73
|
return cnt
|
73
74
|
|
@@ -89,7 +90,7 @@ def dump_after(audit: Audit, output: io.TextIOWrapper, start: datetime, last_eve
|
|
89
90
|
cnt = 0
|
90
91
|
if search_res.result and search_res.result.count > 0:
|
91
92
|
leaf_index = search_res.result.events[0].leaf_index
|
92
|
-
if leaf_index == last_leaf_index:
|
93
|
+
if leaf_index is not None and leaf_index == last_leaf_index:
|
93
94
|
start_idx: int = 1 if last_event_hash == search_res.result.events[0].hash else 0
|
94
95
|
for row in search_res.result.events[start_idx:]:
|
95
96
|
if row.leaf_index != leaf_index:
|
@@ -124,7 +125,7 @@ def dump_page(
|
|
124
125
|
msg = f"Dumping... {search_res.result.count} events"
|
125
126
|
|
126
127
|
if search_res.result.count <= 1:
|
127
|
-
return end, 0
|
128
|
+
return end, 0, True, "", 0
|
128
129
|
|
129
130
|
offset = 0
|
130
131
|
result_id = search_res.result.id
|
pangea/request.py
CHANGED
@@ -1,16 +1,19 @@
|
|
1
1
|
# Copyright 2022 Pangea Cyber Corporation
|
2
2
|
# Author: Pangea Cyber Corporation
|
3
|
+
from __future__ import annotations
|
3
4
|
|
4
5
|
import copy
|
5
6
|
import json
|
6
7
|
import logging
|
7
8
|
import time
|
8
|
-
from typing import Dict, List, Optional, Tuple, Type, Union
|
9
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Type, Union, cast
|
9
10
|
|
10
|
-
import aiohttp
|
11
11
|
import requests
|
12
|
+
from pydantic import BaseModel
|
13
|
+
from pydantic_core import to_jsonable_python
|
12
14
|
from requests.adapters import HTTPAdapter, Retry
|
13
|
-
from requests_toolbelt import MultipartDecoder # type: ignore
|
15
|
+
from requests_toolbelt import MultipartDecoder # type: ignore[import-untyped]
|
16
|
+
from typing_extensions import TypeVar
|
14
17
|
|
15
18
|
import pangea
|
16
19
|
import pangea.exceptions as pe
|
@@ -18,8 +21,11 @@ from pangea.config import PangeaConfig
|
|
18
21
|
from pangea.response import AttachedFile, PangeaResponse, PangeaResponseResult, ResponseStatus, TransferMethod
|
19
22
|
from pangea.utils import default_encoder
|
20
23
|
|
24
|
+
if TYPE_CHECKING:
|
25
|
+
import aiohttp
|
21
26
|
|
22
|
-
|
27
|
+
|
28
|
+
class MultipartResponse:
|
23
29
|
pangea_json: Dict[str, str]
|
24
30
|
attached_files: List = []
|
25
31
|
|
@@ -28,7 +34,7 @@ class MultipartResponse(object):
|
|
28
34
|
self.attached_files = attached_files
|
29
35
|
|
30
36
|
|
31
|
-
class PangeaRequestBase
|
37
|
+
class PangeaRequestBase:
|
32
38
|
def __init__(
|
33
39
|
self, config: PangeaConfig, token: str, service: str, logger: logging.Logger, config_id: Optional[str] = None
|
34
40
|
):
|
@@ -126,8 +132,7 @@ class PangeaRequestBase(object):
|
|
126
132
|
filename_parts = content_disposition.split("name=")
|
127
133
|
if len(filename_parts) > 1:
|
128
134
|
return filename_parts[1].split(";")[0].strip('"')
|
129
|
-
|
130
|
-
return None
|
135
|
+
return None
|
131
136
|
|
132
137
|
def _get_filename_from_url(self, url: str) -> Optional[str]:
|
133
138
|
return url.split("/")[-1].split("?")[0]
|
@@ -154,39 +159,42 @@ class PangeaRequestBase(object):
|
|
154
159
|
|
155
160
|
if status == ResponseStatus.VALIDATION_ERR.value:
|
156
161
|
raise pe.ValidationException(summary, response)
|
157
|
-
|
162
|
+
if status == ResponseStatus.TOO_MANY_REQUESTS.value:
|
158
163
|
raise pe.RateLimitException(summary, response)
|
159
|
-
|
164
|
+
if status == ResponseStatus.NO_CREDIT.value:
|
160
165
|
raise pe.NoCreditException(summary, response)
|
161
|
-
|
166
|
+
if status == ResponseStatus.UNAUTHORIZED.value:
|
162
167
|
raise pe.UnauthorizedException(self.service, response)
|
163
|
-
|
168
|
+
if status == ResponseStatus.SERVICE_NOT_ENABLED.value:
|
164
169
|
raise pe.ServiceNotEnabledException(self.service, response)
|
165
|
-
|
170
|
+
if status == ResponseStatus.PROVIDER_ERR.value:
|
166
171
|
raise pe.ProviderErrorException(summary, response)
|
167
|
-
|
172
|
+
if status in (ResponseStatus.MISSING_CONFIG_ID_SCOPE.value, ResponseStatus.MISSING_CONFIG_ID.value):
|
168
173
|
raise pe.MissingConfigID(self.service, response)
|
169
|
-
|
174
|
+
if status == ResponseStatus.SERVICE_NOT_AVAILABLE.value:
|
170
175
|
raise pe.ServiceNotAvailableException(summary, response)
|
171
|
-
|
176
|
+
if status == ResponseStatus.TREE_NOT_FOUND.value:
|
172
177
|
raise pe.TreeNotFoundException(summary, response)
|
173
|
-
|
178
|
+
if status == ResponseStatus.IP_NOT_FOUND.value:
|
174
179
|
raise pe.IPNotFoundException(summary, response)
|
175
|
-
|
180
|
+
if status == ResponseStatus.BAD_OFFSET.value:
|
176
181
|
raise pe.BadOffsetException(summary, response)
|
177
|
-
|
182
|
+
if status == ResponseStatus.FORBIDDEN_VAULT_OPERATION.value:
|
178
183
|
raise pe.ForbiddenVaultOperation(summary, response)
|
179
|
-
|
184
|
+
if status == ResponseStatus.VAULT_ITEM_NOT_FOUND.value:
|
180
185
|
raise pe.VaultItemNotFound(summary, response)
|
181
|
-
|
182
|
-
raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response)
|
183
|
-
|
186
|
+
if status == ResponseStatus.NOT_FOUND.value:
|
187
|
+
raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response)
|
188
|
+
if status == ResponseStatus.INTERNAL_SERVER_ERROR.value:
|
184
189
|
raise pe.InternalServerError(response)
|
185
|
-
|
190
|
+
if status == ResponseStatus.ACCEPTED.value:
|
186
191
|
raise pe.AcceptedRequestException(response)
|
187
192
|
raise pe.PangeaAPIException(f"{summary} ", response)
|
188
193
|
|
189
194
|
|
195
|
+
TResult = TypeVar("TResult", bound=PangeaResponseResult)
|
196
|
+
|
197
|
+
|
190
198
|
class PangeaRequest(PangeaRequestBase):
|
191
199
|
"""An object that makes direct calls to Pangea Service APIs.
|
192
200
|
|
@@ -202,12 +210,12 @@ class PangeaRequest(PangeaRequestBase):
|
|
202
210
|
def post(
|
203
211
|
self,
|
204
212
|
endpoint: str,
|
205
|
-
result_class: Type[
|
206
|
-
data:
|
213
|
+
result_class: Type[TResult],
|
214
|
+
data: str | BaseModel | dict[str, Any] | None = None,
|
207
215
|
files: Optional[List[Tuple]] = None,
|
208
216
|
poll_result: bool = True,
|
209
217
|
url: Optional[str] = None,
|
210
|
-
) -> PangeaResponse:
|
218
|
+
) -> PangeaResponse[TResult]:
|
211
219
|
"""Makes the POST call to a Pangea Service endpoint.
|
212
220
|
|
213
221
|
Args:
|
@@ -218,6 +226,16 @@ class PangeaRequest(PangeaRequestBase):
|
|
218
226
|
PangeaResponse which contains the response in its entirety and
|
219
227
|
various properties to retrieve individual fields
|
220
228
|
"""
|
229
|
+
|
230
|
+
if isinstance(data, BaseModel):
|
231
|
+
data = data.model_dump(exclude_none=True)
|
232
|
+
|
233
|
+
if data is None:
|
234
|
+
data = {}
|
235
|
+
|
236
|
+
# Normalize.
|
237
|
+
data = cast(dict[str, Any], to_jsonable_python(data))
|
238
|
+
|
221
239
|
if url is None:
|
222
240
|
url = self._url(endpoint)
|
223
241
|
|
@@ -318,32 +336,33 @@ class PangeaRequest(PangeaRequestBase):
|
|
318
336
|
return self.session.post(url, headers=headers, data=data_send, files=files)
|
319
337
|
|
320
338
|
def _http_post_process(
|
321
|
-
self,
|
339
|
+
self,
|
340
|
+
data: Union[str, Dict] = {},
|
341
|
+
files: Optional[Sequence[Tuple[str, Tuple[Any, str, str]]]] = None,
|
342
|
+
multipart_post: bool = True,
|
322
343
|
):
|
323
344
|
if files:
|
324
345
|
if multipart_post is True:
|
325
346
|
data_send: str = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
326
347
|
multi = [("request", (None, data_send, "application/json"))]
|
327
|
-
multi.extend(files)
|
328
|
-
files = multi
|
348
|
+
multi.extend(files)
|
349
|
+
files = multi
|
329
350
|
return None, files
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
data_send = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
342
|
-
return data_send, None
|
351
|
+
# Post to presigned url as form
|
352
|
+
data_send: list = [] # type: ignore[no-redef]
|
353
|
+
for k, v in data.items(): # type: ignore[union-attr]
|
354
|
+
data_send.append((k, v)) # type: ignore[attr-defined]
|
355
|
+
# When posting to presigned url, file key should be 'file'
|
356
|
+
files = { # type: ignore[assignment]
|
357
|
+
"file": files[0][1],
|
358
|
+
}
|
359
|
+
return data_send, files
|
360
|
+
data_send = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
361
|
+
return data_send, None
|
343
362
|
|
344
363
|
return data, files
|
345
364
|
|
346
|
-
def _handle_queued_result(self, response: PangeaResponse) -> PangeaResponse[
|
365
|
+
def _handle_queued_result(self, response: PangeaResponse[TResult]) -> PangeaResponse[TResult]:
|
347
366
|
if self._queued_retry_enabled and response.http_status == 202:
|
348
367
|
self.logger.debug(
|
349
368
|
json.dumps(
|
@@ -355,7 +374,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
355
374
|
|
356
375
|
return response
|
357
376
|
|
358
|
-
def get(self, path: str, result_class: Type[
|
377
|
+
def get(self, path: str, result_class: Type[TResult], check_response: bool = True) -> PangeaResponse[TResult]:
|
359
378
|
"""Makes the GET call to a Pangea Service endpoint.
|
360
379
|
|
361
380
|
Args:
|
@@ -387,7 +406,20 @@ class PangeaRequest(PangeaRequestBase):
|
|
387
406
|
|
388
407
|
return self._check_response(pangea_response)
|
389
408
|
|
390
|
-
def download_file(self, url: str, filename:
|
409
|
+
def download_file(self, url: str, filename: str | None = None) -> AttachedFile:
|
410
|
+
"""
|
411
|
+
Download file
|
412
|
+
|
413
|
+
Download a file from the specified URL and save it with the given
|
414
|
+
filename.
|
415
|
+
|
416
|
+
Args:
|
417
|
+
url: URL of the file to download
|
418
|
+
filename: Name to save the downloaded file as. If not provided, the
|
419
|
+
filename will be determined from the Content-Disposition header or
|
420
|
+
the URL.
|
421
|
+
"""
|
422
|
+
|
391
423
|
self.logger.debug(
|
392
424
|
json.dumps(
|
393
425
|
{
|
@@ -423,25 +455,24 @@ class PangeaRequest(PangeaRequestBase):
|
|
423
455
|
)
|
424
456
|
)
|
425
457
|
return AttachedFile(filename=filename, file=response.content, content_type=content_type)
|
426
|
-
|
427
|
-
raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
|
458
|
+
raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
|
428
459
|
|
429
460
|
def poll_result_by_id(
|
430
|
-
self, request_id: str, result_class:
|
431
|
-
):
|
461
|
+
self, request_id: str, result_class: Type[TResult], check_response: bool = True
|
462
|
+
) -> PangeaResponse[TResult]:
|
432
463
|
path = self._get_poll_path(request_id)
|
433
464
|
self.logger.debug(json.dumps({"service": self.service, "action": "poll_result_once", "url": path}))
|
434
|
-
return self.get(path, result_class, check_response=check_response)
|
465
|
+
return self.get(path, result_class, check_response=check_response)
|
435
466
|
|
436
467
|
def poll_result_once(
|
437
|
-
self, response: PangeaResponse, check_response: bool = True
|
438
|
-
) -> PangeaResponse[
|
468
|
+
self, response: PangeaResponse[TResult], check_response: bool = True
|
469
|
+
) -> PangeaResponse[TResult]:
|
439
470
|
request_id = response.request_id
|
440
471
|
if not request_id:
|
441
472
|
raise pe.PangeaException("Poll result error: response did not include a 'request_id'")
|
442
473
|
|
443
474
|
if response.status != ResponseStatus.ACCEPTED.value:
|
444
|
-
raise pe.PangeaException("Response already
|
475
|
+
raise pe.PangeaException("Response already processed")
|
445
476
|
|
446
477
|
return self.poll_result_by_id(request_id, response.result_class, check_response=check_response)
|
447
478
|
|
@@ -526,7 +557,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
526
557
|
self.post_presigned_url(url=presigned_url, data=data_to_presigned, files=files)
|
527
558
|
return response.raw_response
|
528
559
|
|
529
|
-
def _poll_result_retry(self, response: PangeaResponse) -> PangeaResponse[
|
560
|
+
def _poll_result_retry(self, response: PangeaResponse[TResult]) -> PangeaResponse[TResult]:
|
530
561
|
retry_count = 1
|
531
562
|
start = time.time()
|
532
563
|
|
@@ -538,9 +569,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
538
569
|
self.logger.debug(json.dumps({"service": self.service, "action": "poll_result_retry", "step": "exit"}))
|
539
570
|
return self._check_response(response)
|
540
571
|
|
541
|
-
def _poll_presigned_url(
|
542
|
-
self, response: PangeaResponse[Type[PangeaResponseResult]]
|
543
|
-
) -> PangeaResponse[Type[PangeaResponseResult]]:
|
572
|
+
def _poll_presigned_url(self, response: PangeaResponse[TResult]) -> PangeaResponse[TResult]:
|
544
573
|
if response.http_status != 202:
|
545
574
|
raise AttributeError("Response should be 202")
|
546
575
|
|
@@ -583,8 +612,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
583
612
|
|
584
613
|
if loop_resp.accepted_result is not None and not loop_resp.accepted_result.has_upload_url:
|
585
614
|
return loop_resp
|
586
|
-
|
587
|
-
raise loop_exc
|
615
|
+
raise loop_exc
|
588
616
|
|
589
617
|
def _init_session(self) -> requests.Session:
|
590
618
|
retry_config = Retry(
|
pangea/response.py
CHANGED
@@ -3,16 +3,15 @@
|
|
3
3
|
import datetime
|
4
4
|
import enum
|
5
5
|
import os
|
6
|
-
from typing import Any, Dict, Generic, List, Optional, Type,
|
6
|
+
from typing import Any, Dict, Generic, List, Optional, Type, Union
|
7
7
|
|
8
8
|
import aiohttp
|
9
9
|
import requests
|
10
|
-
from pydantic import BaseModel
|
10
|
+
from pydantic import BaseModel, ConfigDict, PlainSerializer
|
11
|
+
from typing_extensions import Annotated, TypeVar
|
11
12
|
|
12
13
|
from pangea.utils import format_datetime
|
13
14
|
|
14
|
-
T = TypeVar("T")
|
15
|
-
|
16
15
|
|
17
16
|
class AttachedFile(object):
|
18
17
|
filename: str
|
@@ -50,10 +49,24 @@ class AttachedFile(object):
|
|
50
49
|
|
51
50
|
|
52
51
|
class TransferMethod(str, enum.Enum):
|
52
|
+
"""Transfer methods for uploading file data."""
|
53
|
+
|
53
54
|
MULTIPART = "multipart"
|
54
55
|
POST_URL = "post-url"
|
55
56
|
PUT_URL = "put-url"
|
56
57
|
SOURCE_URL = "source-url"
|
58
|
+
"""
|
59
|
+
A `source-url` is a caller-specified URL where the Pangea APIs can fetch the
|
60
|
+
contents of the input file. When calling a Pangea API with a
|
61
|
+
`transfer_method` of `source-url`, you must also specify a `source_url`
|
62
|
+
input parameter that provides a URL to the input file. The source URL can be
|
63
|
+
a presigned URL created by the caller, and it will be used to download the
|
64
|
+
content of the input file. The `source-url` transfer method is useful when
|
65
|
+
you already have a file in your storage and can provide a URL from which
|
66
|
+
Pangea API can fetch the input file—there is no need to transfer it to
|
67
|
+
Pangea with a separate POST or PUT request.
|
68
|
+
"""
|
69
|
+
|
57
70
|
DEST_URL = "dest-url"
|
58
71
|
|
59
72
|
def __str__(self):
|
@@ -63,24 +76,17 @@ class TransferMethod(str, enum.Enum):
|
|
63
76
|
return str(self.value)
|
64
77
|
|
65
78
|
|
79
|
+
PangeaDateTime = Annotated[datetime.datetime, PlainSerializer(format_datetime)]
|
80
|
+
|
81
|
+
|
66
82
|
# API response models accept arbitrary fields so they tolerate new parameters the server may add
|
67
83
|
class APIResponseModel(BaseModel):
|
68
|
-
|
69
|
-
arbitrary_types_allowed = True
|
70
|
-
# allow parameters despite they are not declared in model. Make SDK accept server new parameters
|
71
|
-
extra = "allow"
|
84
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
|
72
85
|
|
73
86
|
|
74
87
|
# API request models also allow arbitrary fields (extra="allow")
|
75
88
|
class APIRequestModel(BaseModel):
|
76
|
-
|
77
|
-
arbitrary_types_allowed = True
|
78
|
-
extra = (
|
79
|
-
"allow" # allow parameters despite they are not declared in model. Make SDK accept server new parameters
|
80
|
-
)
|
81
|
-
json_encoders = {
|
82
|
-
datetime.datetime: format_datetime,
|
83
|
-
}
|
89
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
|
84
90
|
|
85
91
|
|
86
92
|
class PangeaResponseResult(APIResponseModel):
|
@@ -155,38 +161,50 @@ class ResponseStatus(str, enum.Enum):
|
|
155
161
|
|
156
162
|
|
157
163
|
class ResponseHeader(APIResponseModel):
|
158
|
-
"""
|
159
|
-
Pangea response API header.
|
160
|
-
|
161
|
-
Arguments:
|
162
|
-
request_id -- The request ID.
|
163
|
-
request_time -- The time the request was issued, ISO8601.
|
164
|
-
response_time -- The time the response was issued, ISO8601.
|
165
|
-
status -- Pangea response status
|
166
|
-
summary -- The summary of the response.
|
167
|
-
"""
|
164
|
+
"""Pangea response API header."""
|
168
165
|
|
169
166
|
request_id: str
|
167
|
+
"""A unique identifier assigned to each request made to the API."""
|
168
|
+
|
170
169
|
request_time: str
|
170
|
+
"""
|
171
|
+
Timestamp indicating the exact moment when a request is made to the API.
|
172
|
+
"""
|
173
|
+
|
171
174
|
response_time: str
|
175
|
+
"""
|
176
|
+
Timestamp indicating when the API issued the response (ISO 8601).
|
177
|
+
"""
|
178
|
+
|
172
179
|
status: str
|
180
|
+
"""
|
181
|
+
Represents the status or outcome of the API request.
|
182
|
+
"""
|
183
|
+
|
173
184
|
summary: str
|
185
|
+
"""
|
186
|
+
Provides a concise and brief overview of the purpose or primary objective of
|
187
|
+
the API endpoint.
|
188
|
+
"""
|
189
|
+
|
190
|
+
|
191
|
+
T = TypeVar("T", bound=PangeaResponseResult)
|
174
192
|
|
175
193
|
|
176
|
-
class PangeaResponse(Generic[T]
|
194
|
+
class PangeaResponse(ResponseHeader, Generic[T]):
|
177
195
|
raw_result: Optional[Dict[str, Any]] = None
|
178
196
|
raw_response: Optional[Union[requests.Response, aiohttp.ClientResponse]] = None
|
179
197
|
result: Optional[T] = None
|
180
198
|
pangea_error: Optional[PangeaError] = None
|
181
199
|
accepted_result: Optional[AcceptedResult] = None
|
182
|
-
result_class:
|
200
|
+
result_class: Type[T] = PangeaResponseResult # type: ignore[assignment]
|
183
201
|
_json: Any
|
184
202
|
attached_files: List[AttachedFile] = []
|
185
203
|
|
186
204
|
def __init__(
|
187
205
|
self,
|
188
206
|
response: requests.Response,
|
189
|
-
result_class:
|
207
|
+
result_class: Type[T],
|
190
208
|
json: dict,
|
191
209
|
attached_files: List[AttachedFile] = [],
|
192
210
|
):
|
@@ -198,7 +216,7 @@ class PangeaResponse(Generic[T], ResponseHeader):
|
|
198
216
|
self.attached_files = attached_files
|
199
217
|
|
200
218
|
self.result = (
|
201
|
-
self.result_class(**self.raw_result)
|
219
|
+
self.result_class(**self.raw_result)
|
202
220
|
if self.raw_result is not None and issubclass(self.result_class, PangeaResponseResult) and self.success
|
203
221
|
else None
|
204
222
|
)
|
@@ -230,4 +248,4 @@ class PangeaResponse(Generic[T], ResponseHeader):
|
|
230
248
|
|
231
249
|
@property
|
232
250
|
def url(self) -> str:
|
233
|
-
return str(self.raw_response.url) # type: ignore[
|
251
|
+
return str(self.raw_response.url) # type: ignore[union-attr]
|
pangea/services/__init__.py
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
from .ai_guard import AIGuard
|
1
2
|
from .audit.audit import Audit
|
2
3
|
from .authn.authn import AuthN
|
4
|
+
from .authz import AuthZ
|
3
5
|
from .embargo import Embargo
|
4
6
|
from .file_scan import FileScan
|
5
7
|
from .intel import DomainIntel, FileIntel, IpIntel, UrlIntel, UserIntel
|
8
|
+
from .prompt_guard import PromptGuard
|
6
9
|
from .redact import Redact
|
10
|
+
from .sanitize import Sanitize
|
7
11
|
from .share.share import Share
|
8
12
|
from .vault.vault import Vault
|