pangea-sdk 3.8.0__py3-none-any.whl → 5.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pangea/__init__.py +2 -1
- pangea/asyncio/__init__.py +1 -0
- pangea/asyncio/file_uploader.py +39 -0
- pangea/asyncio/request.py +46 -23
- pangea/asyncio/services/__init__.py +2 -0
- pangea/asyncio/services/audit.py +46 -20
- pangea/asyncio/services/authn.py +123 -61
- pangea/asyncio/services/authz.py +57 -31
- pangea/asyncio/services/base.py +21 -2
- pangea/asyncio/services/embargo.py +2 -2
- pangea/asyncio/services/file_scan.py +24 -9
- pangea/asyncio/services/intel.py +104 -30
- pangea/asyncio/services/redact.py +52 -3
- pangea/asyncio/services/sanitize.py +217 -0
- pangea/asyncio/services/share.py +733 -0
- pangea/asyncio/services/vault.py +1709 -766
- pangea/crypto/rsa.py +135 -0
- pangea/deep_verify.py +7 -1
- pangea/dump_audit.py +9 -8
- pangea/file_uploader.py +35 -0
- pangea/request.py +70 -49
- pangea/response.py +36 -17
- pangea/services/__init__.py +2 -0
- pangea/services/audit/audit.py +57 -29
- pangea/services/audit/models.py +12 -3
- pangea/services/audit/signing.py +6 -5
- pangea/services/audit/util.py +3 -3
- pangea/services/authn/authn.py +120 -66
- pangea/services/authn/models.py +167 -11
- pangea/services/authz.py +53 -30
- pangea/services/base.py +16 -2
- pangea/services/embargo.py +2 -2
- pangea/services/file_scan.py +32 -15
- pangea/services/intel.py +155 -30
- pangea/services/redact.py +132 -3
- pangea/services/sanitize.py +388 -0
- pangea/services/share/file_format.py +170 -0
- pangea/services/share/share.py +1440 -0
- pangea/services/vault/models/asymmetric.py +120 -18
- pangea/services/vault/models/common.py +439 -141
- pangea/services/vault/models/keys.py +94 -0
- pangea/services/vault/models/secret.py +27 -3
- pangea/services/vault/models/symmetric.py +68 -22
- pangea/services/vault/vault.py +1690 -766
- pangea/tools.py +6 -7
- pangea/utils.py +94 -33
- pangea/verify_audit.py +270 -83
- {pangea_sdk-3.8.0.dist-info → pangea_sdk-5.3.0.dist-info}/METADATA +21 -29
- pangea_sdk-5.3.0.dist-info/RECORD +56 -0
- {pangea_sdk-3.8.0.dist-info → pangea_sdk-5.3.0.dist-info}/WHEEL +1 -1
- pangea_sdk-3.8.0.dist-info/RECORD +0 -46
pangea/crypto/rsa.py
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import base64
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
from cryptography.hazmat.backends import default_backend
|
7
|
+
from cryptography.hazmat.primitives import hashes, serialization
|
8
|
+
from cryptography.hazmat.primitives.asymmetric import padding, rsa
|
9
|
+
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
10
|
+
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
11
|
+
|
12
|
+
from pangea.services.vault.models.common import ExportEncryptionAlgorithm
|
13
|
+
from pangea.services.vault.models.symmetric import SymmetricKeyEncryptionAlgorithm
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from pangea.services.vault.models.common import ExportResult
|
17
|
+
|
18
|
+
|
19
|
+
def generate_key_pair() -> tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]:
    """Create a new RSA key pair.

    The private key is generated with a 4096-bit modulus and public exponent
    65537; the public key is derived from that private key.

    Returns:
        A ``(private_key, public_key)`` tuple.
    """
    secret_key = rsa.generate_private_key(public_exponent=65537, key_size=4096)
    return secret_key, secret_key.public_key()
|
29
|
+
|
30
|
+
|
31
|
+
def decrypt_sha512(private_key: rsa.RSAPrivateKey, encrypted_message: bytes) -> bytes:
    """Decrypt ``encrypted_message`` with RSA-OAEP.

    SHA-512 is used both as the OAEP digest and inside MGF1; no label is set.

    Args:
        private_key: RSA private key used for decryption.
        encrypted_message: Ciphertext to decrypt.

    Returns:
        The decrypted bytes.
    """
    oaep_sha512 = padding.OAEP(
        mgf=padding.MGF1(algorithm=hashes.SHA512()),
        algorithm=hashes.SHA512(),
        label=None,
    )
    return private_key.decrypt(encrypted_message, oaep_sha512)
|
37
|
+
|
38
|
+
|
39
|
+
def encrypt_sha512(public_key: rsa.RSAPublicKey, message: bytes) -> bytes:
    """Encrypt ``message`` with RSA-OAEP.

    SHA-512 is used both as the OAEP digest and inside MGF1; no label is set.

    Args:
        public_key: RSA public key used for encryption.
        message: Plaintext bytes to encrypt.

    Returns:
        The encrypted bytes.
    """
    oaep_sha512 = padding.OAEP(
        mgf=padding.MGF1(algorithm=hashes.SHA512()),
        algorithm=hashes.SHA512(),
        label=None,
    )
    return public_key.encrypt(message, oaep_sha512)
|
44
|
+
|
45
|
+
|
46
|
+
def private_key_to_pem(private_key: rsa.RSAPrivateKey) -> bytes:
    """Serialize ``private_key`` to PEM bytes.

    The key is written in the ``TraditionalOpenSSL`` private format and is
    NOT encrypted (``NoEncryption``), so the output must be handled as secret
    material.

    Args:
        private_key: RSA private key to serialize.

    Returns:
        The PEM-encoded private key.
    """
    pem_encoding = serialization.Encoding.PEM
    openssl_format = serialization.PrivateFormat.TraditionalOpenSSL
    unencrypted = serialization.NoEncryption()
    return private_key.private_bytes(
        encoding=pem_encoding, format=openssl_format, encryption_algorithm=unencrypted
    )
|
53
|
+
|
54
|
+
|
55
|
+
def public_key_to_pem(public_key: rsa.RSAPublicKey) -> bytes:
    """Serialize ``public_key`` to PEM bytes in ``SubjectPublicKeyInfo`` format.

    Args:
        public_key: RSA public key to serialize.

    Returns:
        The PEM-encoded public key.
    """
    pem_encoding = serialization.Encoding.PEM
    spki_format = serialization.PublicFormat.SubjectPublicKeyInfo
    return public_key.public_bytes(encoding=pem_encoding, format=spki_format)
|
60
|
+
|
61
|
+
|
62
|
+
# Standard nonce size for GCM, in bytes.
_AES_GCM_IV_SIZE = 12

# AES-256 key length in bytes.
_KEY_LENGTH = 32
|
67
|
+
|
68
|
+
|
69
|
+
def kem_decrypt(
    private_key: rsa.RSAPrivateKey,
    iv: bytes,
    ciphertext: bytes,
    symmetric_algorithm: str,
    asymmetric_algorithm: str,
    encrypted_salt: bytes,
    password: str,
    iteration_count: int,
    hash_algorithm: str,
) -> str:
    """Decrypt a KEM-wrapped payload.

    The steps are:

    1. Reject any algorithm combination other than AES-GCM-256 /
       RSA-NO-PADDING-4096-KEM / SHA512.
    2. Recover the salt by raw (no-padding) RSA decryption of
       ``encrypted_salt`` with the private exponent.
    3. Derive a 32-byte AES key from ``password`` and the salt with
       PBKDF2-HMAC-SHA512 and ``iteration_count`` iterations.
    4. AES-GCM decrypt ``ciphertext`` with ``iv`` and no associated data.

    Args:
        private_key: RSA private key whose exponent unwraps the salt.
        iv: AES-GCM nonce.
        ciphertext: AES-GCM ciphertext to decrypt.
        symmetric_algorithm: Must match AES-GCM-256 (case-insensitive).
        asymmetric_algorithm: Must equal
            ``ExportEncryptionAlgorithm.RSA_NO_PADDING_4096_KEM``.
        encrypted_salt: RSA-encrypted PBKDF2 salt, big-endian.
        password: Password fed to PBKDF2 (UTF-8 encoded).
        iteration_count: PBKDF2 iteration count.
        hash_algorithm: Must match ``"SHA512"`` (case-insensitive).

    Returns:
        The decrypted plaintext decoded as ASCII.

    Raises:
        NotImplementedError: If any of the algorithm identifiers is unsupported.
    """
    if symmetric_algorithm.casefold() != SymmetricKeyEncryptionAlgorithm.AES_GCM_256.value.casefold():
        raise NotImplementedError(f"Unsupported symmetric algorithm: {symmetric_algorithm}")

    if asymmetric_algorithm != ExportEncryptionAlgorithm.RSA_NO_PADDING_4096_KEM:
        raise NotImplementedError(f"Unsupported asymmetric algorithm: {asymmetric_algorithm}")

    if hash_algorithm.casefold() != "SHA512".casefold():
        raise NotImplementedError(f"Unsupported hash algorithm: {hash_algorithm}")

    # No-padding RSA decryption. Fetch the private numbers once instead of
    # rebuilding them for every attribute access.
    private_numbers = private_key.private_numbers()
    n = private_numbers.public_numbers.n
    # Octet length of the modulus, rounded up so a modulus whose bit length is
    # not a multiple of 8 still gets enough bytes (512 for a 4096-bit key).
    modulus_size = (n.bit_length() + 7) // 8
    salt = pow(
        int.from_bytes(encrypted_salt, byteorder="big"),
        private_numbers.d,
        n,
    ).to_bytes(modulus_size, byteorder="big")

    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA512(), length=_KEY_LENGTH, salt=salt, iterations=iteration_count, backend=default_backend()
    )
    symmetric_key = kdf.derive(password.encode("utf-8"))

    decrypted = AESGCM(symmetric_key).decrypt(nonce=iv, data=ciphertext, associated_data=None)

    return decrypted.decode("ascii")
|
105
|
+
|
106
|
+
|
107
|
+
def kem_decrypt_export_result(*, result: ExportResult, password: str, private_key: rsa.RSAPrivateKey) -> str:
    """Decrypt the exported result of a KEM operation.

    The base64-encoded payload is taken from ``result.private_key`` or, if
    that is empty, ``result.key``. Its first ``_AES_GCM_IV_SIZE`` bytes are
    the AES-GCM nonce and the remainder is the ciphertext; both are passed to
    ``kem_decrypt`` together with the algorithm parameters from ``result``.

    Args:
        result: Export result holding the encrypted key material and the
            KEM parameters.
        password: Password used to derive the symmetric key.
        private_key: RSA private key used to recover the salt.

    Returns:
        The decrypted key material as a string.

    Raises:
        TypeError: If the payload or any required KEM parameter is missing
            from ``result``.
    """
    cipher_encoded = result.private_key or result.key
    if not cipher_encoded:
        raise TypeError("`private_key` or `key` should be set.")

    # Validate explicitly rather than with `assert`: assertions are stripped
    # when Python runs with -O, which would let missing fields through.
    if not result.encrypted_salt:
        raise TypeError("`encrypted_salt` should be set.")
    if not result.symmetric_algorithm:
        raise TypeError("`symmetric_algorithm` should be set.")
    if not result.asymmetric_algorithm:
        raise TypeError("`asymmetric_algorithm` should be set.")
    if not result.iteration_count:
        raise TypeError("`iteration_count` should be set.")
    if not result.hash_algorithm:
        raise TypeError("`hash_algorithm` should be set.")

    cipher_with_iv = base64.b64decode(cipher_encoded)
    encrypted_salt = base64.b64decode(result.encrypted_salt)

    # The nonce is prepended to the ciphertext.
    iv = cipher_with_iv[:_AES_GCM_IV_SIZE]
    cipher = cipher_with_iv[_AES_GCM_IV_SIZE:]

    return kem_decrypt(
        private_key=private_key,
        iv=iv,
        ciphertext=cipher,
        password=password,
        encrypted_salt=encrypted_salt,
        symmetric_algorithm=result.symmetric_algorithm,
        asymmetric_algorithm=result.asymmetric_algorithm,
        iteration_count=result.iteration_count,
        hash_algorithm=result.hash_algorithm,
    )
|
pangea/deep_verify.py
CHANGED
@@ -263,8 +263,14 @@ def main():
|
|
263
263
|
audit = init_audit(args.token, args.domain)
|
264
264
|
errors = deep_verify(audit, args.file)
|
265
265
|
|
266
|
-
print("\n\
|
266
|
+
print("\n\nWarnings:")
|
267
|
+
val = errors["not_persisted"]
|
268
|
+
print(f"\tnot_persisted: {val}")
|
269
|
+
|
270
|
+
print("\nTotal errors:")
|
267
271
|
for key, val in errors.items():
|
272
|
+
if key == "not_persisted":
|
273
|
+
continue
|
268
274
|
print(f"\t{key.title()}: {val}")
|
269
275
|
print()
|
270
276
|
|
pangea/dump_audit.py
CHANGED
@@ -19,7 +19,7 @@ from pangea.utils import default_encoder
|
|
19
19
|
|
20
20
|
|
21
21
|
def dump_event(output: io.TextIOWrapper, row: SearchEvent, resp: PangeaResponse[SearchOutput]):
|
22
|
-
row_data = filter_deep_none(row.
|
22
|
+
row_data = filter_deep_none(row.model_dump())
|
23
23
|
if resp.result and resp.result.root:
|
24
24
|
row_data["tree_size"] = resp.result.root.size
|
25
25
|
output.write(json.dumps(row_data, default=default_encoder) + "\n")
|
@@ -63,11 +63,12 @@ def dump_before(audit: Audit, output: io.TextIOWrapper, start: datetime) -> int:
|
|
63
63
|
cnt = 0
|
64
64
|
if search_res.result and search_res.result.count > 0:
|
65
65
|
leaf_index = search_res.result.events[0].leaf_index
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
if leaf_index is not None:
|
67
|
+
for row in reversed(search_res.result.events):
|
68
|
+
if row.leaf_index != leaf_index:
|
69
|
+
break
|
70
|
+
dump_event(output, row, search_res)
|
71
|
+
cnt += 1
|
71
72
|
print(f"Dumping before... {cnt} events")
|
72
73
|
return cnt
|
73
74
|
|
@@ -89,7 +90,7 @@ def dump_after(audit: Audit, output: io.TextIOWrapper, start: datetime, last_eve
|
|
89
90
|
cnt = 0
|
90
91
|
if search_res.result and search_res.result.count > 0:
|
91
92
|
leaf_index = search_res.result.events[0].leaf_index
|
92
|
-
if leaf_index == last_leaf_index:
|
93
|
+
if leaf_index is not None and leaf_index == last_leaf_index:
|
93
94
|
start_idx: int = 1 if last_event_hash == search_res.result.events[0].hash else 0
|
94
95
|
for row in search_res.result.events[start_idx:]:
|
95
96
|
if row.leaf_index != leaf_index:
|
@@ -124,7 +125,7 @@ def dump_page(
|
|
124
125
|
msg = f"Dumping... {search_res.result.count} events"
|
125
126
|
|
126
127
|
if search_res.result.count <= 1:
|
127
|
-
return end, 0
|
128
|
+
return end, 0, True, "", 0
|
128
129
|
|
129
130
|
offset = 0
|
130
131
|
result_id = search_res.result.id
|
pangea/file_uploader.py
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Copyright 2022 Pangea Cyber Corporation
|
2
|
+
# Author: Pangea Cyber Corporation
|
3
|
+
import io
|
4
|
+
import logging
|
5
|
+
from typing import Dict, Optional
|
6
|
+
|
7
|
+
from pangea.request import PangeaConfig, PangeaRequest
|
8
|
+
from pangea.response import TransferMethod
|
9
|
+
|
10
|
+
|
11
|
+
class FileUploader:
    """Upload a file to a presigned URL with either PUT or POST."""

    def __init__(self):
        # The request helper is built with a default config and an empty
        # token (presumably because presigned URLs carry their own
        # authorization -- confirm against PangeaRequest).
        self.logger = logging.getLogger("pangea")
        self._request = PangeaRequest(config=PangeaConfig(), token="", service="FileUploader", logger=self.logger)

    def upload_file(
        self,
        url: str,
        file: io.BufferedReader,
        transfer_method: TransferMethod = TransferMethod.PUT_URL,
        file_details: Optional[Dict] = None,
    ):
        """Send ``file`` to the presigned ``url``.

        Args:
            url: Presigned URL to upload to.
            file: Open binary file object to upload.
            transfer_method: ``PUT_URL`` or ``POST_URL``.
            file_details: Form data sent with the file; only used for
                ``POST_URL`` uploads.

        Raises:
            ValueError: If ``transfer_method`` is neither ``PUT_URL`` nor
                ``POST_URL``.
        """
        payload = [("file", ("filename", file, "application/octet-stream"))]

        if transfer_method == TransferMethod.PUT_URL:
            self._request.put_presigned_url(url=url, files=payload)
            return

        if transfer_method == TransferMethod.POST_URL:
            self._request.post_presigned_url(url=url, data=file_details, files=payload)
            return

        raise ValueError(f"Transfer method not supported: {transfer_method}")
|
pangea/request.py
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
# Copyright 2022 Pangea Cyber Corporation
|
2
2
|
# Author: Pangea Cyber Corporation
|
3
|
+
from __future__ import annotations
|
3
4
|
|
4
5
|
import copy
|
5
6
|
import json
|
6
7
|
import logging
|
7
8
|
import time
|
8
|
-
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
|
9
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Type, Union
|
9
10
|
|
10
11
|
import requests
|
12
|
+
from pydantic import BaseModel
|
11
13
|
from requests.adapters import HTTPAdapter, Retry
|
12
|
-
from requests_toolbelt import MultipartDecoder # type: ignore
|
14
|
+
from requests_toolbelt import MultipartDecoder # type: ignore[import-untyped]
|
13
15
|
from typing_extensions import TypeVar
|
14
16
|
|
15
17
|
import pangea
|
@@ -22,7 +24,7 @@ if TYPE_CHECKING:
|
|
22
24
|
import aiohttp
|
23
25
|
|
24
26
|
|
25
|
-
class MultipartResponse
|
27
|
+
class MultipartResponse:
|
26
28
|
pangea_json: Dict[str, str]
|
27
29
|
attached_files: List = []
|
28
30
|
|
@@ -31,7 +33,7 @@ class MultipartResponse(object):
|
|
31
33
|
self.attached_files = attached_files
|
32
34
|
|
33
35
|
|
34
|
-
class PangeaRequestBase
|
36
|
+
class PangeaRequestBase:
|
35
37
|
def __init__(
|
36
38
|
self, config: PangeaConfig, token: str, service: str, logger: logging.Logger, config_id: Optional[str] = None
|
37
39
|
):
|
@@ -129,8 +131,7 @@ class PangeaRequestBase(object):
|
|
129
131
|
filename_parts = content_disposition.split("name=")
|
130
132
|
if len(filename_parts) > 1:
|
131
133
|
return filename_parts[1].split(";")[0].strip('"')
|
132
|
-
|
133
|
-
return None
|
134
|
+
return None
|
134
135
|
|
135
136
|
def _get_filename_from_url(self, url: str) -> Optional[str]:
|
136
137
|
return url.split("/")[-1].split("?")[0]
|
@@ -157,40 +158,40 @@ class PangeaRequestBase(object):
|
|
157
158
|
|
158
159
|
if status == ResponseStatus.VALIDATION_ERR.value:
|
159
160
|
raise pe.ValidationException(summary, response)
|
160
|
-
|
161
|
+
if status == ResponseStatus.TOO_MANY_REQUESTS.value:
|
161
162
|
raise pe.RateLimitException(summary, response)
|
162
|
-
|
163
|
+
if status == ResponseStatus.NO_CREDIT.value:
|
163
164
|
raise pe.NoCreditException(summary, response)
|
164
|
-
|
165
|
+
if status == ResponseStatus.UNAUTHORIZED.value:
|
165
166
|
raise pe.UnauthorizedException(self.service, response)
|
166
|
-
|
167
|
+
if status == ResponseStatus.SERVICE_NOT_ENABLED.value:
|
167
168
|
raise pe.ServiceNotEnabledException(self.service, response)
|
168
|
-
|
169
|
+
if status == ResponseStatus.PROVIDER_ERR.value:
|
169
170
|
raise pe.ProviderErrorException(summary, response)
|
170
|
-
|
171
|
+
if status in (ResponseStatus.MISSING_CONFIG_ID_SCOPE.value, ResponseStatus.MISSING_CONFIG_ID.value):
|
171
172
|
raise pe.MissingConfigID(self.service, response)
|
172
|
-
|
173
|
+
if status == ResponseStatus.SERVICE_NOT_AVAILABLE.value:
|
173
174
|
raise pe.ServiceNotAvailableException(summary, response)
|
174
|
-
|
175
|
+
if status == ResponseStatus.TREE_NOT_FOUND.value:
|
175
176
|
raise pe.TreeNotFoundException(summary, response)
|
176
|
-
|
177
|
+
if status == ResponseStatus.IP_NOT_FOUND.value:
|
177
178
|
raise pe.IPNotFoundException(summary, response)
|
178
|
-
|
179
|
+
if status == ResponseStatus.BAD_OFFSET.value:
|
179
180
|
raise pe.BadOffsetException(summary, response)
|
180
|
-
|
181
|
+
if status == ResponseStatus.FORBIDDEN_VAULT_OPERATION.value:
|
181
182
|
raise pe.ForbiddenVaultOperation(summary, response)
|
182
|
-
|
183
|
+
if status == ResponseStatus.VAULT_ITEM_NOT_FOUND.value:
|
183
184
|
raise pe.VaultItemNotFound(summary, response)
|
184
|
-
|
185
|
-
raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response)
|
186
|
-
|
185
|
+
if status == ResponseStatus.NOT_FOUND.value:
|
186
|
+
raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response)
|
187
|
+
if status == ResponseStatus.INTERNAL_SERVER_ERROR.value:
|
187
188
|
raise pe.InternalServerError(response)
|
188
|
-
|
189
|
+
if status == ResponseStatus.ACCEPTED.value:
|
189
190
|
raise pe.AcceptedRequestException(response)
|
190
191
|
raise pe.PangeaAPIException(f"{summary} ", response)
|
191
192
|
|
192
193
|
|
193
|
-
TResult = TypeVar("TResult", bound=PangeaResponseResult
|
194
|
+
TResult = TypeVar("TResult", bound=PangeaResponseResult)
|
194
195
|
|
195
196
|
|
196
197
|
class PangeaRequest(PangeaRequestBase):
|
@@ -209,7 +210,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
209
210
|
self,
|
210
211
|
endpoint: str,
|
211
212
|
result_class: Type[TResult],
|
212
|
-
data:
|
213
|
+
data: str | BaseModel | dict[str, Any] | None = None,
|
213
214
|
files: Optional[List[Tuple]] = None,
|
214
215
|
poll_result: bool = True,
|
215
216
|
url: Optional[str] = None,
|
@@ -224,6 +225,13 @@ class PangeaRequest(PangeaRequestBase):
|
|
224
225
|
PangeaResponse which contains the response in its entirety and
|
225
226
|
various properties to retrieve individual fields
|
226
227
|
"""
|
228
|
+
|
229
|
+
if isinstance(data, BaseModel):
|
230
|
+
data = data.model_dump(exclude_none=True)
|
231
|
+
|
232
|
+
if data is None:
|
233
|
+
data = {}
|
234
|
+
|
227
235
|
if url is None:
|
228
236
|
url = self._url(endpoint)
|
229
237
|
|
@@ -324,28 +332,29 @@ class PangeaRequest(PangeaRequestBase):
|
|
324
332
|
return self.session.post(url, headers=headers, data=data_send, files=files)
|
325
333
|
|
326
334
|
def _http_post_process(
|
327
|
-
self,
|
335
|
+
self,
|
336
|
+
data: Union[str, Dict] = {},
|
337
|
+
files: Optional[Sequence[Tuple[str, Tuple[Any, str, str]]]] = None,
|
338
|
+
multipart_post: bool = True,
|
328
339
|
):
|
329
340
|
if files:
|
330
341
|
if multipart_post is True:
|
331
342
|
data_send: str = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
332
343
|
multi = [("request", (None, data_send, "application/json"))]
|
333
|
-
multi.extend(files)
|
334
|
-
files = multi
|
344
|
+
multi.extend(files)
|
345
|
+
files = multi
|
335
346
|
return None, files
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
data_send = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
348
|
-
return data_send, None
|
347
|
+
# Post to presigned url as form
|
348
|
+
data_send: list = [] # type: ignore[no-redef]
|
349
|
+
for k, v in data.items(): # type: ignore[union-attr]
|
350
|
+
data_send.append((k, v)) # type: ignore[attr-defined]
|
351
|
+
# When posting to presigned url, file key should be 'file'
|
352
|
+
files = { # type: ignore[assignment]
|
353
|
+
"file": files[0][1],
|
354
|
+
}
|
355
|
+
return data_send, files
|
356
|
+
data_send = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
357
|
+
return data_send, None
|
349
358
|
|
350
359
|
return data, files
|
351
360
|
|
@@ -393,7 +402,20 @@ class PangeaRequest(PangeaRequestBase):
|
|
393
402
|
|
394
403
|
return self._check_response(pangea_response)
|
395
404
|
|
396
|
-
def download_file(self, url: str, filename:
|
405
|
+
def download_file(self, url: str, filename: str | None = None) -> AttachedFile:
|
406
|
+
"""
|
407
|
+
Download file
|
408
|
+
|
409
|
+
Download a file from the specified URL and save it with the given
|
410
|
+
filename.
|
411
|
+
|
412
|
+
Args:
|
413
|
+
url: URL of the file to download
|
414
|
+
filename: Name to save the downloaded file as. If not provided, the
|
415
|
+
filename will be determined from the Content-Disposition header or
|
416
|
+
the URL.
|
417
|
+
"""
|
418
|
+
|
397
419
|
self.logger.debug(
|
398
420
|
json.dumps(
|
399
421
|
{
|
@@ -429,8 +451,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
429
451
|
)
|
430
452
|
)
|
431
453
|
return AttachedFile(filename=filename, file=response.content, content_type=content_type)
|
432
|
-
|
433
|
-
raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
|
454
|
+
raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
|
434
455
|
|
435
456
|
def poll_result_by_id(
|
436
457
|
self, request_id: str, result_class: Type[TResult], check_response: bool = True
|
@@ -459,10 +480,8 @@ class PangeaRequest(PangeaRequestBase):
|
|
459
480
|
) -> PangeaResponse:
|
460
481
|
# Send request
|
461
482
|
try:
|
462
|
-
# This should return 202 (AcceptedRequestException)
|
463
|
-
|
464
|
-
raise pe.PresignedURLException("Should return 202", resp)
|
465
|
-
|
483
|
+
# This should return 202 (AcceptedRequestException) at least zero size file is sent
|
484
|
+
return self.post(endpoint=endpoint, result_class=result_class, data=data, poll_result=False)
|
466
485
|
except pe.AcceptedRequestException as e:
|
467
486
|
accepted_exception = e
|
468
487
|
except Exception as e:
|
@@ -520,6 +539,9 @@ class PangeaRequest(PangeaRequestBase):
|
|
520
539
|
raise AttributeError("files attribute should have at least 1 file")
|
521
540
|
|
522
541
|
response = self.request_presigned_url(endpoint=endpoint, result_class=result_class, data=data)
|
542
|
+
|
543
|
+
if response.success: # This should only happen when uploading a zero bytes file
|
544
|
+
return response.raw_response
|
523
545
|
if response.accepted_result is None:
|
524
546
|
raise pe.PangeaException("No accepted_result field when requesting presigned url")
|
525
547
|
if response.accepted_result.post_url is None:
|
@@ -586,8 +608,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
586
608
|
|
587
609
|
if loop_resp.accepted_result is not None and not loop_resp.accepted_result.has_upload_url:
|
588
610
|
return loop_resp
|
589
|
-
|
590
|
-
raise loop_exc
|
611
|
+
raise loop_exc
|
591
612
|
|
592
613
|
def _init_session(self) -> requests.Session:
|
593
614
|
retry_config = Retry(
|
pangea/response.py
CHANGED
@@ -7,8 +7,8 @@ from typing import Any, Dict, Generic, List, Optional, Type, Union
|
|
7
7
|
|
8
8
|
import aiohttp
|
9
9
|
import requests
|
10
|
-
from pydantic import BaseModel
|
11
|
-
from typing_extensions import TypeVar
|
10
|
+
from pydantic import BaseModel, ConfigDict, PlainSerializer
|
11
|
+
from typing_extensions import Annotated, TypeVar
|
12
12
|
|
13
13
|
from pangea.utils import format_datetime
|
14
14
|
|
@@ -28,6 +28,7 @@ class AttachedFile(object):
|
|
28
28
|
filename = self.filename if self.filename else "default_save_filename"
|
29
29
|
|
30
30
|
filepath = os.path.join(dest_folder, filename)
|
31
|
+
filepath = self._find_available_file(filepath)
|
31
32
|
directory = os.path.dirname(filepath)
|
32
33
|
if not os.path.exists(directory):
|
33
34
|
os.makedirs(directory)
|
@@ -35,12 +36,37 @@ class AttachedFile(object):
|
|
35
36
|
with open(filepath, "wb") as file:
|
36
37
|
file.write(self.file)
|
37
38
|
|
39
|
+
def _find_available_file(self, file_path):
    """Return a path based on ``file_path`` that does not exist yet.

    If ``file_path`` is free it is returned unchanged. Otherwise ``_1``,
    ``_2``, ... is inserted before the extension (or appended when there is
    none) until a path that does not exist is found.
    """
    stem, suffix = os.path.splitext(file_path)
    candidate = file_path
    attempt = 0
    while os.path.exists(candidate):
        attempt += 1
        # `suffix` is "" when there is no extension, so one format covers both cases.
        candidate = f"{stem}_{attempt}{suffix}"
    return candidate
|
49
|
+
|
38
50
|
|
39
51
|
class TransferMethod(str, enum.Enum):
|
52
|
+
"""Transfer methods for uploading file data."""
|
53
|
+
|
40
54
|
MULTIPART = "multipart"
|
41
55
|
POST_URL = "post-url"
|
42
56
|
PUT_URL = "put-url"
|
43
57
|
SOURCE_URL = "source-url"
|
58
|
+
"""
|
59
|
+
A `source-url` is a caller-specified URL where the Pangea APIs can fetch the
|
60
|
+
contents of the input file. When calling a Pangea API with a
|
61
|
+
`transfer_method` of `source-url`, you must also specify a `source_url`
|
62
|
+
input parameter that provides a URL to the input file. The source URL can be
|
63
|
+
a presigned URL created by the caller, and it will be used to download the
|
64
|
+
content of the input file. The `source-url` transfer method is useful when
|
65
|
+
you already have a file in your storage and can provide a URL from which
|
66
|
+
Pangea API can fetch the input file—there is no need to transfer it to
|
67
|
+
Pangea with a separate POST or PUT request.
|
68
|
+
"""
|
69
|
+
|
44
70
|
DEST_URL = "dest-url"
|
45
71
|
|
46
72
|
def __str__(self):
|
@@ -50,24 +76,17 @@ class TransferMethod(str, enum.Enum):
|
|
50
76
|
return str(self.value)
|
51
77
|
|
52
78
|
|
79
|
+
PangeaDateTime = Annotated[datetime.datetime, PlainSerializer(format_datetime)]
|
80
|
+
|
81
|
+
|
53
82
|
# API response should accept arbitrary fields to make them accept possible new parameters
|
54
83
|
class APIResponseModel(BaseModel):
|
55
|
-
|
56
|
-
arbitrary_types_allowed = True
|
57
|
-
# allow parameters despite they are not declared in model. Make SDK accept server new parameters
|
58
|
-
extra = "allow"
|
84
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
|
59
85
|
|
60
86
|
|
61
87
|
# API request models doesn't not allow arbitrary fields
|
62
88
|
class APIRequestModel(BaseModel):
|
63
|
-
|
64
|
-
arbitrary_types_allowed = True
|
65
|
-
extra = (
|
66
|
-
"allow" # allow parameters despite they are not declared in model. Make SDK accept server new parameters
|
67
|
-
)
|
68
|
-
json_encoders = {
|
69
|
-
datetime.datetime: format_datetime,
|
70
|
-
}
|
89
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
|
71
90
|
|
72
91
|
|
73
92
|
class PangeaResponseResult(APIResponseModel):
|
@@ -169,10 +188,10 @@ class ResponseHeader(APIResponseModel):
|
|
169
188
|
"""
|
170
189
|
|
171
190
|
|
172
|
-
T = TypeVar("T", bound=PangeaResponseResult
|
191
|
+
T = TypeVar("T", bound=PangeaResponseResult)
|
173
192
|
|
174
193
|
|
175
|
-
class PangeaResponse(Generic[T]
|
194
|
+
class PangeaResponse(ResponseHeader, Generic[T]):
|
176
195
|
raw_result: Optional[Dict[str, Any]] = None
|
177
196
|
raw_response: Optional[Union[requests.Response, aiohttp.ClientResponse]] = None
|
178
197
|
result: Optional[T] = None
|
@@ -229,4 +248,4 @@ class PangeaResponse(Generic[T], ResponseHeader):
|
|
229
248
|
|
230
249
|
@property
|
231
250
|
def url(self) -> str:
|
232
|
-
return str(self.raw_response.url) # type: ignore[
|
251
|
+
return str(self.raw_response.url) # type: ignore[union-attr]
|
pangea/services/__init__.py
CHANGED