pangea-sdk 3.8.0__py3-none-any.whl → 5.3.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pangea/__init__.py +2 -1
- pangea/asyncio/__init__.py +1 -0
- pangea/asyncio/file_uploader.py +39 -0
- pangea/asyncio/request.py +46 -23
- pangea/asyncio/services/__init__.py +2 -0
- pangea/asyncio/services/audit.py +46 -20
- pangea/asyncio/services/authn.py +123 -61
- pangea/asyncio/services/authz.py +57 -31
- pangea/asyncio/services/base.py +21 -2
- pangea/asyncio/services/embargo.py +2 -2
- pangea/asyncio/services/file_scan.py +24 -9
- pangea/asyncio/services/intel.py +104 -30
- pangea/asyncio/services/redact.py +52 -3
- pangea/asyncio/services/sanitize.py +217 -0
- pangea/asyncio/services/share.py +733 -0
- pangea/asyncio/services/vault.py +1709 -766
- pangea/crypto/rsa.py +135 -0
- pangea/deep_verify.py +7 -1
- pangea/dump_audit.py +9 -8
- pangea/file_uploader.py +35 -0
- pangea/request.py +70 -49
- pangea/response.py +36 -17
- pangea/services/__init__.py +2 -0
- pangea/services/audit/audit.py +57 -29
- pangea/services/audit/models.py +12 -3
- pangea/services/audit/signing.py +6 -5
- pangea/services/audit/util.py +3 -3
- pangea/services/authn/authn.py +120 -66
- pangea/services/authn/models.py +167 -11
- pangea/services/authz.py +53 -30
- pangea/services/base.py +16 -2
- pangea/services/embargo.py +2 -2
- pangea/services/file_scan.py +32 -15
- pangea/services/intel.py +155 -30
- pangea/services/redact.py +132 -3
- pangea/services/sanitize.py +388 -0
- pangea/services/share/file_format.py +170 -0
- pangea/services/share/share.py +1440 -0
- pangea/services/vault/models/asymmetric.py +120 -18
- pangea/services/vault/models/common.py +439 -141
- pangea/services/vault/models/keys.py +94 -0
- pangea/services/vault/models/secret.py +27 -3
- pangea/services/vault/models/symmetric.py +68 -22
- pangea/services/vault/vault.py +1690 -766
- pangea/tools.py +6 -7
- pangea/utils.py +94 -33
- pangea/verify_audit.py +270 -83
- {pangea_sdk-3.8.0.dist-info → pangea_sdk-5.3.0.dist-info}/METADATA +21 -29
- pangea_sdk-5.3.0.dist-info/RECORD +56 -0
- {pangea_sdk-3.8.0.dist-info → pangea_sdk-5.3.0.dist-info}/WHEEL +1 -1
- pangea_sdk-3.8.0.dist-info/RECORD +0 -46
pangea/crypto/rsa.py
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import base64
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
from cryptography.hazmat.backends import default_backend
|
7
|
+
from cryptography.hazmat.primitives import hashes, serialization
|
8
|
+
from cryptography.hazmat.primitives.asymmetric import padding, rsa
|
9
|
+
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
10
|
+
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
11
|
+
|
12
|
+
from pangea.services.vault.models.common import ExportEncryptionAlgorithm
|
13
|
+
from pangea.services.vault.models.symmetric import SymmetricKeyEncryptionAlgorithm
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from pangea.services.vault.models.common import ExportResult
|
17
|
+
|
18
|
+
|
19
|
+
def generate_key_pair() -> tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]:
|
20
|
+
# Generate a 4096-bit RSA key pair
|
21
|
+
private_key = rsa.generate_private_key(
|
22
|
+
public_exponent=65537,
|
23
|
+
key_size=4096,
|
24
|
+
)
|
25
|
+
|
26
|
+
# Extract the public key from the private key
|
27
|
+
public_key = private_key.public_key()
|
28
|
+
return private_key, public_key
|
29
|
+
|
30
|
+
|
31
|
+
def decrypt_sha512(private_key: rsa.RSAPrivateKey, encrypted_message: bytes) -> bytes:
|
32
|
+
# Decrypt the message using the private key and OAEP padding
|
33
|
+
return private_key.decrypt(
|
34
|
+
encrypted_message,
|
35
|
+
padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA512()), algorithm=hashes.SHA512(), label=None),
|
36
|
+
)
|
37
|
+
|
38
|
+
|
39
|
+
def encrypt_sha512(public_key: rsa.RSAPublicKey, message: bytes) -> bytes:
|
40
|
+
# Encrypt the message using the public key and OAEP padding
|
41
|
+
return public_key.encrypt(
|
42
|
+
message, padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA512()), algorithm=hashes.SHA512(), label=None)
|
43
|
+
)
|
44
|
+
|
45
|
+
|
46
|
+
def private_key_to_pem(private_key: rsa.RSAPrivateKey) -> bytes:
|
47
|
+
# Serialize private key to PEM format
|
48
|
+
return private_key.private_bytes(
|
49
|
+
encoding=serialization.Encoding.PEM,
|
50
|
+
format=serialization.PrivateFormat.TraditionalOpenSSL,
|
51
|
+
encryption_algorithm=serialization.NoEncryption(),
|
52
|
+
)
|
53
|
+
|
54
|
+
|
55
|
+
def public_key_to_pem(public_key: rsa.RSAPublicKey) -> bytes:
|
56
|
+
# Serialize public key to PEM format
|
57
|
+
return public_key.public_bytes(
|
58
|
+
encoding=serialization.Encoding.PEM, format=serialization.PublicFormat.SubjectPublicKeyInfo
|
59
|
+
)
|
60
|
+
|
61
|
+
|
62
|
+
_AES_GCM_IV_SIZE = 12
|
63
|
+
"""Standard nonce size for GCM."""
|
64
|
+
|
65
|
+
_KEY_LENGTH = 32
|
66
|
+
"""AES-256 key length in bytes."""
|
67
|
+
|
68
|
+
|
69
|
+
def kem_decrypt(
|
70
|
+
private_key: rsa.RSAPrivateKey,
|
71
|
+
iv: bytes,
|
72
|
+
ciphertext: bytes,
|
73
|
+
symmetric_algorithm: str,
|
74
|
+
asymmetric_algorithm: str,
|
75
|
+
encrypted_salt: bytes,
|
76
|
+
password: str,
|
77
|
+
iteration_count: int,
|
78
|
+
hash_algorithm: str,
|
79
|
+
) -> str:
|
80
|
+
if symmetric_algorithm.casefold() != SymmetricKeyEncryptionAlgorithm.AES_GCM_256.value.casefold():
|
81
|
+
raise NotImplementedError(f"Unsupported symmetric algorithm: {symmetric_algorithm}")
|
82
|
+
|
83
|
+
if asymmetric_algorithm != ExportEncryptionAlgorithm.RSA_NO_PADDING_4096_KEM:
|
84
|
+
raise NotImplementedError(f"Unsupported asymmetric algorithm: {asymmetric_algorithm}")
|
85
|
+
|
86
|
+
if hash_algorithm.casefold() != "SHA512".casefold():
|
87
|
+
raise NotImplementedError(f"Unsupported hash algorithm: {hash_algorithm}")
|
88
|
+
|
89
|
+
# No-padding RSA decryption.
|
90
|
+
n = private_key.private_numbers().public_numbers.n
|
91
|
+
salt = pow(
|
92
|
+
int.from_bytes(encrypted_salt, byteorder="big"),
|
93
|
+
private_key.private_numbers().d,
|
94
|
+
n,
|
95
|
+
).to_bytes(n.bit_length() // 8, byteorder="big")
|
96
|
+
|
97
|
+
kdf = PBKDF2HMAC(
|
98
|
+
algorithm=hashes.SHA512(), length=_KEY_LENGTH, salt=salt, iterations=iteration_count, backend=default_backend()
|
99
|
+
)
|
100
|
+
symmetric_key = kdf.derive(password.encode("utf-8"))
|
101
|
+
|
102
|
+
decrypted = AESGCM(symmetric_key).decrypt(nonce=iv, data=ciphertext, associated_data=None)
|
103
|
+
|
104
|
+
return decrypted.decode("ascii")
|
105
|
+
|
106
|
+
|
107
|
+
def kem_decrypt_export_result(*, result: ExportResult, password: str, private_key: rsa.RSAPrivateKey) -> str:
|
108
|
+
"""Decrypt the exported result of a KEM operation."""
|
109
|
+
cipher_encoded = result.private_key or result.key
|
110
|
+
if not cipher_encoded:
|
111
|
+
raise TypeError("`private_key` or `key` should be set.")
|
112
|
+
|
113
|
+
assert result.encrypted_salt
|
114
|
+
assert result.symmetric_algorithm
|
115
|
+
assert result.asymmetric_algorithm
|
116
|
+
assert result.iteration_count
|
117
|
+
assert result.hash_algorithm
|
118
|
+
|
119
|
+
cipher_with_iv = base64.b64decode(cipher_encoded)
|
120
|
+
encrypted_salt = base64.b64decode(result.encrypted_salt)
|
121
|
+
|
122
|
+
iv = cipher_with_iv[:_AES_GCM_IV_SIZE]
|
123
|
+
cipher = cipher_with_iv[_AES_GCM_IV_SIZE:]
|
124
|
+
|
125
|
+
return kem_decrypt(
|
126
|
+
private_key=private_key,
|
127
|
+
iv=iv,
|
128
|
+
ciphertext=cipher,
|
129
|
+
password=password,
|
130
|
+
encrypted_salt=encrypted_salt,
|
131
|
+
symmetric_algorithm=result.symmetric_algorithm,
|
132
|
+
asymmetric_algorithm=result.asymmetric_algorithm,
|
133
|
+
iteration_count=result.iteration_count,
|
134
|
+
hash_algorithm=result.hash_algorithm,
|
135
|
+
)
|
pangea/deep_verify.py
CHANGED
@@ -263,8 +263,14 @@ def main():
|
|
263
263
|
audit = init_audit(args.token, args.domain)
|
264
264
|
errors = deep_verify(audit, args.file)
|
265
265
|
|
266
|
-
print("\n\
|
266
|
+
print("\n\nWarnings:")
|
267
|
+
val = errors["not_persisted"]
|
268
|
+
print(f"\tnot_persisted: {val}")
|
269
|
+
|
270
|
+
print("\nTotal errors:")
|
267
271
|
for key, val in errors.items():
|
272
|
+
if key == "not_persisted":
|
273
|
+
continue
|
268
274
|
print(f"\t{key.title()}: {val}")
|
269
275
|
print()
|
270
276
|
|
pangea/dump_audit.py
CHANGED
@@ -19,7 +19,7 @@ from pangea.utils import default_encoder
|
|
19
19
|
|
20
20
|
|
21
21
|
def dump_event(output: io.TextIOWrapper, row: SearchEvent, resp: PangeaResponse[SearchOutput]):
|
22
|
-
row_data = filter_deep_none(row.
|
22
|
+
row_data = filter_deep_none(row.model_dump())
|
23
23
|
if resp.result and resp.result.root:
|
24
24
|
row_data["tree_size"] = resp.result.root.size
|
25
25
|
output.write(json.dumps(row_data, default=default_encoder) + "\n")
|
@@ -63,11 +63,12 @@ def dump_before(audit: Audit, output: io.TextIOWrapper, start: datetime) -> int:
|
|
63
63
|
cnt = 0
|
64
64
|
if search_res.result and search_res.result.count > 0:
|
65
65
|
leaf_index = search_res.result.events[0].leaf_index
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
if leaf_index is not None:
|
67
|
+
for row in reversed(search_res.result.events):
|
68
|
+
if row.leaf_index != leaf_index:
|
69
|
+
break
|
70
|
+
dump_event(output, row, search_res)
|
71
|
+
cnt += 1
|
71
72
|
print(f"Dumping before... {cnt} events")
|
72
73
|
return cnt
|
73
74
|
|
@@ -89,7 +90,7 @@ def dump_after(audit: Audit, output: io.TextIOWrapper, start: datetime, last_eve
|
|
89
90
|
cnt = 0
|
90
91
|
if search_res.result and search_res.result.count > 0:
|
91
92
|
leaf_index = search_res.result.events[0].leaf_index
|
92
|
-
if leaf_index == last_leaf_index:
|
93
|
+
if leaf_index is not None and leaf_index == last_leaf_index:
|
93
94
|
start_idx: int = 1 if last_event_hash == search_res.result.events[0].hash else 0
|
94
95
|
for row in search_res.result.events[start_idx:]:
|
95
96
|
if row.leaf_index != leaf_index:
|
@@ -124,7 +125,7 @@ def dump_page(
|
|
124
125
|
msg = f"Dumping... {search_res.result.count} events"
|
125
126
|
|
126
127
|
if search_res.result.count <= 1:
|
127
|
-
return end, 0
|
128
|
+
return end, 0, True, "", 0
|
128
129
|
|
129
130
|
offset = 0
|
130
131
|
result_id = search_res.result.id
|
pangea/file_uploader.py
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Copyright 2022 Pangea Cyber Corporation
|
2
|
+
# Author: Pangea Cyber Corporation
|
3
|
+
import io
|
4
|
+
import logging
|
5
|
+
from typing import Dict, Optional
|
6
|
+
|
7
|
+
from pangea.request import PangeaConfig, PangeaRequest
|
8
|
+
from pangea.response import TransferMethod
|
9
|
+
|
10
|
+
|
11
|
+
class FileUploader:
|
12
|
+
def __init__(self):
|
13
|
+
self.logger = logging.getLogger("pangea")
|
14
|
+
self._request = PangeaRequest(
|
15
|
+
config=PangeaConfig(),
|
16
|
+
token="",
|
17
|
+
service="FileUploader",
|
18
|
+
logger=self.logger,
|
19
|
+
)
|
20
|
+
|
21
|
+
def upload_file(
|
22
|
+
self,
|
23
|
+
url: str,
|
24
|
+
file: io.BufferedReader,
|
25
|
+
transfer_method: TransferMethod = TransferMethod.PUT_URL,
|
26
|
+
file_details: Optional[Dict] = None,
|
27
|
+
):
|
28
|
+
if transfer_method == TransferMethod.PUT_URL:
|
29
|
+
files = [("file", ("filename", file, "application/octet-stream"))]
|
30
|
+
self._request.put_presigned_url(url=url, files=files)
|
31
|
+
elif transfer_method == TransferMethod.POST_URL:
|
32
|
+
files = [("file", ("filename", file, "application/octet-stream"))]
|
33
|
+
self._request.post_presigned_url(url=url, data=file_details, files=files)
|
34
|
+
else:
|
35
|
+
raise ValueError(f"Transfer method not supported: {transfer_method}")
|
pangea/request.py
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
# Copyright 2022 Pangea Cyber Corporation
|
2
2
|
# Author: Pangea Cyber Corporation
|
3
|
+
from __future__ import annotations
|
3
4
|
|
4
5
|
import copy
|
5
6
|
import json
|
6
7
|
import logging
|
7
8
|
import time
|
8
|
-
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
|
9
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Type, Union
|
9
10
|
|
10
11
|
import requests
|
12
|
+
from pydantic import BaseModel
|
11
13
|
from requests.adapters import HTTPAdapter, Retry
|
12
|
-
from requests_toolbelt import MultipartDecoder # type: ignore
|
14
|
+
from requests_toolbelt import MultipartDecoder # type: ignore[import-untyped]
|
13
15
|
from typing_extensions import TypeVar
|
14
16
|
|
15
17
|
import pangea
|
@@ -22,7 +24,7 @@ if TYPE_CHECKING:
|
|
22
24
|
import aiohttp
|
23
25
|
|
24
26
|
|
25
|
-
class MultipartResponse
|
27
|
+
class MultipartResponse:
|
26
28
|
pangea_json: Dict[str, str]
|
27
29
|
attached_files: List = []
|
28
30
|
|
@@ -31,7 +33,7 @@ class MultipartResponse(object):
|
|
31
33
|
self.attached_files = attached_files
|
32
34
|
|
33
35
|
|
34
|
-
class PangeaRequestBase
|
36
|
+
class PangeaRequestBase:
|
35
37
|
def __init__(
|
36
38
|
self, config: PangeaConfig, token: str, service: str, logger: logging.Logger, config_id: Optional[str] = None
|
37
39
|
):
|
@@ -129,8 +131,7 @@ class PangeaRequestBase(object):
|
|
129
131
|
filename_parts = content_disposition.split("name=")
|
130
132
|
if len(filename_parts) > 1:
|
131
133
|
return filename_parts[1].split(";")[0].strip('"')
|
132
|
-
|
133
|
-
return None
|
134
|
+
return None
|
134
135
|
|
135
136
|
def _get_filename_from_url(self, url: str) -> Optional[str]:
|
136
137
|
return url.split("/")[-1].split("?")[0]
|
@@ -157,40 +158,40 @@ class PangeaRequestBase(object):
|
|
157
158
|
|
158
159
|
if status == ResponseStatus.VALIDATION_ERR.value:
|
159
160
|
raise pe.ValidationException(summary, response)
|
160
|
-
|
161
|
+
if status == ResponseStatus.TOO_MANY_REQUESTS.value:
|
161
162
|
raise pe.RateLimitException(summary, response)
|
162
|
-
|
163
|
+
if status == ResponseStatus.NO_CREDIT.value:
|
163
164
|
raise pe.NoCreditException(summary, response)
|
164
|
-
|
165
|
+
if status == ResponseStatus.UNAUTHORIZED.value:
|
165
166
|
raise pe.UnauthorizedException(self.service, response)
|
166
|
-
|
167
|
+
if status == ResponseStatus.SERVICE_NOT_ENABLED.value:
|
167
168
|
raise pe.ServiceNotEnabledException(self.service, response)
|
168
|
-
|
169
|
+
if status == ResponseStatus.PROVIDER_ERR.value:
|
169
170
|
raise pe.ProviderErrorException(summary, response)
|
170
|
-
|
171
|
+
if status in (ResponseStatus.MISSING_CONFIG_ID_SCOPE.value, ResponseStatus.MISSING_CONFIG_ID.value):
|
171
172
|
raise pe.MissingConfigID(self.service, response)
|
172
|
-
|
173
|
+
if status == ResponseStatus.SERVICE_NOT_AVAILABLE.value:
|
173
174
|
raise pe.ServiceNotAvailableException(summary, response)
|
174
|
-
|
175
|
+
if status == ResponseStatus.TREE_NOT_FOUND.value:
|
175
176
|
raise pe.TreeNotFoundException(summary, response)
|
176
|
-
|
177
|
+
if status == ResponseStatus.IP_NOT_FOUND.value:
|
177
178
|
raise pe.IPNotFoundException(summary, response)
|
178
|
-
|
179
|
+
if status == ResponseStatus.BAD_OFFSET.value:
|
179
180
|
raise pe.BadOffsetException(summary, response)
|
180
|
-
|
181
|
+
if status == ResponseStatus.FORBIDDEN_VAULT_OPERATION.value:
|
181
182
|
raise pe.ForbiddenVaultOperation(summary, response)
|
182
|
-
|
183
|
+
if status == ResponseStatus.VAULT_ITEM_NOT_FOUND.value:
|
183
184
|
raise pe.VaultItemNotFound(summary, response)
|
184
|
-
|
185
|
-
raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response)
|
186
|
-
|
185
|
+
if status == ResponseStatus.NOT_FOUND.value:
|
186
|
+
raise pe.NotFound(str(response.raw_response.url) if response.raw_response is not None else "", response)
|
187
|
+
if status == ResponseStatus.INTERNAL_SERVER_ERROR.value:
|
187
188
|
raise pe.InternalServerError(response)
|
188
|
-
|
189
|
+
if status == ResponseStatus.ACCEPTED.value:
|
189
190
|
raise pe.AcceptedRequestException(response)
|
190
191
|
raise pe.PangeaAPIException(f"{summary} ", response)
|
191
192
|
|
192
193
|
|
193
|
-
TResult = TypeVar("TResult", bound=PangeaResponseResult
|
194
|
+
TResult = TypeVar("TResult", bound=PangeaResponseResult)
|
194
195
|
|
195
196
|
|
196
197
|
class PangeaRequest(PangeaRequestBase):
|
@@ -209,7 +210,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
209
210
|
self,
|
210
211
|
endpoint: str,
|
211
212
|
result_class: Type[TResult],
|
212
|
-
data:
|
213
|
+
data: str | BaseModel | dict[str, Any] | None = None,
|
213
214
|
files: Optional[List[Tuple]] = None,
|
214
215
|
poll_result: bool = True,
|
215
216
|
url: Optional[str] = None,
|
@@ -224,6 +225,13 @@ class PangeaRequest(PangeaRequestBase):
|
|
224
225
|
PangeaResponse which contains the response in its entirety and
|
225
226
|
various properties to retrieve individual fields
|
226
227
|
"""
|
228
|
+
|
229
|
+
if isinstance(data, BaseModel):
|
230
|
+
data = data.model_dump(exclude_none=True)
|
231
|
+
|
232
|
+
if data is None:
|
233
|
+
data = {}
|
234
|
+
|
227
235
|
if url is None:
|
228
236
|
url = self._url(endpoint)
|
229
237
|
|
@@ -324,28 +332,29 @@ class PangeaRequest(PangeaRequestBase):
|
|
324
332
|
return self.session.post(url, headers=headers, data=data_send, files=files)
|
325
333
|
|
326
334
|
def _http_post_process(
|
327
|
-
self,
|
335
|
+
self,
|
336
|
+
data: Union[str, Dict] = {},
|
337
|
+
files: Optional[Sequence[Tuple[str, Tuple[Any, str, str]]]] = None,
|
338
|
+
multipart_post: bool = True,
|
328
339
|
):
|
329
340
|
if files:
|
330
341
|
if multipart_post is True:
|
331
342
|
data_send: str = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
332
343
|
multi = [("request", (None, data_send, "application/json"))]
|
333
|
-
multi.extend(files)
|
334
|
-
files = multi
|
344
|
+
multi.extend(files)
|
345
|
+
files = multi
|
335
346
|
return None, files
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
data_send = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
348
|
-
return data_send, None
|
347
|
+
# Post to presigned url as form
|
348
|
+
data_send: list = [] # type: ignore[no-redef]
|
349
|
+
for k, v in data.items(): # type: ignore[union-attr]
|
350
|
+
data_send.append((k, v)) # type: ignore[attr-defined]
|
351
|
+
# When posting to presigned url, file key should be 'file'
|
352
|
+
files = { # type: ignore[assignment]
|
353
|
+
"file": files[0][1],
|
354
|
+
}
|
355
|
+
return data_send, files
|
356
|
+
data_send = json.dumps(data, default=default_encoder) if isinstance(data, dict) else data
|
357
|
+
return data_send, None
|
349
358
|
|
350
359
|
return data, files
|
351
360
|
|
@@ -393,7 +402,20 @@ class PangeaRequest(PangeaRequestBase):
|
|
393
402
|
|
394
403
|
return self._check_response(pangea_response)
|
395
404
|
|
396
|
-
def download_file(self, url: str, filename:
|
405
|
+
def download_file(self, url: str, filename: str | None = None) -> AttachedFile:
|
406
|
+
"""
|
407
|
+
Download file
|
408
|
+
|
409
|
+
Download a file from the specified URL and save it with the given
|
410
|
+
filename.
|
411
|
+
|
412
|
+
Args:
|
413
|
+
url: URL of the file to download
|
414
|
+
filename: Name to save the downloaded file as. If not provided, the
|
415
|
+
filename will be determined from the Content-Disposition header or
|
416
|
+
the URL.
|
417
|
+
"""
|
418
|
+
|
397
419
|
self.logger.debug(
|
398
420
|
json.dumps(
|
399
421
|
{
|
@@ -429,8 +451,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
429
451
|
)
|
430
452
|
)
|
431
453
|
return AttachedFile(filename=filename, file=response.content, content_type=content_type)
|
432
|
-
|
433
|
-
raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
|
454
|
+
raise pe.DownloadFileError(f"Failed to download file. Status: {response.status_code}", response.text)
|
434
455
|
|
435
456
|
def poll_result_by_id(
|
436
457
|
self, request_id: str, result_class: Type[TResult], check_response: bool = True
|
@@ -459,10 +480,8 @@ class PangeaRequest(PangeaRequestBase):
|
|
459
480
|
) -> PangeaResponse:
|
460
481
|
# Send request
|
461
482
|
try:
|
462
|
-
# This should return 202 (AcceptedRequestException)
|
463
|
-
|
464
|
-
raise pe.PresignedURLException("Should return 202", resp)
|
465
|
-
|
483
|
+
# This should return 202 (AcceptedRequestException) at least zero size file is sent
|
484
|
+
return self.post(endpoint=endpoint, result_class=result_class, data=data, poll_result=False)
|
466
485
|
except pe.AcceptedRequestException as e:
|
467
486
|
accepted_exception = e
|
468
487
|
except Exception as e:
|
@@ -520,6 +539,9 @@ class PangeaRequest(PangeaRequestBase):
|
|
520
539
|
raise AttributeError("files attribute should have at least 1 file")
|
521
540
|
|
522
541
|
response = self.request_presigned_url(endpoint=endpoint, result_class=result_class, data=data)
|
542
|
+
|
543
|
+
if response.success: # This should only happen when uploading a zero bytes file
|
544
|
+
return response.raw_response
|
523
545
|
if response.accepted_result is None:
|
524
546
|
raise pe.PangeaException("No accepted_result field when requesting presigned url")
|
525
547
|
if response.accepted_result.post_url is None:
|
@@ -586,8 +608,7 @@ class PangeaRequest(PangeaRequestBase):
|
|
586
608
|
|
587
609
|
if loop_resp.accepted_result is not None and not loop_resp.accepted_result.has_upload_url:
|
588
610
|
return loop_resp
|
589
|
-
|
590
|
-
raise loop_exc
|
611
|
+
raise loop_exc
|
591
612
|
|
592
613
|
def _init_session(self) -> requests.Session:
|
593
614
|
retry_config = Retry(
|
pangea/response.py
CHANGED
@@ -7,8 +7,8 @@ from typing import Any, Dict, Generic, List, Optional, Type, Union
|
|
7
7
|
|
8
8
|
import aiohttp
|
9
9
|
import requests
|
10
|
-
from pydantic import BaseModel
|
11
|
-
from typing_extensions import TypeVar
|
10
|
+
from pydantic import BaseModel, ConfigDict, PlainSerializer
|
11
|
+
from typing_extensions import Annotated, TypeVar
|
12
12
|
|
13
13
|
from pangea.utils import format_datetime
|
14
14
|
|
@@ -28,6 +28,7 @@ class AttachedFile(object):
|
|
28
28
|
filename = self.filename if self.filename else "default_save_filename"
|
29
29
|
|
30
30
|
filepath = os.path.join(dest_folder, filename)
|
31
|
+
filepath = self._find_available_file(filepath)
|
31
32
|
directory = os.path.dirname(filepath)
|
32
33
|
if not os.path.exists(directory):
|
33
34
|
os.makedirs(directory)
|
@@ -35,12 +36,37 @@ class AttachedFile(object):
|
|
35
36
|
with open(filepath, "wb") as file:
|
36
37
|
file.write(self.file)
|
37
38
|
|
39
|
+
def _find_available_file(self, file_path):
|
40
|
+
base_name, ext = os.path.splitext(file_path)
|
41
|
+
counter = 1
|
42
|
+
while os.path.exists(file_path):
|
43
|
+
if ext:
|
44
|
+
file_path = f"{base_name}_{counter}{ext}"
|
45
|
+
else:
|
46
|
+
file_path = f"{base_name}_{counter}"
|
47
|
+
counter += 1
|
48
|
+
return file_path
|
49
|
+
|
38
50
|
|
39
51
|
class TransferMethod(str, enum.Enum):
|
52
|
+
"""Transfer methods for uploading file data."""
|
53
|
+
|
40
54
|
MULTIPART = "multipart"
|
41
55
|
POST_URL = "post-url"
|
42
56
|
PUT_URL = "put-url"
|
43
57
|
SOURCE_URL = "source-url"
|
58
|
+
"""
|
59
|
+
A `source-url` is a caller-specified URL where the Pangea APIs can fetch the
|
60
|
+
contents of the input file. When calling a Pangea API with a
|
61
|
+
`transfer_method` of `source-url`, you must also specify a `source_url`
|
62
|
+
input parameter that provides a URL to the input file. The source URL can be
|
63
|
+
a presigned URL created by the caller, and it will be used to download the
|
64
|
+
content of the input file. The `source-url` transfer method is useful when
|
65
|
+
you already have a file in your storage and can provide a URL from which
|
66
|
+
Pangea API can fetch the input file—there is no need to transfer it to
|
67
|
+
Pangea with a separate POST or PUT request.
|
68
|
+
"""
|
69
|
+
|
44
70
|
DEST_URL = "dest-url"
|
45
71
|
|
46
72
|
def __str__(self):
|
@@ -50,24 +76,17 @@ class TransferMethod(str, enum.Enum):
|
|
50
76
|
return str(self.value)
|
51
77
|
|
52
78
|
|
79
|
+
PangeaDateTime = Annotated[datetime.datetime, PlainSerializer(format_datetime)]
|
80
|
+
|
81
|
+
|
53
82
|
# API response should accept arbitrary fields to make them accept possible new parameters
|
54
83
|
class APIResponseModel(BaseModel):
|
55
|
-
|
56
|
-
arbitrary_types_allowed = True
|
57
|
-
# allow parameters despite they are not declared in model. Make SDK accept server new parameters
|
58
|
-
extra = "allow"
|
84
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
|
59
85
|
|
60
86
|
|
61
87
|
# API request models doesn't not allow arbitrary fields
|
62
88
|
class APIRequestModel(BaseModel):
|
63
|
-
|
64
|
-
arbitrary_types_allowed = True
|
65
|
-
extra = (
|
66
|
-
"allow" # allow parameters despite they are not declared in model. Make SDK accept server new parameters
|
67
|
-
)
|
68
|
-
json_encoders = {
|
69
|
-
datetime.datetime: format_datetime,
|
70
|
-
}
|
89
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
|
71
90
|
|
72
91
|
|
73
92
|
class PangeaResponseResult(APIResponseModel):
|
@@ -169,10 +188,10 @@ class ResponseHeader(APIResponseModel):
|
|
169
188
|
"""
|
170
189
|
|
171
190
|
|
172
|
-
T = TypeVar("T", bound=PangeaResponseResult
|
191
|
+
T = TypeVar("T", bound=PangeaResponseResult)
|
173
192
|
|
174
193
|
|
175
|
-
class PangeaResponse(Generic[T]
|
194
|
+
class PangeaResponse(ResponseHeader, Generic[T]):
|
176
195
|
raw_result: Optional[Dict[str, Any]] = None
|
177
196
|
raw_response: Optional[Union[requests.Response, aiohttp.ClientResponse]] = None
|
178
197
|
result: Optional[T] = None
|
@@ -229,4 +248,4 @@ class PangeaResponse(Generic[T], ResponseHeader):
|
|
229
248
|
|
230
249
|
@property
|
231
250
|
def url(self) -> str:
|
232
|
-
return str(self.raw_response.url) # type: ignore[
|
251
|
+
return str(self.raw_response.url) # type: ignore[union-attr]
|
pangea/services/__init__.py
CHANGED