pangea-sdk 3.8.0b1__py3-none-any.whl → 5.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pangea/__init__.py +1 -1
- pangea/asyncio/file_uploader.py +1 -1
- pangea/asyncio/request.py +49 -31
- pangea/asyncio/services/__init__.py +2 -0
- pangea/asyncio/services/audit.py +192 -31
- pangea/asyncio/services/authn.py +187 -109
- pangea/asyncio/services/authz.py +285 -0
- pangea/asyncio/services/base.py +21 -2
- pangea/asyncio/services/embargo.py +2 -2
- pangea/asyncio/services/file_scan.py +24 -9
- pangea/asyncio/services/intel.py +108 -34
- pangea/asyncio/services/redact.py +72 -4
- pangea/asyncio/services/sanitize.py +217 -0
- pangea/asyncio/services/share.py +246 -73
- pangea/asyncio/services/vault.py +1710 -750
- pangea/crypto/rsa.py +135 -0
- pangea/deep_verify.py +7 -1
- pangea/dump_audit.py +9 -8
- pangea/request.py +83 -59
- pangea/response.py +49 -31
- pangea/services/__init__.py +2 -0
- pangea/services/audit/audit.py +205 -42
- pangea/services/audit/models.py +56 -8
- pangea/services/audit/signing.py +6 -5
- pangea/services/audit/util.py +3 -3
- pangea/services/authn/authn.py +140 -70
- pangea/services/authn/models.py +167 -11
- pangea/services/authz.py +400 -0
- pangea/services/base.py +39 -8
- pangea/services/embargo.py +2 -2
- pangea/services/file_scan.py +32 -15
- pangea/services/intel.py +157 -32
- pangea/services/redact.py +152 -4
- pangea/services/sanitize.py +388 -0
- pangea/services/share/share.py +683 -107
- pangea/services/vault/models/asymmetric.py +120 -18
- pangea/services/vault/models/common.py +439 -141
- pangea/services/vault/models/keys.py +94 -0
- pangea/services/vault/models/secret.py +27 -3
- pangea/services/vault/models/symmetric.py +68 -22
- pangea/services/vault/vault.py +1690 -749
- pangea/tools.py +6 -7
- pangea/utils.py +16 -27
- pangea/verify_audit.py +270 -83
- {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.3.0.dist-info}/METADATA +43 -35
- pangea_sdk-5.3.0.dist-info/RECORD +56 -0
- {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.3.0.dist-info}/WHEEL +1 -1
- pangea_sdk-3.8.0b1.dist-info/RECORD +0 -50
pangea/services/redact.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
# Copyright 2022 Pangea Cyber Corporation
|
2
2
|
# Author: Pangea Cyber Corporation
|
3
|
+
from __future__ import annotations
|
3
4
|
|
4
5
|
import enum
|
5
6
|
from typing import Dict, List, Optional, Union
|
6
7
|
|
8
|
+
from pangea.config import PangeaConfig
|
7
9
|
from pangea.response import APIRequestModel, APIResponseModel, PangeaResponse, PangeaResponseResult
|
8
10
|
from pangea.services.base import ServiceBase
|
9
11
|
|
@@ -15,6 +17,44 @@ class RedactFormat(str, enum.Enum):
|
|
15
17
|
"""JSON format."""
|
16
18
|
|
17
19
|
|
20
|
+
class RedactType(str, enum.Enum):
    """String-valued enumeration of redaction method identifiers.

    Used as the type of `RedactionMethodOverrides.redaction_type`. Because the
    enum also derives from `str`, each member compares equal to its wire value.
    """

    MASK = "mask"
    PARTIAL_MASKING = "partial_masking"
    REPLACEMENT = "replacement"
    DETECT_ONLY = "detect_only"
    HASH = "hash"
    FPE = "fpe"
|
27
|
+
|
28
|
+
|
29
|
+
class FPEAlphabet(str, enum.Enum):
    """String-valued enumeration of alphabet names.

    Used as the type of `RedactionMethodOverrides.fpe_alphabet`.
    NOTE(review): "FPE" presumably stands for format-preserving encryption -
    confirm against the service documentation.
    """

    NUMERIC = "numeric"
    ALPHANUMERICLOWER = "alphanumericlower"
    ALPHANUMERIC = "alphanumeric"
|
33
|
+
|
34
|
+
|
35
|
+
class MaskingType(str, enum.Enum):
    """String-valued enumeration used as the type of `PartialMasking.masking_type`."""

    MASK = "mask"
    UNMASK = "unmask"
|
38
|
+
|
39
|
+
|
40
|
+
class PartialMasking(APIRequestModel):
    """Request model carrying partial-masking options.

    Every field is optional and defaults to None. Used as the type of
    `RedactionMethodOverrides.partial_masking`.
    """

    masking_type: Optional[MaskingType] = None
    # NOTE(review): the four counters below presumably give how many characters
    # to leave unmasked / to mask from each end - confirm against the API docs.
    unmasked_from_left: Optional[int] = None
    unmasked_from_right: Optional[int] = None
    masked_from_left: Optional[int] = None
    masked_from_right: Optional[int] = None
    chars_to_ignore: Optional[List[str]] = None
    # NOTE(review): declared as a list of strings, not a single character -
    # verify the expected shape against the API docs.
    masking_char: Optional[List[str]] = None
|
48
|
+
|
49
|
+
|
50
|
+
class RedactionMethodOverrides(APIRequestModel):
    """Request model describing a redaction method override.

    `redaction_type` is the only required field; the remaining fields are
    optional and default to None. Used as the type of the
    `redaction_method_overrides` field on `RedactRequest` and
    `StructuredRequest`.
    """

    redaction_type: RedactType
    # Untyped dict; its schema is not visible in this file.
    hash: Optional[Dict] = None
    fpe_alphabet: Optional[FPEAlphabet] = None
    partial_masking: Optional[PartialMasking] = None
    redaction_value: Optional[str] = None
|
56
|
+
|
57
|
+
|
18
58
|
class RedactRequest(APIRequestModel):
|
19
59
|
"""
|
20
60
|
Input class to make a redact request
|
@@ -25,6 +65,18 @@ class RedactRequest(APIRequestModel):
|
|
25
65
|
rules: Optional[List[str]] = None
|
26
66
|
rulesets: Optional[List[str]] = None
|
27
67
|
return_result: Optional[bool] = None
|
68
|
+
redaction_method_overrides: Optional[RedactionMethodOverrides] = None
|
69
|
+
vault_parameters: Optional[VaultParameters] = None
|
70
|
+
llm_request: Optional[bool] = None
|
71
|
+
"""Is this redact call going to be used in an LLM request?"""
|
72
|
+
|
73
|
+
|
74
|
+
class VaultParameters(APIRequestModel):
    """Optional vault references sent with a redact request.

    Used as the type of the `vault_parameters` field on `RedactRequest` and
    `StructuredRequest`. Both fields default to None.
    """

    fpe_key_id: Optional[str] = None
    """A vault key ID of an exportable key used to redact with FPE instead of using the service config default."""

    salt_secret_id: Optional[str] = None
    """A vault secret ID of a secret used to salt a hash instead of using the service config default."""
|
28
80
|
|
29
81
|
|
30
82
|
class RecognizerResult(APIResponseModel):
|
@@ -67,11 +119,13 @@ class RedactResult(PangeaResponseResult):
|
|
67
119
|
redact_text: Redacted text result
|
68
120
|
count: Number of redactions present in the text
|
69
121
|
report: Describes the decision process for redactions
|
122
|
+
fpe_context: FPE context used to encrypt and redact data
|
70
123
|
"""
|
71
124
|
|
72
125
|
redacted_text: Optional[str] = None
|
73
126
|
count: int
|
74
127
|
report: Optional[DebugReport] = None
|
128
|
+
fpe_context: Optional[str] = None
|
75
129
|
|
76
130
|
|
77
131
|
class StructuredRequest(APIRequestModel):
|
@@ -92,6 +146,10 @@ class StructuredRequest(APIRequestModel):
|
|
92
146
|
rules: Optional[List[str]] = None
|
93
147
|
rulesets: Optional[List[str]] = None
|
94
148
|
return_result: Optional[bool] = None
|
149
|
+
redaction_method_overrides: Optional[RedactionMethodOverrides] = None
|
150
|
+
vault_parameters: Optional[VaultParameters] = None
|
151
|
+
llm_request: Optional[bool] = None
|
152
|
+
"""Is this redact call going to be used in an LLM request?"""
|
95
153
|
|
96
154
|
|
97
155
|
class StructuredResult(PangeaResponseResult):
|
@@ -105,6 +163,32 @@ class StructuredResult(PangeaResponseResult):
|
|
105
163
|
report: Optional[DebugReport] = None
|
106
164
|
|
107
165
|
|
166
|
+
class UnredactRequest(APIRequestModel):
    """
    Input class for an unredact request.

    Both fields are required.

    Arguments:
        redacted_data: Data to unredact (a string or a dict, per `RedactedData`)
        fpe_context (base64): FPE context used to decrypt and unredact data

    """

    redacted_data: RedactedData
    fpe_context: str
|
178
|
+
|
179
|
+
|
180
|
+
RedactedData = Union[str, Dict]
|
181
|
+
|
182
|
+
|
183
|
+
class UnredactResult(PangeaResponseResult):
    """
    Result class after an unredact request.

    Holds a single required field, `data`, typed as `RedactedData`
    (a string or a dict).

    """

    data: RedactedData
|
190
|
+
|
191
|
+
|
108
192
|
class Redact(ServiceBase):
|
109
193
|
"""Redact service client.
|
110
194
|
|
@@ -132,7 +216,24 @@ class Redact(ServiceBase):
|
|
132
216
|
|
133
217
|
service_name = "redact"
|
134
218
|
|
135
|
-
def __init__(
|
219
|
+
def __init__(
    self, token: str, config: PangeaConfig | None = None, logger_name: str = "pangea", config_id: str | None = None
) -> None:
    """
    Redact client

    Initializes a new Redact client. All arguments are forwarded unchanged
    to the base class initializer.

    Args:
        token: Pangea API token.
        config: Configuration.
        logger_name: Logger name.
        config_id: Configuration ID.

    Examples:
        config = PangeaConfig(domain="pangea_domain")
        redact = Redact(token="pangea_token", config=config)
    """
    super().__init__(token, config, logger_name, config_id=config_id)
|
137
238
|
|
138
239
|
def redact(
|
@@ -142,6 +243,9 @@ class Redact(ServiceBase):
|
|
142
243
|
rules: Optional[List[str]] = None,
|
143
244
|
rulesets: Optional[List[str]] = None,
|
144
245
|
return_result: Optional[bool] = None,
|
246
|
+
redaction_method_overrides: Optional[RedactionMethodOverrides] = None,
|
247
|
+
llm_request: Optional[bool] = None,
|
248
|
+
vault_parameters: Optional[VaultParameters] = None,
|
145
249
|
) -> PangeaResponse[RedactResult]:
|
146
250
|
"""
|
147
251
|
Redact
|
@@ -157,6 +261,9 @@ class Redact(ServiceBase):
|
|
157
261
|
rules (list[str], optional): An array of redact rule short names
|
158
262
|
rulesets (list[str], optional): An array of redact rulesets short names
|
159
263
|
return_result(bool, optional): Setting this value to false will omit the redacted result only returning count
|
264
|
+
redaction_method_overrides: A set of redaction method overrides for any enabled rule. These methods override the config declared methods
|
265
|
+
llm_request: Boolean flag to enable FPE redaction for LLM requests
|
266
|
+
vault_parameters: A set of vault parameters to use for redaction
|
160
267
|
|
161
268
|
Raises:
|
162
269
|
PangeaAPIException: If an API Error happens
|
@@ -170,8 +277,17 @@ class Redact(ServiceBase):
|
|
170
277
|
response = redact.redact(text="Jenny Jenny... 555-867-5309")
|
171
278
|
"""
|
172
279
|
|
173
|
-
input = RedactRequest(
|
174
|
-
|
280
|
+
input = RedactRequest(
|
281
|
+
text=text,
|
282
|
+
debug=debug,
|
283
|
+
rules=rules,
|
284
|
+
rulesets=rulesets,
|
285
|
+
return_result=return_result,
|
286
|
+
redaction_method_overrides=redaction_method_overrides,
|
287
|
+
llm_request=llm_request,
|
288
|
+
vault_parameters=vault_parameters,
|
289
|
+
)
|
290
|
+
return self.request.post("v1/redact", RedactResult, data=input.model_dump(exclude_none=True))
|
175
291
|
|
176
292
|
def redact_structured(
|
177
293
|
self,
|
@@ -182,6 +298,9 @@ class Redact(ServiceBase):
|
|
182
298
|
rules: Optional[List[str]] = None,
|
183
299
|
rulesets: Optional[List[str]] = None,
|
184
300
|
return_result: Optional[bool] = None,
|
301
|
+
redaction_method_overrides: Optional[RedactionMethodOverrides] = None,
|
302
|
+
llm_request: Optional[bool] = None,
|
303
|
+
vault_parameters: Optional[VaultParameters] = None,
|
185
304
|
) -> PangeaResponse[StructuredResult]:
|
186
305
|
"""
|
187
306
|
Redact structured
|
@@ -201,6 +320,9 @@ class Redact(ServiceBase):
|
|
201
320
|
rules (list[str], optional): An array of redact rule short names
|
202
321
|
rulesets (list[str], optional): An array of redact rulesets short names
|
203
322
|
return_result(bool, optional): Setting this value to false will omit the redacted result only returning count
|
323
|
+
redaction_method_overrides: A set of redaction method overrides for any enabled rule. These methods override the config declared methods
|
324
|
+
llm_request: Boolean flag to enable FPE redaction for LLM requests
|
325
|
+
vault_parameters: A set of vault parameters to use for redaction
|
204
326
|
|
205
327
|
Raises:
|
206
328
|
PangeaAPIException: If an API Error happens
|
@@ -227,5 +349,31 @@ class Redact(ServiceBase):
|
|
227
349
|
rules=rules,
|
228
350
|
rulesets=rulesets,
|
229
351
|
return_result=return_result,
|
352
|
+
redaction_method_overrides=redaction_method_overrides,
|
353
|
+
llm_request=llm_request,
|
354
|
+
vault_parameters=vault_parameters,
|
230
355
|
)
|
231
|
-
return self.request.post("v1/redact_structured", StructuredResult, data=input.
|
356
|
+
return self.request.post("v1/redact_structured", StructuredResult, data=input.model_dump(exclude_none=True))
|
357
|
+
|
358
|
+
def unredact(self, redacted_data: RedactedData, fpe_context: str) -> PangeaResponse[UnredactResult]:
    """
    Unredact

    Decrypt or unredact fpe redactions

    OperationId: redact_post_v1_unredact

    Args:
        redacted_data: Data to unredact
        fpe_context (base64): FPE context used to decrypt and unredact data

    Raises:
        PangeaAPIException: If an API Error happens

    Returns:
        Pangea Response with the unredacted data in the response.result field,
            available response fields can be found in our
            [API Documentation](https://pangea.cloud/docs/api/redact#unredact)
    """
    # Build and validate the request model, then serialize it without unset
    # (None) fields before posting.
    payload = UnredactRequest(redacted_data=redacted_data, fpe_context=fpe_context).model_dump(exclude_none=True)
    return self.request.post("v1/unredact", UnredactResult, data=payload)
|
@@ -0,0 +1,388 @@
|
|
1
|
+
# Copyright 2022 Pangea Cyber Corporation
|
2
|
+
# Author: Pangea Cyber Corporation
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import io
|
6
|
+
from typing import Dict, List, Optional, Tuple
|
7
|
+
|
8
|
+
from pydantic import Field
|
9
|
+
|
10
|
+
from pangea.config import PangeaConfig
|
11
|
+
from pangea.response import APIRequestModel, PangeaResponse, PangeaResponseResult, TransferMethod
|
12
|
+
from pangea.services.base import ServiceBase
|
13
|
+
from pangea.utils import FileUploadParams, get_file_upload_params
|
14
|
+
|
15
|
+
|
16
|
+
class SanitizeFile(APIRequestModel):
    """File Scan options for a sanitize request (the `file` field of `SanitizeRequest`)."""

    scan_provider: Optional[str] = None
    """Provider to use for File Scan."""
|
19
|
+
|
20
|
+
|
21
|
+
class SanitizeContent(APIRequestModel):
    """Content-sanitization options (the `content` field of `SanitizeRequest`).

    Every field is optional and defaults to None.
    """

    url_intel: Optional[bool] = None
    """Perform URL Intel lookup."""

    url_intel_provider: Optional[str] = None
    """Provider to use for URL Intel."""

    domain_intel: Optional[bool] = None
    """Perform Domain Intel lookup."""

    domain_intel_provider: Optional[str] = None
    """Provider to use for Domain Intel lookup."""

    defang: Optional[bool] = None
    """Defang external links."""

    defang_threshold: Optional[int] = None
    """Defang risk threshold."""

    redact: Optional[bool] = None
    """Redact sensitive content."""

    redact_detect_only: Optional[bool] = None
    """
    If redact is enabled, avoids redacting the file and instead returns the PII
    analysis engine results. Only works if redact is enabled.
    """

    remove_attachments: Optional[bool] = None
    """Remove file attachments (PDF only)."""

    remove_interactive: Optional[bool] = None
    """Remove interactive content (PDF only)."""
|
54
|
+
|
55
|
+
|
56
|
+
class SanitizeShareOutput(APIRequestModel):
    """Secure Share output options (the `share_output` field of `SanitizeRequest`)."""

    enabled: Optional[bool] = None
    """Store Sanitized files to Pangea Secure Share."""

    output_folder: Optional[str] = None
    """
    Store Sanitized files to this Secure Share folder (will be auto-created if
    it does not exist)
    """
|
65
|
+
|
66
|
+
|
67
|
+
class SanitizeRequest(APIRequestModel):
    """Request model for the `v1/sanitize` endpoint.

    Built by `Sanitize.sanitize` and `Sanitize.request_upload_url`, which
    serialize it with `model_dump(exclude_none=True)`. Every field except
    `transfer_method` defaults to None.
    """

    transfer_method: TransferMethod = TransferMethod.POST_URL
    """The transfer method used to upload the file data."""

    source_url: Optional[str] = None
    """A URL where the file to be sanitized can be downloaded."""

    share_id: Optional[str] = None
    """A Pangea Secure Share ID where the file to be Sanitized is stored."""

    file: Optional[SanitizeFile] = None
    """File."""

    content: Optional[SanitizeContent] = None
    """Content."""

    share_output: Optional[SanitizeShareOutput] = None
    """Share output."""

    size: Optional[int] = None
    """The size (in bytes) of the file. If the upload doesn't match, the call will fail."""

    crc32c: Optional[str] = None
    """The CRC32C hash of the file data, which will be verified by the server if provided."""

    sha256: Optional[str] = None
    """The hexadecimal-encoded SHA256 hash of the file data, which will be verified by the server if provided."""

    uploaded_file_name: Optional[str] = None
    """Name of the user-uploaded file, required for transfer-method 'put-url' and 'post-url'."""
|
97
|
+
|
98
|
+
|
99
|
+
class DefangData(PangeaResponseResult):
    """Defang statistics in a sanitize response (the `defang` field of `SanitizeData`).

    Every field is optional and defaults to None.
    """

    external_urls_count: Optional[int] = None
    """Number of external links found."""

    external_domains_count: Optional[int] = None
    """Number of external domains found."""

    defanged_count: Optional[int] = None
    """Number of items defanged per provided rules and detections."""

    url_intel_summary: Optional[str] = None
    """Processed N URLs: X are malicious, Y are suspicious, Z are unknown."""

    domain_intel_summary: Optional[str] = None
    """Processed N Domains: X are malicious, Y are suspicious, Z are unknown."""
|
114
|
+
|
115
|
+
|
116
|
+
class RedactRecognizerResult(PangeaResponseResult):
    """One recognizer match (element type of `RedactData.recognizer_results`).

    All fields are required.
    """

    field_type: str
    """The entity name."""

    score: float
    """The certainty score that the entity matches this specific snippet."""

    text: str
    """The text snippet that matched."""

    start: int
    """The starting index of a snippet."""

    end: int
    """The ending index of a snippet."""

    redacted: bool
    """Indicates if this rule was used to anonymize a text snippet."""
|
134
|
+
|
135
|
+
|
136
|
+
class RedactData(PangeaResponseResult):
    """Redaction statistics in a sanitize response (the `redact` field of `SanitizeData`).

    `redaction_count` is required; `summary_counts` defaults to a fresh empty
    dict and `recognizer_results` defaults to None.
    """

    redaction_count: int
    """Number of items redacted"""

    summary_counts: Dict[str, int] = Field(default_factory=dict)
    """Summary counts."""

    recognizer_results: Optional[List[RedactRecognizerResult]] = None
    """The scoring result of a set of rules."""
|
145
|
+
|
146
|
+
|
147
|
+
class CDR(PangeaResponseResult):
    """Content Disarm and Reconstruction counters (the `cdr` field of `SanitizeData`)."""

    file_attachments_removed: Optional[int] = None
    """Number of file attachments removed."""

    interactive_contents_removed: Optional[int] = None
    """Number of interactive content items removed."""
|
153
|
+
|
154
|
+
|
155
|
+
class SanitizeData(PangeaResponseResult):
    """Per-feature details of a sanitize operation (the `data` field of `SanitizeResult`).

    Every field is optional and defaults to None.
    """

    defang: Optional[DefangData] = None
    """Defang."""

    redact: Optional[RedactData] = None
    """Redact."""

    malicious_file: Optional[bool] = None
    """If the file scanned was malicious."""

    cdr: Optional[CDR] = None
    """Content Disarm and Reconstruction."""
|
167
|
+
|
168
|
+
|
169
|
+
class SanitizeResult(PangeaResponseResult):
    """Result model for the `v1/sanitize` endpoint.

    Used as the result type by `Sanitize.sanitize` and
    `Sanitize.request_upload_url`. `data` is the only required field.
    """

    dest_url: Optional[str] = None
    """A URL where the Sanitized file can be downloaded."""

    dest_share_id: Optional[str] = None
    """Pangea Secure Share ID of the Sanitized file."""

    data: SanitizeData
    """Sanitize data."""

    parameters: Dict = {}
    """The parameters, which were passed in the request, echoed back."""
|
181
|
+
|
182
|
+
|
183
|
+
class Sanitize(ServiceBase):
    """Sanitize service client.

    Examples:
        import os

        # Pangea SDK
        from pangea.config import PangeaConfig
        from pangea.services import Sanitize

        PANGEA_SANITIZE_TOKEN = os.getenv("PANGEA_SANITIZE_TOKEN")
        config = PangeaConfig(domain="pangea.cloud")

        sanitize = Sanitize(token=PANGEA_SANITIZE_TOKEN, config=config)
    """

    service_name = "sanitize"

    def __init__(
        self, token: str, config: PangeaConfig | None = None, logger_name: str = "pangea", config_id: str | None = None
    ) -> None:
        """
        Sanitize client

        Initializes a new Sanitize client.

        Args:
            token: Pangea API token.
            config: Configuration.
            logger_name: Logger name.
            config_id: Configuration ID.

        Examples:
            config = PangeaConfig(domain="aws.us.pangea.cloud")
            sanitize = Sanitize(token="pangea_token", config=config)
        """

        super().__init__(token, config, logger_name, config_id=config_id)

    def sanitize(
        self,
        transfer_method: TransferMethod = TransferMethod.POST_URL,
        file_path: Optional[str] = None,
        file: Optional[io.BufferedReader] = None,
        source_url: Optional[str] = None,
        share_id: Optional[str] = None,
        file_scan: Optional[SanitizeFile] = None,
        content: Optional[SanitizeContent] = None,
        share_output: Optional[SanitizeShareOutput] = None,
        size: Optional[int] = None,
        crc32c: Optional[str] = None,
        sha256: Optional[str] = None,
        uploaded_file_name: Optional[str] = None,
        sync_call: bool = True,
    ) -> PangeaResponse[SanitizeResult]:
        """
        Sanitize

        Apply file sanitization actions according to specified rules.

        OperationId: sanitize_post_v1_sanitize

        Args:
            transfer_method: The transfer method used to upload the file data.
            file_path: Path to file to sanitize. When given, the file is
                opened here in binary mode and always closed before returning.
            file: File to sanitize. Ignored when `file_path` is also given.
            source_url: A URL where the file to be sanitized can be downloaded.
            share_id: A Pangea Secure Share ID where the file to be sanitized is stored.
            file_scan: Options for File Scan.
            content: Options for how the file should be sanitized.
            share_output: Integration with Secure Share.
            size: The size (in bytes) of the file. If the upload doesn't match, the call will fail.
            crc32c: The CRC32C hash of the file data, which will be verified by the server if provided.
            sha256: The hexadecimal-encoded SHA256 hash of the file data, which will be verified by the server if provided.
            uploaded_file_name: Name of the user-uploaded file, required for `TransferMethod.PUT_URL` and `TransferMethod.POST_URL`.
            sync_call: Whether or not to poll on HTTP/202.

        Raises:
            ValueError: If `source_url` and `transfer_method` are inconsistent,
                or if none of `file_path`, `file`, or `source_url` is given.
            PangeaAPIException: If an API error happens.

        Returns:
            The sanitized file and information on the sanitization that was
            performed.

        Examples:
            with open("/path/to/file.pdf", "rb") as f:
                response = sanitize.sanitize(
                    file=f,
                    transfer_method=TransferMethod.POST_URL,
                    uploaded_file_name="uploaded_file",
                )
        """

        if transfer_method == TransferMethod.SOURCE_URL and source_url is None:
            raise ValueError("`source_url` argument is required when using `TransferMethod.SOURCE_URL`.")

        if source_url is not None and transfer_method != TransferMethod.SOURCE_URL:
            raise ValueError(
                "`transfer_method` should be `TransferMethod.SOURCE_URL` when using the `source_url` argument."
            )

        if not (file or file_path) and source_url is None:
            raise ValueError("Need to set one of `file_path`, `file`, or `source_url` arguments.")

        # Track the handle we open ourselves so that it is closed on every exit
        # path, including failures while hashing or building the request model.
        opened_file: Optional[io.BufferedReader] = None
        if file_path:
            opened_file = open(file_path, "rb")
            file = opened_file

        try:
            files: Optional[List[Tuple]] = None
            if file:
                if transfer_method == TransferMethod.POST_URL:
                    # Only compute the upload parameters when the caller left at
                    # least one of them out; caller-supplied values always win.
                    if sha256 is None or crc32c is None or size is None:
                        params = get_file_upload_params(file)
                        crc32c = params.crc_hex if crc32c is None else crc32c
                        sha256 = params.sha256_hex if sha256 is None else sha256
                        size = params.size if size is None else size
                else:
                    # Other transfer methods do not send these with a file upload.
                    crc32c, sha256, size = None, None, None
                files = [("upload", ("filename", file, "application/octet-stream"))]

            request_model = SanitizeRequest(
                transfer_method=transfer_method,
                source_url=source_url,
                share_id=share_id,
                file=file_scan,
                content=content,
                share_output=share_output,
                crc32c=crc32c,
                sha256=sha256,
                size=size,
                uploaded_file_name=uploaded_file_name,
            )
            data = request_model.model_dump(exclude_none=True)
            return self.request.post("v1/sanitize", SanitizeResult, data=data, files=files, poll_result=sync_call)
        finally:
            if opened_file is not None:
                opened_file.close()

    def request_upload_url(
        self,
        transfer_method: TransferMethod = TransferMethod.PUT_URL,
        params: Optional[FileUploadParams] = None,
        file_scan: Optional[SanitizeFile] = None,
        content: Optional[SanitizeContent] = None,
        share_output: Optional[SanitizeShareOutput] = None,
        size: Optional[int] = None,
        crc32c: Optional[str] = None,
        sha256: Optional[str] = None,
        uploaded_file_name: Optional[str] = None,
    ) -> PangeaResponse[SanitizeResult]:
        """
        Sanitize via presigned URL

        Apply file sanitization actions according to specified rules via a
        [presigned URL](https://pangea.cloud/docs/api/transfer-methods).

        OperationId: sanitize_post_v1_sanitize 2

        Args:
            transfer_method: The transfer method used to upload the file data.
            params: File upload parameters. When given together with
                `TransferMethod.POST_URL`, they replace `crc32c`, `sha256`
                and `size`.
            file_scan: Options for File Scan.
            content: Options for how the file should be sanitized.
            share_output: Integration with Secure Share.
            size: The size (in bytes) of the file. If the upload doesn't match, the call will fail.
            crc32c: The CRC32C hash of the file data, which will be verified by the server if provided.
            sha256: The hexadecimal-encoded SHA256 hash of the file data, which will be verified by the server if provided.
            uploaded_file_name: Name of the user-uploaded file, required for `TransferMethod.PUT_URL` and `TransferMethod.POST_URL`.

        Raises:
            PangeaAPIException: If an API error happens.

        Returns:
            A presigned URL.

        Examples:
            presignedUrl = sanitize.request_upload_url(
                transfer_method=TransferMethod.PUT_URL,
                uploaded_file_name="uploaded_file",
            )

            # Upload file to `presignedUrl.accepted_result.put_url`.

            # Poll for Sanitize's result.
            response: PangeaResponse[SanitizeResult] = sanitize.poll_result(response=presignedUrl)
        """

        request_model = SanitizeRequest(
            transfer_method=transfer_method,
            file=file_scan,
            content=content,
            share_output=share_output,
            crc32c=crc32c,
            sha256=sha256,
            size=size,
            uploaded_file_name=uploaded_file_name,
        )
        if params is not None and (transfer_method == TransferMethod.POST_URL):
            # Precomputed upload parameters take precedence for post-url uploads.
            request_model.crc32c = params.crc_hex
            request_model.sha256 = params.sha256_hex
            request_model.size = params.size

        data = request_model.model_dump(exclude_none=True)
        return self.request.request_presigned_url("v1/sanitize", SanitizeResult, data=data)
|