pangea-sdk 3.8.0b1__py3-none-any.whl → 5.3.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pangea/__init__.py +1 -1
- pangea/asyncio/file_uploader.py +1 -1
- pangea/asyncio/request.py +49 -31
- pangea/asyncio/services/__init__.py +2 -0
- pangea/asyncio/services/audit.py +192 -31
- pangea/asyncio/services/authn.py +187 -109
- pangea/asyncio/services/authz.py +285 -0
- pangea/asyncio/services/base.py +21 -2
- pangea/asyncio/services/embargo.py +2 -2
- pangea/asyncio/services/file_scan.py +24 -9
- pangea/asyncio/services/intel.py +108 -34
- pangea/asyncio/services/redact.py +72 -4
- pangea/asyncio/services/sanitize.py +217 -0
- pangea/asyncio/services/share.py +246 -73
- pangea/asyncio/services/vault.py +1710 -750
- pangea/crypto/rsa.py +135 -0
- pangea/deep_verify.py +7 -1
- pangea/dump_audit.py +9 -8
- pangea/request.py +83 -59
- pangea/response.py +49 -31
- pangea/services/__init__.py +2 -0
- pangea/services/audit/audit.py +205 -42
- pangea/services/audit/models.py +56 -8
- pangea/services/audit/signing.py +6 -5
- pangea/services/audit/util.py +3 -3
- pangea/services/authn/authn.py +140 -70
- pangea/services/authn/models.py +167 -11
- pangea/services/authz.py +400 -0
- pangea/services/base.py +39 -8
- pangea/services/embargo.py +2 -2
- pangea/services/file_scan.py +32 -15
- pangea/services/intel.py +157 -32
- pangea/services/redact.py +152 -4
- pangea/services/sanitize.py +388 -0
- pangea/services/share/share.py +683 -107
- pangea/services/vault/models/asymmetric.py +120 -18
- pangea/services/vault/models/common.py +439 -141
- pangea/services/vault/models/keys.py +94 -0
- pangea/services/vault/models/secret.py +27 -3
- pangea/services/vault/models/symmetric.py +68 -22
- pangea/services/vault/vault.py +1690 -749
- pangea/tools.py +6 -7
- pangea/utils.py +16 -27
- pangea/verify_audit.py +270 -83
- {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.3.0.dist-info}/METADATA +43 -35
- pangea_sdk-5.3.0.dist-info/RECORD +56 -0
- {pangea_sdk-3.8.0b1.dist-info → pangea_sdk-5.3.0.dist-info}/WHEEL +1 -1
- pangea_sdk-3.8.0b1.dist-info/RECORD +0 -50
pangea/services/redact.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
# Copyright 2022 Pangea Cyber Corporation
|
2
2
|
# Author: Pangea Cyber Corporation
|
3
|
+
from __future__ import annotations
|
3
4
|
|
4
5
|
import enum
|
5
6
|
from typing import Dict, List, Optional, Union
|
6
7
|
|
8
|
+
from pangea.config import PangeaConfig
|
7
9
|
from pangea.response import APIRequestModel, APIResponseModel, PangeaResponse, PangeaResponseResult
|
8
10
|
from pangea.services.base import ServiceBase
|
9
11
|
|
@@ -15,6 +17,44 @@ class RedactFormat(str, enum.Enum):
|
|
15
17
|
"""JSON format."""
|
16
18
|
|
17
19
|
|
20
|
+
class RedactType(str, enum.Enum):
    """Redaction methods selectable via ``RedactionMethodOverrides.redaction_type``.

    Members also subclass ``str``, so each compares equal to its string value.
    """

    MASK = "mask"
    PARTIAL_MASKING = "partial_masking"
    REPLACEMENT = "replacement"
    DETECT_ONLY = "detect_only"
    HASH = "hash"
    FPE = "fpe"
|
27
|
+
|
28
|
+
|
29
|
+
class FPEAlphabet(str, enum.Enum):
    """Alphabets selectable via ``RedactionMethodOverrides.fpe_alphabet``.

    Members also subclass ``str``, so each compares equal to its string value.
    """

    NUMERIC = "numeric"
    ALPHANUMERICLOWER = "alphanumericlower"
    ALPHANUMERIC = "alphanumeric"
|
33
|
+
|
34
|
+
|
35
|
+
class MaskingType(str, enum.Enum):
    """Values accepted by ``PartialMasking.masking_type``.

    Members also subclass ``str``, so each compares equal to its string value.
    """

    MASK = "mask"
    UNMASK = "unmask"
|
38
|
+
|
39
|
+
|
40
|
+
class PartialMasking(APIRequestModel):
    """Partial-masking settings carried in ``RedactionMethodOverrides.partial_masking``.

    Every field is optional and defaults to ``None``.
    """

    masking_type: Optional[MaskingType] = None
    unmasked_from_left: Optional[int] = None
    unmasked_from_right: Optional[int] = None
    masked_from_left: Optional[int] = None
    masked_from_right: Optional[int] = None
    chars_to_ignore: Optional[List[str]] = None
    masking_char: Optional[List[str]] = None
|
48
|
+
|
49
|
+
|
50
|
+
class RedactionMethodOverrides(APIRequestModel):
    """Redaction method override sent as ``redaction_method_overrides``.

    Accepted by ``Redact.redact`` and ``Redact.redact_structured``; the
    override takes precedence over the method declared in the service config.
    Only ``redaction_type`` is required; the remaining fields default to
    ``None``.
    """

    redaction_type: RedactType
    hash: Optional[Dict] = None
    fpe_alphabet: Optional[FPEAlphabet] = None
    partial_masking: Optional[PartialMasking] = None
    redaction_value: Optional[str] = None
|
56
|
+
|
57
|
+
|
18
58
|
class RedactRequest(APIRequestModel):
|
19
59
|
"""
|
20
60
|
Input class to make a redact request
|
@@ -25,6 +65,18 @@ class RedactRequest(APIRequestModel):
|
|
25
65
|
rules: Optional[List[str]] = None
|
26
66
|
rulesets: Optional[List[str]] = None
|
27
67
|
return_result: Optional[bool] = None
|
68
|
+
redaction_method_overrides: Optional[RedactionMethodOverrides] = None
|
69
|
+
vault_parameters: Optional[VaultParameters] = None
|
70
|
+
llm_request: Optional[bool] = None
|
71
|
+
"""Is this redact call going to be used in an LLM request?"""
|
72
|
+
|
73
|
+
|
74
|
+
class VaultParameters(APIRequestModel):
    """Vault references sent as ``vault_parameters`` on redact requests.

    Both fields are optional; each one replaces the corresponding service
    config default when set.
    """

    fpe_key_id: Optional[str] = None
    """A vault key ID of an exportable key used to redact with FPE instead of using the service config default."""

    salt_secret_id: Optional[str] = None
    """A vault secret ID of a secret used to salt a hash instead of using the service config default."""
|
28
80
|
|
29
81
|
|
30
82
|
class RecognizerResult(APIResponseModel):
|
@@ -67,11 +119,13 @@ class RedactResult(PangeaResponseResult):
|
|
67
119
|
redact_text: Redacted text result
|
68
120
|
count: Number of redactions present in the text
|
69
121
|
report: Describes the decision process for redactions
|
122
|
+
fpe_context: FPE context used to encrypt and redact data
|
70
123
|
"""
|
71
124
|
|
72
125
|
redacted_text: Optional[str] = None
|
73
126
|
count: int
|
74
127
|
report: Optional[DebugReport] = None
|
128
|
+
fpe_context: Optional[str] = None
|
75
129
|
|
76
130
|
|
77
131
|
class StructuredRequest(APIRequestModel):
|
@@ -92,6 +146,10 @@ class StructuredRequest(APIRequestModel):
|
|
92
146
|
rules: Optional[List[str]] = None
|
93
147
|
rulesets: Optional[List[str]] = None
|
94
148
|
return_result: Optional[bool] = None
|
149
|
+
redaction_method_overrides: Optional[RedactionMethodOverrides] = None
|
150
|
+
vault_parameters: Optional[VaultParameters] = None
|
151
|
+
llm_request: Optional[bool] = None
|
152
|
+
"""Is this redact call going to be used in an LLM request?"""
|
95
153
|
|
96
154
|
|
97
155
|
class StructuredResult(PangeaResponseResult):
|
@@ -105,6 +163,32 @@ class StructuredResult(PangeaResponseResult):
|
|
105
163
|
report: Optional[DebugReport] = None
|
106
164
|
|
107
165
|
|
166
|
+
class UnredactRequest(APIRequestModel):
    """
    Request body for an unredact call.

    Arguments:
        redacted_data: Data to unredact (a string or a dict)
        fpe_context (base64): FPE context used to decrypt and unredact data

    """

    redacted_data: RedactedData
    fpe_context: str
|
178
|
+
|
179
|
+
|
180
|
+
# Type of `UnredactRequest.redacted_data` and `UnredactResult.data`: a plain string or a dict.
RedactedData = Union[str, Dict]
|
181
|
+
|
182
|
+
|
183
|
+
class UnredactResult(PangeaResponseResult):
    """
    Result of an unredact request.

    The only field, ``data``, has the same ``RedactedData`` type (string or
    dict) as the ``redacted_data`` field of the request.
    """

    data: RedactedData
|
190
|
+
|
191
|
+
|
108
192
|
class Redact(ServiceBase):
|
109
193
|
"""Redact service client.
|
110
194
|
|
@@ -132,7 +216,24 @@ class Redact(ServiceBase):
|
|
132
216
|
|
133
217
|
service_name = "redact"
|
134
218
|
|
135
|
-
def __init__(
|
219
|
+
def __init__(
    self,
    token: str,
    config: PangeaConfig | None = None,
    logger_name: str = "pangea",
    config_id: str | None = None,
) -> None:
    """
    Redact client

    Initializes a new Redact client by forwarding every argument to the
    base service constructor.

    Args:
        token: Pangea API token.
        config: Configuration.
        logger_name: Logger name.
        config_id: Configuration ID.

    Examples:
        config = PangeaConfig(domain="pangea_domain")
        redact = Redact(token="pangea_token", config=config)
    """
    super().__init__(token, config, logger_name, config_id=config_id)
|
137
238
|
|
138
239
|
def redact(
|
@@ -142,6 +243,9 @@ class Redact(ServiceBase):
|
|
142
243
|
rules: Optional[List[str]] = None,
|
143
244
|
rulesets: Optional[List[str]] = None,
|
144
245
|
return_result: Optional[bool] = None,
|
246
|
+
redaction_method_overrides: Optional[RedactionMethodOverrides] = None,
|
247
|
+
llm_request: Optional[bool] = None,
|
248
|
+
vault_parameters: Optional[VaultParameters] = None,
|
145
249
|
) -> PangeaResponse[RedactResult]:
|
146
250
|
"""
|
147
251
|
Redact
|
@@ -157,6 +261,9 @@ class Redact(ServiceBase):
|
|
157
261
|
rules (list[str], optional): An array of redact rule short names
|
158
262
|
rulesets (list[str], optional): An array of redact rulesets short names
|
159
263
|
return_result(bool, optional): Setting this value to false will omit the redacted result only returning count
|
264
|
+
redaction_method_overrides: A set of redaction method overrides for any enabled rule. These methods override the config declared methods
|
265
|
+
llm_request: Boolean flag to enable FPE redaction for LLM requests
|
266
|
+
vault_parameters: A set of vault parameters to use for redaction
|
160
267
|
|
161
268
|
Raises:
|
162
269
|
PangeaAPIException: If an API Error happens
|
@@ -170,8 +277,17 @@ class Redact(ServiceBase):
|
|
170
277
|
response = redact.redact(text="Jenny Jenny... 555-867-5309")
|
171
278
|
"""
|
172
279
|
|
173
|
-
input = RedactRequest(
|
174
|
-
|
280
|
+
input = RedactRequest(
|
281
|
+
text=text,
|
282
|
+
debug=debug,
|
283
|
+
rules=rules,
|
284
|
+
rulesets=rulesets,
|
285
|
+
return_result=return_result,
|
286
|
+
redaction_method_overrides=redaction_method_overrides,
|
287
|
+
llm_request=llm_request,
|
288
|
+
vault_parameters=vault_parameters,
|
289
|
+
)
|
290
|
+
return self.request.post("v1/redact", RedactResult, data=input.model_dump(exclude_none=True))
|
175
291
|
|
176
292
|
def redact_structured(
|
177
293
|
self,
|
@@ -182,6 +298,9 @@ class Redact(ServiceBase):
|
|
182
298
|
rules: Optional[List[str]] = None,
|
183
299
|
rulesets: Optional[List[str]] = None,
|
184
300
|
return_result: Optional[bool] = None,
|
301
|
+
redaction_method_overrides: Optional[RedactionMethodOverrides] = None,
|
302
|
+
llm_request: Optional[bool] = None,
|
303
|
+
vault_parameters: Optional[VaultParameters] = None,
|
185
304
|
) -> PangeaResponse[StructuredResult]:
|
186
305
|
"""
|
187
306
|
Redact structured
|
@@ -201,6 +320,9 @@ class Redact(ServiceBase):
|
|
201
320
|
rules (list[str], optional): An array of redact rule short names
|
202
321
|
rulesets (list[str], optional): An array of redact rulesets short names
|
203
322
|
return_result(bool, optional): Setting this value to false will omit the redacted result only returning count
|
323
|
+
redaction_method_overrides: A set of redaction method overrides for any enabled rule. These methods override the config declared methods
|
324
|
+
llm_request: Boolean flag to enable FPE redaction for LLM requests
|
325
|
+
vault_parameters: A set of vault parameters to use for redaction
|
204
326
|
|
205
327
|
Raises:
|
206
328
|
PangeaAPIException: If an API Error happens
|
@@ -227,5 +349,31 @@ class Redact(ServiceBase):
|
|
227
349
|
rules=rules,
|
228
350
|
rulesets=rulesets,
|
229
351
|
return_result=return_result,
|
352
|
+
redaction_method_overrides=redaction_method_overrides,
|
353
|
+
llm_request=llm_request,
|
354
|
+
vault_parameters=vault_parameters,
|
230
355
|
)
|
231
|
-
return self.request.post("v1/redact_structured", StructuredResult, data=input.
|
356
|
+
return self.request.post("v1/redact_structured", StructuredResult, data=input.model_dump(exclude_none=True))
|
357
|
+
|
358
|
+
def unredact(self, redacted_data: RedactedData, fpe_context: str) -> PangeaResponse[UnredactResult]:
    """
    Unredact

    Decrypt or unredact fpe redactions

    OperationId: redact_post_v1_unredact

    Args:
        redacted_data: Data to unredact
        fpe_context (base64): FPE context used to decrypt and unredact data

    Raises:
        PangeaAPIException: If an API Error happens

    Returns:
        Pangea Response with the unredacted data in the response.result field
        (see `UnredactResult.data`); available response fields can be found
        in our
        [API Documentation](https://pangea.cloud/docs/api/redact#unredact)
    """
    request_model = UnredactRequest(redacted_data=redacted_data, fpe_context=fpe_context)
    # Fields left as None are omitted from the request body.
    payload = request_model.model_dump(exclude_none=True)
    return self.request.post("v1/unredact", UnredactResult, data=payload)
|
@@ -0,0 +1,388 @@
|
|
1
|
+
# Copyright 2022 Pangea Cyber Corporation
|
2
|
+
# Author: Pangea Cyber Corporation
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import io
|
6
|
+
from typing import Dict, List, Optional, Tuple
|
7
|
+
|
8
|
+
from pydantic import Field
|
9
|
+
|
10
|
+
from pangea.config import PangeaConfig
|
11
|
+
from pangea.response import APIRequestModel, PangeaResponse, PangeaResponseResult, TransferMethod
|
12
|
+
from pangea.services.base import ServiceBase
|
13
|
+
from pangea.utils import FileUploadParams, get_file_upload_params
|
14
|
+
|
15
|
+
|
16
|
+
class SanitizeFile(APIRequestModel):
    """File Scan options, sent as the ``file`` field of a ``SanitizeRequest``."""

    scan_provider: Optional[str] = None
    """Provider to use for File Scan."""
|
19
|
+
|
20
|
+
|
21
|
+
class SanitizeContent(APIRequestModel):
    """Options for how the file should be sanitized.

    Sent as the ``content`` field of a ``SanitizeRequest``. Every option is
    optional and defaults to ``None``.
    """

    url_intel: Optional[bool] = None
    """Perform URL Intel lookup."""

    url_intel_provider: Optional[str] = None
    """Provider to use for URL Intel."""

    domain_intel: Optional[bool] = None
    """Perform Domain Intel lookup."""

    domain_intel_provider: Optional[str] = None
    """Provider to use for Domain Intel lookup."""

    defang: Optional[bool] = None
    """Defang external links."""

    defang_threshold: Optional[int] = None
    """Defang risk threshold."""

    redact: Optional[bool] = None
    """Redact sensitive content."""

    redact_detect_only: Optional[bool] = None
    """
    If redact is enabled, avoids redacting the file and instead returns the PII
    analysis engine results. Only works if redact is enabled.
    """

    remove_attachments: Optional[bool] = None
    """Remove file attachments (PDF only)."""

    remove_interactive: Optional[bool] = None
    """Remove interactive content (PDF only)."""
|
54
|
+
|
55
|
+
|
56
|
+
class SanitizeShareOutput(APIRequestModel):
    """Secure Share integration options, sent as ``share_output`` of a ``SanitizeRequest``."""

    enabled: Optional[bool] = None
    """Store Sanitized files to Pangea Secure Share."""

    output_folder: Optional[str] = None
    """
    Store Sanitized files to this Secure Share folder (will be auto-created if
    it does not exist)
    """
|
65
|
+
|
66
|
+
|
67
|
+
class SanitizeRequest(APIRequestModel):
    """Request body posted to ``v1/sanitize``.

    Built by ``Sanitize.sanitize`` and ``Sanitize.request_upload_url``; only
    ``transfer_method`` has a non-``None`` default.
    """

    transfer_method: TransferMethod = TransferMethod.POST_URL
    """The transfer method used to upload the file data."""

    source_url: Optional[str] = None
    """A URL where the file to be sanitized can be downloaded."""

    share_id: Optional[str] = None
    """A Pangea Secure Share ID where the file to be Sanitized is stored."""

    file: Optional[SanitizeFile] = None
    """File."""

    content: Optional[SanitizeContent] = None
    """Content."""

    share_output: Optional[SanitizeShareOutput] = None
    """Share output."""

    size: Optional[int] = None
    """The size (in bytes) of the file. If the upload doesn't match, the call will fail."""

    crc32c: Optional[str] = None
    """The CRC32C hash of the file data, which will be verified by the server if provided."""

    sha256: Optional[str] = None
    """The hexadecimal-encoded SHA256 hash of the file data, which will be verified by the server if provided."""

    uploaded_file_name: Optional[str] = None
    """Name of the user-uploaded file, required for transfer-method 'put-url' and 'post-url'."""
|
97
|
+
|
98
|
+
|
99
|
+
class DefangData(PangeaResponseResult):
    """Defang statistics, returned as ``SanitizeData.defang``."""

    external_urls_count: Optional[int] = None
    """Number of external links found."""

    external_domains_count: Optional[int] = None
    """Number of external domains found."""

    defanged_count: Optional[int] = None
    """Number of items defanged per provided rules and detections."""

    url_intel_summary: Optional[str] = None
    """Processed N URLs: X are malicious, Y are suspicious, Z are unknown."""

    domain_intel_summary: Optional[str] = None
    """Processed N Domains: X are malicious, Y are suspicious, Z are unknown."""
|
114
|
+
|
115
|
+
|
116
|
+
class RedactRecognizerResult(PangeaResponseResult):
    """One recognizer match, an element of ``RedactData.recognizer_results``.

    All fields are required.
    """

    field_type: str
    """The entity name."""

    score: float
    """The certainty score that the entity matches this specific snippet."""

    text: str
    """The text snippet that matched."""

    start: int
    """The starting index of a snippet."""

    end: int
    """The ending index of a snippet."""

    redacted: bool
    """Indicates if this rule was used to anonymize a text snippet."""
|
134
|
+
|
135
|
+
|
136
|
+
class RedactData(PangeaResponseResult):
    """Redaction statistics, returned as ``SanitizeData.redact``.

    ``redaction_count`` is required; ``summary_counts`` defaults to a fresh
    empty dict and ``recognizer_results`` to ``None``.
    """

    redaction_count: int
    """Number of items redacted"""

    summary_counts: Dict[str, int] = Field(default_factory=dict)
    """Summary counts."""

    recognizer_results: Optional[List[RedactRecognizerResult]] = None
    """The scoring result of a set of rules."""
|
145
|
+
|
146
|
+
|
147
|
+
class CDR(PangeaResponseResult):
    """Content Disarm and Reconstruction counters, returned as ``SanitizeData.cdr``."""

    file_attachments_removed: Optional[int] = None
    """Number of file attachments removed."""

    interactive_contents_removed: Optional[int] = None
    """Number of interactive content items removed."""
|
153
|
+
|
154
|
+
|
155
|
+
class SanitizeData(PangeaResponseResult):
    """Per-feature sanitization details, returned as ``SanitizeResult.data``.

    Every section is optional and defaults to ``None``.
    """

    defang: Optional[DefangData] = None
    """Defang."""

    redact: Optional[RedactData] = None
    """Redact."""

    malicious_file: Optional[bool] = None
    """If the file scanned was malicious."""

    cdr: Optional[CDR] = None
    """Content Disarm and Reconstruction."""
|
167
|
+
|
168
|
+
|
169
|
+
class SanitizeResult(PangeaResponseResult):
    """Result model for the ``v1/sanitize`` endpoint.

    ``data`` is the only required field.
    """

    dest_url: Optional[str] = None
    """A URL where the Sanitized file can be downloaded."""

    dest_share_id: Optional[str] = None
    """Pangea Secure Share ID of the Sanitized file."""

    data: SanitizeData
    """Sanitize data."""

    parameters: Dict = {}
    """The parameters, which were passed in the request, echoed back."""
|
181
|
+
|
182
|
+
|
183
|
+
class Sanitize(ServiceBase):
    """Sanitize service client.

    Examples:
        import os

        # Pangea SDK
        from pangea.config import PangeaConfig
        from pangea.services import Sanitize

        PANGEA_SANITIZE_TOKEN = os.getenv("PANGEA_SANITIZE_TOKEN")
        config = PangeaConfig(domain="pangea.cloud")

        sanitize = Sanitize(token=PANGEA_SANITIZE_TOKEN, config=config)
    """

    service_name = "sanitize"

    def __init__(
        self, token: str, config: PangeaConfig | None = None, logger_name: str = "pangea", config_id: str | None = None
    ) -> None:
        """
        Sanitize client

        Initializes a new Sanitize client.

        Args:
            token: Pangea API token.
            config: Configuration.
            logger_name: Logger name.
            config_id: Configuration ID.

        Examples:
            config = PangeaConfig(domain="aws.us.pangea.cloud")
            sanitize = Sanitize(token="pangea_token", config=config)
        """

        super().__init__(token, config, logger_name, config_id=config_id)

    def sanitize(
        self,
        transfer_method: TransferMethod = TransferMethod.POST_URL,
        file_path: Optional[str] = None,
        file: Optional[io.BufferedReader] = None,
        source_url: Optional[str] = None,
        share_id: Optional[str] = None,
        file_scan: Optional[SanitizeFile] = None,
        content: Optional[SanitizeContent] = None,
        share_output: Optional[SanitizeShareOutput] = None,
        size: Optional[int] = None,
        crc32c: Optional[str] = None,
        sha256: Optional[str] = None,
        uploaded_file_name: Optional[str] = None,
        sync_call: bool = True,
    ) -> PangeaResponse[SanitizeResult]:
        """
        Sanitize

        Apply file sanitization actions according to specified rules.

        OperationId: sanitize_post_v1_sanitize

        Args:
            transfer_method: The transfer method used to upload the file data.
            file_path: Path to file to sanitize. When given, the file is opened
                here (taking precedence over `file`) and closed before returning.
            file: File to sanitize. A caller-supplied file object is never closed.
            source_url: A URL where the file to be sanitized can be downloaded.
            share_id: A Pangea Secure Share ID where the file to be sanitized is stored.
            file_scan: Options for File Scan.
            content: Options for how the file should be sanitized.
            share_output: Integration with Secure Share.
            size: The size (in bytes) of the file. If the upload doesn't match, the call will fail.
            crc32c: The CRC32C hash of the file data, which will be verified by the server if provided.
            sha256: The hexadecimal-encoded SHA256 hash of the file data, which will be verified by the server if provided.
            uploaded_file_name: Name of the user-uploaded file, required for `TransferMethod.PUT_URL` and `TransferMethod.POST_URL`.
            sync_call: Whether or not to poll on HTTP/202.

        Note:
            When a file is uploaded with `TransferMethod.POST_URL`, any of
            `size`, `crc32c` and `sha256` left as `None` is computed from the
            file, and values supplied by the caller are sent unchanged. When a
            file is uploaded with any other transfer method, the three values
            are not sent.

        Raises:
            ValueError: If `source_url` and `transfer_method` are inconsistent,
                or if none of `file_path`, `file`, `source_url` is given.
            PangeaAPIException: If an API error happens.

        Returns:
            The sanitized file and information on the sanitization that was
            performed.

        Examples:
            with open("/path/to/file.pdf", "rb") as f:
                response = sanitize.sanitize(
                    file=f,
                    transfer_method=TransferMethod.POST_URL,
                    uploaded_file_name="uploaded_file",
                )
        """

        if transfer_method == TransferMethod.SOURCE_URL and source_url is None:
            raise ValueError("`source_url` argument is required when using `TransferMethod.SOURCE_URL`.")

        if source_url is not None and transfer_method != TransferMethod.SOURCE_URL:
            raise ValueError(
                "`transfer_method` should be `TransferMethod.SOURCE_URL` when using the `source_url` argument."
            )

        if not (file or file_path) and source_url is None:
            raise ValueError("Need to set one of `file_path`, `file`, or `source_url` arguments.")

        # Handle opened by this method (as opposed to one passed in by the
        # caller); it is the only one closed in the `finally` below.
        opened_file: Optional[io.BufferedReader] = None
        try:
            files: Optional[List[Tuple]] = None
            if file_path:
                opened_file = open(file_path, "rb")
                file = opened_file

            if file:
                if transfer_method == TransferMethod.POST_URL:
                    # Fill in only the values the caller did not supply.
                    if sha256 is None or crc32c is None or size is None:
                        params = get_file_upload_params(file)
                        crc32c = params.crc_hex if crc32c is None else crc32c
                        sha256 = params.sha256_hex if sha256 is None else sha256
                        size = params.size if size is None else size
                else:
                    # Other transfer methods do not send size/hash values.
                    crc32c, sha256, size = None, None, None
                files = [("upload", ("filename", file, "application/octet-stream"))]

            request_model = SanitizeRequest(
                transfer_method=transfer_method,
                source_url=source_url,
                share_id=share_id,
                file=file_scan,
                content=content,
                share_output=share_output,
                crc32c=crc32c,
                sha256=sha256,
                size=size,
                uploaded_file_name=uploaded_file_name,
            )
            data = request_model.model_dump(exclude_none=True)
            return self.request.post("v1/sanitize", SanitizeResult, data=data, files=files, poll_result=sync_call)
        finally:
            # Runs on every exit path, including failures while hashing the
            # file or building the request model.
            if opened_file is not None:
                opened_file.close()

    def request_upload_url(
        self,
        transfer_method: TransferMethod = TransferMethod.PUT_URL,
        params: Optional[FileUploadParams] = None,
        file_scan: Optional[SanitizeFile] = None,
        content: Optional[SanitizeContent] = None,
        share_output: Optional[SanitizeShareOutput] = None,
        size: Optional[int] = None,
        crc32c: Optional[str] = None,
        sha256: Optional[str] = None,
        uploaded_file_name: Optional[str] = None,
    ) -> PangeaResponse[SanitizeResult]:
        """
        Sanitize via presigned URL

        Apply file sanitization actions according to specified rules via a
        [presigned URL](https://pangea.cloud/docs/api/transfer-methods).

        OperationId: sanitize_post_v1_sanitize 2

        Args:
            transfer_method: The transfer method used to upload the file data.
            params: File upload parameters. With `TransferMethod.POST_URL`,
                its CRC32C, SHA256 and size values replace the `crc32c`,
                `sha256` and `size` arguments.
            file_scan: Options for File Scan.
            content: Options for how the file should be sanitized.
            share_output: Integration with Secure Share.
            size: The size (in bytes) of the file. If the upload doesn't match, the call will fail.
            crc32c: The CRC32C hash of the file data, which will be verified by the server if provided.
            sha256: The hexadecimal-encoded SHA256 hash of the file data, which will be verified by the server if provided.
            uploaded_file_name: Name of the user-uploaded file, required for `TransferMethod.PUT_URL` and `TransferMethod.POST_URL`.

        Raises:
            PangeaAPIException: If an API error happens.

        Returns:
            A presigned URL.

        Examples:
            presignedUrl = sanitize.request_upload_url(
                transfer_method=TransferMethod.PUT_URL,
                uploaded_file_name="uploaded_file",
            )

            # Upload file to `presignedUrl.accepted_result.put_url`.

            # Poll for Sanitize's result.
            response: PangeaResponse[SanitizeResult] = sanitize.poll_result(response=presignedUrl)
        """

        request_model = SanitizeRequest(
            transfer_method=transfer_method,
            file=file_scan,
            content=content,
            share_output=share_output,
            crc32c=crc32c,
            sha256=sha256,
            size=size,
            uploaded_file_name=uploaded_file_name,
        )
        if params is not None and (transfer_method == TransferMethod.POST_URL):
            request_model.crc32c = params.crc_hex
            request_model.sha256 = params.sha256_hex
            request_model.size = params.size

        data = request_model.model_dump(exclude_none=True)
        return self.request.request_presigned_url("v1/sanitize", SanitizeResult, data=data)
|