pangea-sdk 3.8.0__py3-none-any.whl → 5.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. pangea/__init__.py +2 -1
  2. pangea/asyncio/__init__.py +1 -0
  3. pangea/asyncio/file_uploader.py +39 -0
  4. pangea/asyncio/request.py +46 -23
  5. pangea/asyncio/services/__init__.py +2 -0
  6. pangea/asyncio/services/audit.py +46 -20
  7. pangea/asyncio/services/authn.py +123 -61
  8. pangea/asyncio/services/authz.py +57 -31
  9. pangea/asyncio/services/base.py +21 -2
  10. pangea/asyncio/services/embargo.py +2 -2
  11. pangea/asyncio/services/file_scan.py +24 -9
  12. pangea/asyncio/services/intel.py +104 -30
  13. pangea/asyncio/services/redact.py +52 -3
  14. pangea/asyncio/services/sanitize.py +217 -0
  15. pangea/asyncio/services/share.py +733 -0
  16. pangea/asyncio/services/vault.py +1709 -766
  17. pangea/crypto/rsa.py +135 -0
  18. pangea/deep_verify.py +7 -1
  19. pangea/dump_audit.py +9 -8
  20. pangea/file_uploader.py +35 -0
  21. pangea/request.py +70 -49
  22. pangea/response.py +36 -17
  23. pangea/services/__init__.py +2 -0
  24. pangea/services/audit/audit.py +57 -29
  25. pangea/services/audit/models.py +12 -3
  26. pangea/services/audit/signing.py +6 -5
  27. pangea/services/audit/util.py +3 -3
  28. pangea/services/authn/authn.py +120 -66
  29. pangea/services/authn/models.py +167 -11
  30. pangea/services/authz.py +53 -30
  31. pangea/services/base.py +16 -2
  32. pangea/services/embargo.py +2 -2
  33. pangea/services/file_scan.py +32 -15
  34. pangea/services/intel.py +155 -30
  35. pangea/services/redact.py +132 -3
  36. pangea/services/sanitize.py +388 -0
  37. pangea/services/share/file_format.py +170 -0
  38. pangea/services/share/share.py +1440 -0
  39. pangea/services/vault/models/asymmetric.py +120 -18
  40. pangea/services/vault/models/common.py +439 -141
  41. pangea/services/vault/models/keys.py +94 -0
  42. pangea/services/vault/models/secret.py +27 -3
  43. pangea/services/vault/models/symmetric.py +68 -22
  44. pangea/services/vault/vault.py +1690 -766
  45. pangea/tools.py +6 -7
  46. pangea/utils.py +94 -33
  47. pangea/verify_audit.py +270 -83
  48. {pangea_sdk-3.8.0.dist-info → pangea_sdk-5.3.0.dist-info}/METADATA +21 -29
  49. pangea_sdk-5.3.0.dist-info/RECORD +56 -0
  50. {pangea_sdk-3.8.0.dist-info → pangea_sdk-5.3.0.dist-info}/WHEEL +1 -1
  51. pangea_sdk-3.8.0.dist-info/RECORD +0 -46
pangea/tools.py CHANGED
@@ -95,7 +95,7 @@ def file_events(root_hashes: Dict[int, str], f: io.TextIOWrapper) -> Iterator[Ev
95
95
  else:
96
96
  raise ValueError("invalid data")
97
97
  except (json.JSONDecodeError, ValueError, KeyError) as e:
98
- exit_with_error(f"failed to parse line {idx}: {str(e)}")
98
+ exit_with_error(f"failed to parse line {idx}: {e!s}")
99
99
 
100
100
 
101
101
  def init_audit(token: str, domain: str) -> Audit:
@@ -108,15 +108,14 @@ def init_audit(token: str, domain: str) -> Audit:
108
108
  def make_aware_datetime(d: datetime) -> datetime:
109
109
  if d.tzinfo is None or d.tzinfo.utcoffset(d) is None:
110
110
  return d.replace(tzinfo=timezone.utc)
111
- else:
112
- return d
111
+ return d
113
112
 
114
113
 
115
114
  def filter_deep_none(data: Dict) -> Dict:
116
115
  return {k: v if not isinstance(v, Dict) else filter_deep_none(v) for k, v in data.items() if v is not None}
117
116
 
118
117
 
119
- def _load_env_var(env_var_name: str):
118
+ def _load_env_var(env_var_name: str) -> str:
120
119
  value = os.getenv(env_var_name)
121
120
  if not value:
122
121
  raise PangeaException(f"{env_var_name} env var need to be set")
@@ -124,12 +123,12 @@ def _load_env_var(env_var_name: str):
124
123
  return value
125
124
 
126
125
 
127
- def get_test_domain(environment: TestEnvironment):
126
+ def get_test_domain(environment: TestEnvironment) -> str:
128
127
  env_var_name = f"PANGEA_INTEGRATION_DOMAIN_{environment}"
129
128
  return _load_env_var(env_var_name)
130
129
 
131
130
 
132
- def get_test_token(environment: TestEnvironment):
131
+ def get_test_token(environment: TestEnvironment) -> str:
133
132
  env_var_name = f"PANGEA_INTEGRATION_TOKEN_{environment}"
134
133
  return _load_env_var(env_var_name)
135
134
 
@@ -200,7 +199,7 @@ loggers: Dict[str, bool] = {}
200
199
 
201
200
 
202
201
  def logger_set_pangea_config(logger_name: str, level=logging.DEBUG):
203
- if loggers.get(logger_name, None) is not None:
202
+ if loggers.get(logger_name) is not None:
204
203
  return
205
204
 
206
205
  loggers[logger_name] = True
pangea/utils.py CHANGED
@@ -1,12 +1,13 @@
1
+ from __future__ import annotations
2
+
1
3
  import base64
2
4
  import copy
3
5
  import datetime
4
6
  import io
5
7
  import json
6
- from collections import OrderedDict
7
- from hashlib import new, sha1, sha256, sha512
8
+ from hashlib import md5, new, sha1, sha256, sha512
8
9
 
9
- from google_crc32c import Checksum as CRC32C # type: ignore[import]
10
+ from google_crc32c import Checksum as CRC32C
10
11
  from pydantic import BaseModel
11
12
 
12
13
 
@@ -34,20 +35,9 @@ def str2str_b64(data: str, encoding: str = "utf-8") -> str:
34
35
  return base64.b64encode(data.encode(encoding)).decode("ascii")
35
36
 
36
37
 
37
- def dict_order_keys(data: dict) -> OrderedDict:
38
- if isinstance(data, dict):
39
- return OrderedDict(sorted(data.items()))
40
- else:
41
- return data
42
-
43
-
44
- def dict_order_keys_recursive(data: dict) -> OrderedDict:
45
- if isinstance(data, dict):
46
- for k, v in data.items():
47
- if type(v) is dict:
48
- data[k] = dict_order_keys_recursive(v)
49
-
50
- return data # type: ignore[return-value]
38
+ def str_b64_2bytes(data: str) -> bytes:
39
+ data += "=" * ((4 - len(data) % 4) % 4) # add padding if needed
40
+ return base64.urlsafe_b64decode(data)
51
41
 
52
42
 
53
43
  def canonicalize_nested_json(data: dict) -> dict:
@@ -76,33 +66,97 @@ def canonicalize(data: dict) -> str:
76
66
  return str(data)
77
67
 
78
68
 
79
- def hash_sha256(data: str) -> str:
80
- # Return sha256 hash in hex format
81
- return sha256(data.encode("ascii")).hexdigest()
69
+ def hash_sha256(input: str | io.BufferedReader) -> str:
70
+ # Return SHA256 hash in hex format
71
+ hash = sha256()
72
+ if isinstance(input, io.BufferedReader):
73
+ input.seek(0) # restart reading
74
+ while True:
75
+ chunk = input.read(1024 * 1024)
76
+ if not chunk:
77
+ break
78
+ hash.update(chunk)
82
79
 
80
+ input.seek(0) # restart reading
81
+ else:
82
+ hash.update(input.encode("utf-8"))
83
+
84
+ return hash.hexdigest()
83
85
 
84
- def hash_256_filepath(filepath: str) -> str:
85
- data = open(filepath, "rb")
86
- hash = sha256(data.read()).hexdigest()
87
- data.close()
88
- return hash
89
86
 
87
+ def hash_sha1(input: str | io.BufferedReader) -> str:
88
+ # Return SHA1 hash in hex format
89
+ hash = sha1()
90
+ if isinstance(input, io.BufferedReader):
91
+ input.seek(0) # restart reading
92
+ while True:
93
+ chunk = input.read(1024 * 1024)
94
+ if not chunk:
95
+ break
96
+ hash.update(chunk)
97
+
98
+ input.seek(0) # restart reading
99
+ else:
100
+ hash.update(input.encode("utf-8"))
101
+
102
+ return hash.hexdigest()
90
103
 
91
- def hash_sha1(data: str) -> str:
92
- # Return sha1 hash in hex format
93
- return sha1(data.encode("ascii")).hexdigest()
94
104
 
105
+ def hash_sha512(input: str | io.BufferedReader) -> str:
106
+ # Return SHA512 hash in hex format
107
+ hash = sha512()
108
+ if isinstance(input, io.BufferedReader):
109
+ input.seek(0) # restart reading
110
+ while True:
111
+ chunk = input.read(1024 * 1024)
112
+ if not chunk:
113
+ break
114
+ hash.update(chunk)
115
+
116
+ input.seek(0) # restart reading
117
+ else:
118
+ hash.update(input.encode("utf-8"))
95
119
 
96
- def hash_sha512(data: str) -> str:
97
- # Return sha512 hash in hex format
98
- return sha512(data.encode("ascii")).hexdigest()
120
+ return hash.hexdigest()
99
121
 
100
122
 
101
- def hash_ntlm(data: str):
102
- # Calculate the NTLM hash
123
+ def hash_ntlm(data: str) -> str:
124
+ # Return NTLM hash in hex format
103
125
  return new("md4", data.encode("utf-16le")).hexdigest()
104
126
 
105
127
 
128
+ def hash_md5(input: str | io.BufferedReader) -> str:
129
+ # Return MD5 hash in hex format
130
+ hash = md5()
131
+ if isinstance(input, io.BufferedReader):
132
+ input.seek(0) # restart reading
133
+
134
+ while True:
135
+ chunk = input.read(1024 * 1024)
136
+ if not chunk:
137
+ break
138
+ hash.update(chunk)
139
+
140
+ input.seek(0) # restart reading
141
+ else:
142
+ hash.update(input.encode("utf-8"))
143
+
144
+ return hash.hexdigest()
145
+
146
+
147
+ def get_crc32c(data: str) -> str:
148
+ crc = CRC32C()
149
+ crc.update(data)
150
+ return crc.hexdigest().decode("utf-8")
151
+
152
+
153
+ def hash_256_filepath(filepath: str) -> str:
154
+ data = open(filepath, "rb")
155
+ hash = sha256(data.read()).hexdigest()
156
+ data.close()
157
+ return hash
158
+
159
+
106
160
  def get_prefix(hash: str, len: int = 5):
107
161
  return hash[0:len]
108
162
 
@@ -132,3 +186,10 @@ def get_file_upload_params(file: io.BufferedReader) -> FileUploadParams:
132
186
 
133
187
  file.seek(0) # restart reading
134
188
  return FileUploadParams(crc_hex=crc.hexdigest().decode("utf-8"), sha256_hex=sha.hexdigest(), size=size)
189
+
190
+
191
+ def get_file_size(file: io.BufferedReader) -> int:
192
+ file.seek(0, io.SEEK_END)
193
+ size = file.tell()
194
+ file.seek(0) # restart reading
195
+ return size