clue-api 1.0.0.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clue/.gitignore +21 -0
- clue/__init__.py +0 -0
- clue/api/__init__.py +211 -0
- clue/api/base.py +99 -0
- clue/api/v1/__init__.py +82 -0
- clue/api/v1/actions.py +92 -0
- clue/api/v1/auth.py +243 -0
- clue/api/v1/configs.py +83 -0
- clue/api/v1/fetchers.py +94 -0
- clue/api/v1/lookup.py +221 -0
- clue/api/v1/registration.py +109 -0
- clue/api/v1/static.py +94 -0
- clue/app.py +166 -0
- clue/cache/__init__.py +129 -0
- clue/common/__init__.py +0 -0
- clue/common/classification.py +1006 -0
- clue/common/classification.yml +130 -0
- clue/common/dict_utils.py +130 -0
- clue/common/exceptions.py +199 -0
- clue/common/forge.py +152 -0
- clue/common/json_utils.py +10 -0
- clue/common/list_utils.py +11 -0
- clue/common/logging/__init__.py +291 -0
- clue/common/logging/audit.py +157 -0
- clue/common/logging/format.py +42 -0
- clue/common/regex.py +31 -0
- clue/common/str_utils.py +213 -0
- clue/common/swagger.py +139 -0
- clue/common/uid.py +47 -0
- clue/config.py +60 -0
- clue/constants/__init__.py +0 -0
- clue/constants/supported_types.py +38 -0
- clue/cronjobs/__init__.py +30 -0
- clue/cronjobs/plugins.py +32 -0
- clue/error.py +129 -0
- clue/gunicorn_config.py +29 -0
- clue/healthz.py +74 -0
- clue/helper/discover.py +53 -0
- clue/helper/headers.py +30 -0
- clue/helper/oauth.py +128 -0
- clue/models/__init__.py +0 -0
- clue/models/actions.py +243 -0
- clue/models/config.py +456 -0
- clue/models/fetchers.py +136 -0
- clue/models/graph.py +162 -0
- clue/models/model_list.py +52 -0
- clue/models/network.py +430 -0
- clue/models/results/__init__.py +34 -0
- clue/models/results/base.py +10 -0
- clue/models/results/graph.py +26 -0
- clue/models/results/image.py +22 -0
- clue/models/results/status.py +55 -0
- clue/models/results/validation.py +57 -0
- clue/models/selector.py +67 -0
- clue/models/utils.py +52 -0
- clue/models/validators.py +19 -0
- clue/patched.py +8 -0
- clue/plugin/__init__.py +1008 -0
- clue/plugin/helpers/__init__.py +0 -0
- clue/plugin/helpers/central_server.py +27 -0
- clue/plugin/helpers/email_render.py +228 -0
- clue/plugin/helpers/token.py +34 -0
- clue/plugin/helpers/trino.py +103 -0
- clue/plugin/interactive.py +270 -0
- clue/plugin/models.py +19 -0
- clue/plugin/utils.py +78 -0
- clue/remote/__init__.py +0 -0
- clue/remote/datatypes/__init__.py +130 -0
- clue/remote/datatypes/cache.py +62 -0
- clue/remote/datatypes/events.py +118 -0
- clue/remote/datatypes/hash.py +193 -0
- clue/remote/datatypes/queues/__init__.py +0 -0
- clue/remote/datatypes/queues/comms.py +62 -0
- clue/remote/datatypes/set.py +96 -0
- clue/remote/datatypes/user_quota_tracker.py +54 -0
- clue/security/__init__.py +211 -0
- clue/security/obo.py +95 -0
- clue/security/utils.py +34 -0
- clue/services/action_service.py +186 -0
- clue/services/auth_service.py +348 -0
- clue/services/config_service.py +38 -0
- clue/services/fetcher_service.py +203 -0
- clue/services/jwt_service.py +233 -0
- clue/services/lookup_service.py +786 -0
- clue/services/type_service.py +165 -0
- clue/services/user_service.py +152 -0
- clue_api-1.0.0.dev7.dist-info/METADATA +111 -0
- clue_api-1.0.0.dev7.dist-info/RECORD +91 -0
- clue_api-1.0.0.dev7.dist-info/WHEEL +4 -0
- clue_api-1.0.0.dev7.dist-info/entry_points.txt +8 -0
- clue_api-1.0.0.dev7.dist-info/licenses/LICENSE +11 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import hashlib
|
|
3
|
+
from typing import Any, Optional, Union
|
|
4
|
+
|
|
5
|
+
import elasticapm
|
|
6
|
+
from flask import request
|
|
7
|
+
|
|
8
|
+
from clue.common.exceptions import (
|
|
9
|
+
AccessDeniedException,
|
|
10
|
+
AuthenticationException,
|
|
11
|
+
ClueException,
|
|
12
|
+
ClueNotImplementedError,
|
|
13
|
+
InvalidDataException,
|
|
14
|
+
)
|
|
15
|
+
from clue.common.logging import get_logger
|
|
16
|
+
from clue.config import config, get_redis
|
|
17
|
+
from clue.models.config import ExternalSource
|
|
18
|
+
from clue.remote.datatypes.set import ExpiringSet
|
|
19
|
+
from clue.security.obo import get_obo_token
|
|
20
|
+
from clue.security.utils import decode_jwt_payload, generate_random_secret
|
|
21
|
+
from clue.services import jwt_service, user_service
|
|
22
|
+
|
|
23
|
+
# Module-level logger for the authentication service.
logger = get_logger(__file__)

# Host and port of the core redis instance, taken from the application config.
redis_config: dict[str, Union[str, int]] = dict(
    host=config.core.redis.host,
    port=config.core.redis.port,
)

# Redis client handed to the expiring sets created in this module.
redis = get_redis()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _get_token_store(user: str) -> ExpiringSet:
    """Build the expiring redis set holding the tokens issued to a user.

    Args:
        user (str): The user the tokens belong to

    Returns:
        ExpiringSet: The set named ``token_<user>``, created with a one hour ttl
    """
    one_hour = 60 * 60
    store_name = f"token_{user}"

    return ExpiringSet(store_name, host=redis, ttl=one_hour)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _get_priv_store(user: str, token: str) -> ExpiringSet:
    """Build the expiring redis set holding the privileges attached to a token.

    Args:
        user (str): The user the token belongs to
        token (str): The token whose privileges are stored

    Returns:
        ExpiringSet: The set named ``token_priv_<user>_<token prefix>``, created with a one hour ttl
    """
    # For security reasons the whole token is never part of the redis key,
    # only its first ten characters.
    token_prefix = token[:10]
    store_name = f"token_priv_{user}_{token_prefix}"
    one_hour = 60 * 60

    return ExpiringSet(store_name, host=redis, ttl=one_hour)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def create_token(user: str, priv: list[str]) -> str:
    """Issue a fresh token for a user and record the privileges it carries.

    Args:
        user (str): The user the token is created for
        priv (list[str]): The privileges granted to the token

    Returns:
        str: The newly generated token (hex encoded sha256 digest of a random secret)
    """
    secret_bytes = str(generate_random_secret()).encode("utf-8", errors="replace")
    token = hashlib.sha256(secret_bytes).hexdigest()

    _get_token_store(user).add(token)

    # Clear whatever was stored under the same key before writing the
    # privileges as a single comma separated entry.
    privileges = _get_priv_store(user, token)
    privileges.pop_all()
    privileges.add(",".join(priv))

    return token
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def check_token(user: str, token: str) -> Optional[list[str]]:
    """Look up a token and return the privileges stored for it.

    Args:
        user (str): The user the token should belong to
        token (str): The token to look up

    Returns:
        Optional[list[str]]: The privileges stored for the token, or None when the token is unknown
            or no privileges are stored for it
    """
    if not _get_token_store(user).exist(token):
        return None

    stored = _get_priv_store(user, token).members()
    if len(stored) == 0:
        return None

    # Privileges are stored as one comma separated entry.
    return stored[0].split(",")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def validate_token(username: str, token: str) -> Optional[list[str]]:
    """Validate a token generated by this API and return its privileges.

    Args:
        username (str): The username the token was issued to
        token (str): The token to validate

    Raises:
        AuthenticationException: A token was provided but no privileges could be resolved for it

    Returns:
        Optional[list[str]]: The privileges of the token, or None when no token was provided
    """
    if not token:
        return None

    privileges = check_token(username, token)
    if not privileges:
        raise AuthenticationException("Invalid token")

    return privileges
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@elasticapm.capture_span(span_type="authentication")
def bearer_auth(
    data: str, skip_jwt: bool = False, skip_internal: bool = False
) -> tuple[Optional[dict[str, Any]], Optional[list[str]]]:
    """Authenticate the value of a Bearer type Authorization header.

    A value containing a "." is treated as a JWT; anything else is treated as an internal token.

    Args:
        data (str): The data following "Bearer" in the Authorization header.
        skip_jwt (bool, optional): Reject JWTs instead of validating them. Defaults to False.
        skip_internal (bool, optional): Reject internal tokens as an invalid authentication type rather than
            as not implemented. Defaults to False.

    Raises:
        AuthenticationException: The JWT could not be decoded, or decoded to nothing.
        InvalidDataException: The kind of token provided is skipped for this call.
        ClueNotImplementedError: An internal token was provided and not skipped.

    Returns:
        tuple[Optional[dict[str, Any]], Optional[list[str]]]: The parsed user data and its privileges
    """
    looks_like_jwt = "." in data

    if not looks_like_jwt:
        if skip_internal:
            raise InvalidDataException("Not a valid authentication type for this endpoint.")

        raise ClueNotImplementedError("Internal bearer auth is not yet supported.")

    if skip_jwt:
        raise InvalidDataException("Not a valid authentication type for this endpoint.")

    try:
        jwt_data = jwt_service.decode(data, validate_audience=True)
    except ClueException as e:
        logger.exception("Exception when decoding JWT:")
        raise AuthenticationException(
            "Something went wrong when decoding your key. Please reauthenticate.",
            cause=e,
        )

    if not jwt_data:
        logger.error("Invalid JWT provided.")
        raise AuthenticationException("Invalid JWT, please reauthenticate.")

    logger.debug("User successfully authenticated using JWT.")

    provider = jwt_service.get_provider(data)
    cur_user = user_service.parse_user_data(jwt_data, provider)

    return cur_user, ["R", "W"]
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@elasticapm.capture_span(span_type="authentication")
def validate_apikey(name: str, apikey: str) -> tuple[Optional[dict[str, Any]], Optional[list[str]]]:
    """Identify the caller through a configured API key.

    The key registered under ``name`` in ``config.auth.apikeys`` must match ``apikey``. The identity of the
    user is then read from the X-USERID, X-CLASSIFICATION, X-USERNAME and X-EMAIL request headers.

    Args:
        name (str): Name of the API key to check against
        apikey (str): The API key provided by the caller

    Raises:
        AccessDeniedException: API key authentication is disabled, the name or key is missing, the key does
            not exist or does not match, or the X-USERID / X-CLASSIFICATION headers are missing.

    Returns:
        tuple[Optional[dict[str, Any]], Optional[list[str]]]: The user data built from the request headers
            and the privileges granted to it
    """
    # Imported locally so this block does not depend on the module import list.
    import hmac

    if not config.auth.allow_apikeys:
        raise AccessDeniedException("API Key authentication disabled")

    if not (name and apikey):
        raise AccessDeniedException("You must provide your API key in the proper format in the Authorization header.")

    config_apikey = config.auth.apikeys.get(name, None)
    if not config_apikey:
        raise AccessDeniedException("API Key does not exist")

    # Compare in constant time so the response time does not reveal how many
    # leading characters of the key were guessed correctly. A configured value
    # that is not a string can never equal the provided string.
    keys_match = isinstance(config_apikey, str) and hmac.compare_digest(
        config_apikey.encode("utf-8"), apikey.encode("utf-8")
    )
    if not keys_match:
        raise AccessDeniedException("Invalid API key")

    uname = request.headers.get("X-USERID", None)
    classification = request.headers.get("X-CLASSIFICATION", None)
    user_name = request.headers.get("X-USERNAME", None)
    email = request.headers.get("X-EMAIL", None)
    if not uname or not classification:
        raise AccessDeniedException(
            "You must also provide X-USERID and X-CLASSIFICATION headers along with you API key."
        )

    return {"uname": uname, "name": user_name, "classification": classification, "email": email}, ["R", "W"]
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def validate_userpass(username: str, password: str) -> tuple[Optional[dict[str, Any]], Optional[list[str]]]:
    """Identify the user via username/password. (NOT IMPLEMENTED)

    Args:
        username (str): The username corresponding to the provided password
        password (str): The password used to authenticate as the user

    Raises:
        ClueNotImplementedError: Always, username/password authentication is not implemented

    Returns:
        tuple[Optional[dict[str, Any]], Optional[list[str]]]: Never returns; kept for signature parity with
            the other validators
    """
    # The message previously mentioned API key auth, which is a different code path.
    raise ClueNotImplementedError("Username/password auth is not yet supported.")
|
|
221
|
+
|
|
222
|
+
# if config.auth.internal.enabled and username and password:
|
|
223
|
+
# raise ClueNotImplementedError("API key auth is not yet supported.")
|
|
224
|
+
# else:
|
|
225
|
+
# raise AccessDeniedException("Username/Password authentication disabled")
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def decode_b64(b64_str: str) -> str:
    """Decode a base64 string into plain text.

    Args:
        b64_str (str): The base64 string

    Raises:
        InvalidDataException: The data was not valid base64, or did not decode to utf-8 text.

    Returns:
        str: A plain text representation of the data.
    """
    try:
        return base64.b64decode(b64_str).decode("utf-8")
    except ValueError as e:
        # binascii.Error (malformed base64 / bad padding) and UnicodeDecodeError
        # are both ValueError subclasses, so one handler covers both failures.
        raise InvalidDataException("Basic authentication data must be base64 encoded") from e
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@elasticapm.capture_span(span_type="authentication")
def basic_auth(
    data: str, is_base64: bool = True, skip_apikey: bool = False, skip_password: bool = False
) -> tuple[Optional[dict[str, Any]], Optional[list[str]]]:
    """Authenticate the value of a Basic type Authorization header.

    Args:
        data (str): The corresponding data in the Authorization header, in the form ``<name>:<secret>``.
        is_base64 (bool, optional): Whether the provided data is base64 encoded. Defaults to True.
        skip_apikey (bool, optional): Whether to skip apikey validation. Defaults to False.
        skip_password (bool, optional): Whether to skip password validation. Defaults to False.

    Raises:
        AuthenticationException: The login information is malformed or invalid.

    Returns:
        tuple[Optional[dict[str, Any]], Optional[list[str]]]: The user data and privileges, if validated
    """
    key_pair = decode_b64(data) if is_base64 else data

    # Split on the first ":" only, so the secret itself may contain colons.
    # Credentials with no separator are rejected as an authentication failure
    # instead of failing on tuple unpacking.
    username, separator, secret = key_pair.partition(":")
    if not separator:
        raise AuthenticationException("Invalid login information")

    validated_user = None
    priv = None
    if not skip_apikey:
        validated_user, priv = validate_apikey(username, secret)

    # Bruteforce protection
    # auth_fail_queue: NamedQueue = NamedQueue(f"ui-failed-{username}", **redis_config)  # type: ignore
    # if auth_fail_queue.length() >= config.auth.internal.max_failures:
    #     # Failed 'max_failures' times, stop trying... This will timeout in 'failure_ttl' seconds
    #     raise AuthenticationException(
    #         "Maximum password retry of {retry} was reached. "
    #         "This account is locked for the next {ttl} "
    #         "seconds...".format(
    #             retry=config.auth.internal.max_failures,
    #             ttl=config.auth.internal.failure_ttl,
    #         )
    #     )

    if not validated_user and not skip_password:
        validated_user, priv = validate_userpass(username, secret)

    if not validated_user:
        # auth_fail_queue.push(
        #     {
        #         "remote_addr": request.remote_addr,
        #         "host": request.host,
        #         "full_path": request.full_path,
        #     }
        # )
        raise AuthenticationException("Invalid login information")

    return validated_user, priv
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def extract_audience(access_token: str) -> list[str]:
    """Extract the audience from an encoded JWT.

    Args:
        access_token (str): The encoded JWT

    Returns:
        list[str]: The value of the "aud" claim as a list; empty when the claim is missing or empty
    """
    payload = decode_jwt_payload(access_token)
    audience: list[str] | str | None = payload.get("aud", None)

    if not audience:
        return []

    # A single audience is normalised into a one element list.
    if isinstance(audience, list):
        return audience

    return [audience]
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# TODO: sa-clue support
|
|
313
|
+
def check_obo(source: ExternalSource, access_token: str, username: str) -> tuple[Optional[str], Optional[str]]:
    """Check whether a token can be used against a source, exchanging it for an OBO token when needed.

    Args:
        source (ExternalSource): The source for which we want an access token.
        access_token (str): The access token to verify.
        username (str): The name of the user.

    Returns:
        tuple[Optional[str], Optional[str]]: The OBO access token (if one was obtained) in the first slot, or
            an error message in the second slot. Both are None when the source has no OBO target, when no
            service account token could be fetched, or when the token audience already matches the target.
    """
    if not source.obo_target:
        return None, None

    logger.debug("Source %s requires OBO to %s", source.name, source.obo_target)

    # Anything without a "." is not a JWT, fall back on the service account token.
    if "." not in access_token:
        logger.warning("JWT not provided, using service account.")
        sa_token = jwt_service.fetch_sa_token()
        if not sa_token:
            return None, None

        access_token = sa_token

    audience = extract_audience(access_token)

    # Check if this is a standard clue token
    clue_audience = jwt_service.get_audience(jwt_service.get_provider(access_token))
    if clue_audience in audience:
        obo_access_token: Optional[str] = get_obo_token(source.obo_target, access_token, username)
        if not obo_access_token:
            return None, "An error occurred when OBOing token"

        return obo_access_token, None

    # Check if the scope already matches (could happen in cases like howler -> clue -> howler)
    target_audience = config.api.obo_targets[source.obo_target].scope.split("/")[0]
    if target_audience not in audience:
        return None, "Invalid token for this enrichment"

    return None, None
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from clue.common.str_utils import default_string_value
|
|
2
|
+
from clue.config import CLASSIFICATION, config, get_branch, get_commit, get_version
|
|
3
|
+
from clue.helper.discover import get_apps_list
|
|
4
|
+
|
|
5
|
+
# Parsed classification definition, resolved once at import time and
# returned with every configuration payload.
classification_definition = CLASSIFICATION.get_parsed_classification_definition()

# List of apps, also resolved once at import time.
apps = get_apps_list()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_configuration():
    """Assemble the system configuration data exposed by the Clue API.

    Returns:
        dict: A "configuration" entry with the "auth", "system" and "ui" sections, and a "c12nDef" entry
            holding the parsed classification definition.
    """
    # Only advertise the oauth providers for which a client secret resolves.
    oauth_providers = []
    for name, provider in config.auth.oauth.providers.items():
        if default_string_value(provider.client_secret, env_name=f"{name.upper()}_CLIENT_SECRET"):
            oauth_providers.append(name)

    auth_section = {
        "oauth_providers": oauth_providers,
        # "internal": {"enabled": config.auth.internal.enabled},
    }

    system_section = {
        # "type": config.system.type,
        "version": get_version(),
        "branch": get_branch(),
        "commit": get_commit(),
    }

    ui_section = {
        "apps": apps,
        "cors_origins": config.ui.cors_origins,
    }

    return {
        "configuration": {
            "auth": auth_section,
            "system": system_section,
            "ui": ui_section,
        },
        "c12nDef": classification_definition,
    }
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
from urllib.parse import urljoin
|
|
3
|
+
|
|
4
|
+
import elasticapm
|
|
5
|
+
import requests
|
|
6
|
+
from flask import request
|
|
7
|
+
from pydantic import TypeAdapter, ValidationError
|
|
8
|
+
from requests import JSONDecodeError, exceptions
|
|
9
|
+
|
|
10
|
+
from clue.common.exceptions import (
|
|
11
|
+
AuthenticationException,
|
|
12
|
+
ClueException,
|
|
13
|
+
ClueValueError,
|
|
14
|
+
NotFoundException,
|
|
15
|
+
)
|
|
16
|
+
from clue.common.logging import get_logger
|
|
17
|
+
from clue.config import CLASSIFICATION, DEBUG, cache, config
|
|
18
|
+
from clue.models.config import ExternalSource
|
|
19
|
+
from clue.models.fetchers import FetcherDefinition, FetcherResult
|
|
20
|
+
from clue.models.selector import Selector
|
|
21
|
+
from clue.services import auth_service
|
|
22
|
+
|
|
23
|
+
# Module-level logger for the fetcher service.
logger = get_logger(__file__)

# Cache lifetime in seconds: one second in debug mode, five minutes otherwise.
CACHE_TIMEOUT: int = 1 if DEBUG else 5 * 60
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@cache.memoize(timeout=CACHE_TIMEOUT, args_to_ignore=["access_token"])  # Cached for 5 minutes (1 second in debug)
def get_supported_fetchers(
    source: ExternalSource, user: dict[str, Any], access_token: Optional[str] = None
) -> dict[str, FetcherDefinition]:
    """Gets all supported fetchers for a source

    Args:
        source (ExternalSource): The source to query; its url is joined with "fetchers/"
        user (dict[str, Any]): The user making the request; "uname" is used for the OBO check
        access_token (Optional[str], optional): The access token to use, if necessary. Defaults to None.

    Returns:
        dict[str, FetcherDefinition]: A dict of each ids mapped to fetcher metadata. Empty when the token is
            rejected, the source is unreachable or times out, or the response is not in the expected format.
    """
    logger.info("Requesting fetchers for source %s", source.name)

    url = urljoin(source.url, "fetchers/")

    obo_access_token = None
    if access_token:
        obo_access_token, error = auth_service.check_obo(source, access_token, user["uname"])

        if error:
            logger.error("%s: %s", source.name, error)
            return {}

    headers = {"Accept": "application/json"}
    bearer_token = obo_access_token or access_token
    if bearer_token:
        headers["Authorization"] = f"Bearer {bearer_token}"

    with elasticapm.capture_span(f"GET {url}", span_type="http"):
        try:
            rsp = requests.get(url, headers=headers, timeout=5.0)
            result = rsp.json()

            if not rsp.ok:
                err = result["api_error_message"]
                logger.error(f"Error from upstream server: {rsp.status_code=}, {err=}")
                # An error response carries no fetcher definitions, stop here
                # rather than trying to validate its payload.
                return {}

            return TypeAdapter(dict[str, FetcherDefinition]).validate_python(result["api_response"])
        except (exceptions.ConnectionError, exceptions.Timeout):
            # Read timeouts are not ConnectionErrors, so they are caught explicitly:
            # one slow source must not break the listing for every other source.
            # NOTE(review): the original comment stated that failures are not saved to the
            # cache so the next query retries - confirm the memoizer does not cache the empty dict.
            logger.exception("Unable to connect: %s", url)
            return {}
        except (requests.exceptions.JSONDecodeError, KeyError, TypeError):
            # TypeError covers a JSON body that is not an object (e.g. a list or null).
            logger.exception("External API did not return expected format:")
            return {}
        except ValidationError:
            logger.exception("ValidationError in response from %s:", source.url)
            return {}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def all_supported_fetchers(user: dict[str, Any], access_token: Optional[str] = None) -> dict[str, FetcherDefinition]:
    """Collect the supported fetchers of every configured external source.

    Args:
        user (dict[str, Any]): The user making the request
        access_token (Optional[str], optional): The access token to use, if necessary. Defaults to None.

    Returns:
        dict[str, FetcherDefinition]: Every fetcher definition, keyed as ``<source name>.<fetcher key>``
    """
    merged: dict[str, FetcherDefinition] = {}

    for source in config.api.external_sources:
        source_fetchers = get_supported_fetchers(source, user, access_token=access_token)

        # Prefix each key with the source name so fetchers of different sources cannot collide.
        merged.update({f"{source.name}.{key}": definition for key, definition in source_fetchers.items()})

        logger.debug("Plugin %s exposes %s fetcher(s)", source.name, len(source_fetchers))

    return merged
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_plugins_supported_fetchers(user: dict[str, Any]) -> dict[str, FetcherDefinition]:
    """Return the supported fetchers of each external service, filtered to what the user has access to.

    Args:
        user (dict[str, Any]): The user making the request; "classification" and "uname" are read

    Returns:
        dict[str, FetcherDefinition]: The fetchers whose classification is accessible to the user
    """
    available_fetchers: dict[str, FetcherDefinition] = {}

    access_token = request.headers.get("Authorization", type=str)
    if access_token:
        # Header is expected as "<scheme> <token>". A value without a space used to
        # raise an IndexError, it is now treated as if no token had been provided.
        header_parts = access_token.split(" ")
        access_token = header_parts[1] if len(header_parts) > 1 else None

    all_fetchers = all_supported_fetchers(
        user,
        access_token=access_token,
    )

    logger.info("Retrieving fetchers for classification %s", user["classification"])

    for fetcher_id, fetcher in all_fetchers.items():
        # Validate if the user is allowed to even see the fetcher
        if user and not CLASSIFICATION.is_accessible(user["classification"], fetcher.classification):
            logger.info(
                "Not including fetchers from source %s at classification %s", fetcher.id, user["classification"]
            )
            continue

        available_fetchers[fetcher_id] = fetcher

    logger.info("%s fetchers are available for user %s", len(available_fetchers), user["uname"])

    return available_fetchers
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def run_fetcher(plugin_id: str, fetcher_id: str, user: dict[str, Any]) -> FetcherResult:
    """Executes a specified fetcher.

    The JSON body of the current request is validated as a Selector and forwarded to the plugin.

    Args:
        plugin_id (str): The ID of the plugin.
        fetcher_id (str): The ID of the fetcher to run.
        user (dict[str, Any]): The user dict of the user running the fetcher.

    Raises:
        NotFoundException: Raised whenever the plugin doesn't exist.
        AuthenticationException: Raised when the provided token is not valid for this plugin.
        ClueValueError: Raised when a validation error is encountered.
        ClueException: Raised whenever an error is returned by the plugin endpoint, or the plugin cannot be
            reached, times out, or does not answer with JSON.

    Returns:
        FetcherResult: The result of the fetcher.
    """
    plugin = next((source for source in config.api.external_sources if source.name == plugin_id), None)

    if not plugin:
        raise NotFoundException(f"Plugin {plugin_id} does not exist.")

    access_token = request.headers.get("Authorization", type=str)
    if access_token:
        # Header is expected as "<scheme> <token>". A value without a space used to
        # raise an IndexError, it is now treated as if no token had been provided.
        header_parts = access_token.split(" ")
        access_token = header_parts[1] if len(header_parts) > 1 else None

    obo_access_token = None
    if access_token:
        obo_access_token, error = auth_service.check_obo(plugin, access_token, user["uname"])

        if error:
            logger.error("%s: %s", plugin.name, error)
            raise AuthenticationException("Invalid token provided for this enrichment.")

    headers = {"Accept": "application/json"}
    if obo_access_token or access_token:
        headers["Authorization"] = f"Bearer {obo_access_token or access_token}"

    if request.content_type == "application/json":
        parameters = request.json
    else:
        # TODO: Pass parameters via urlencode?
        parameters = {}

    try:
        Selector.model_validate(parameters)

        response = requests.post(
            urljoin(plugin.url, f"fetchers/{fetcher_id}"),
            json=parameters,
            headers=headers,
            timeout=request.args.get("max_timeout", 60.0, type=float),
        )

        result = response.json()

        if not response.ok:
            raise ClueException(
                result["api_error_message"] or result["api_response"].get("error", ""), status_code=response.status_code
            )

        return FetcherResult.model_validate(result["api_response"], context={"is_response": True})
    except ValidationError as err:
        # NOTE(review): this handler also catches a ValidationError raised while parsing the
        # plugin response above, which is then reported as a request body problem - confirm
        # whether the two cases should be told apart.
        logger.exception("Invalid Request Body:")
        raise ClueValueError(
            "Validation error encountered on request body. Ensure your request body is properly formatted.",
            status_code=400,
        ) from err
    except (JSONDecodeError, exceptions.ConnectionError, exceptions.Timeout) as err:
        # Read timeouts are not ConnectionErrors, so they are listed explicitly.
        logger.exception(f"Something went wrong when running fetcher from plugin '{plugin_id}'")
        raise ClueException(
            f"Something went wrong when running fetcher from plugin '{plugin_id}': {err.__class__.__name__}."
        ) from err
|