clue-api 1.0.0.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. clue/.gitignore +21 -0
  2. clue/__init__.py +0 -0
  3. clue/api/__init__.py +211 -0
  4. clue/api/base.py +99 -0
  5. clue/api/v1/__init__.py +82 -0
  6. clue/api/v1/actions.py +92 -0
  7. clue/api/v1/auth.py +243 -0
  8. clue/api/v1/configs.py +83 -0
  9. clue/api/v1/fetchers.py +94 -0
  10. clue/api/v1/lookup.py +221 -0
  11. clue/api/v1/registration.py +109 -0
  12. clue/api/v1/static.py +94 -0
  13. clue/app.py +166 -0
  14. clue/cache/__init__.py +129 -0
  15. clue/common/__init__.py +0 -0
  16. clue/common/classification.py +1006 -0
  17. clue/common/classification.yml +130 -0
  18. clue/common/dict_utils.py +130 -0
  19. clue/common/exceptions.py +199 -0
  20. clue/common/forge.py +152 -0
  21. clue/common/json_utils.py +10 -0
  22. clue/common/list_utils.py +11 -0
  23. clue/common/logging/__init__.py +291 -0
  24. clue/common/logging/audit.py +157 -0
  25. clue/common/logging/format.py +42 -0
  26. clue/common/regex.py +31 -0
  27. clue/common/str_utils.py +213 -0
  28. clue/common/swagger.py +139 -0
  29. clue/common/uid.py +47 -0
  30. clue/config.py +60 -0
  31. clue/constants/__init__.py +0 -0
  32. clue/constants/supported_types.py +38 -0
  33. clue/cronjobs/__init__.py +30 -0
  34. clue/cronjobs/plugins.py +32 -0
  35. clue/error.py +129 -0
  36. clue/gunicorn_config.py +29 -0
  37. clue/healthz.py +74 -0
  38. clue/helper/discover.py +53 -0
  39. clue/helper/headers.py +30 -0
  40. clue/helper/oauth.py +128 -0
  41. clue/models/__init__.py +0 -0
  42. clue/models/actions.py +243 -0
  43. clue/models/config.py +456 -0
  44. clue/models/fetchers.py +136 -0
  45. clue/models/graph.py +162 -0
  46. clue/models/model_list.py +52 -0
  47. clue/models/network.py +430 -0
  48. clue/models/results/__init__.py +34 -0
  49. clue/models/results/base.py +10 -0
  50. clue/models/results/graph.py +26 -0
  51. clue/models/results/image.py +22 -0
  52. clue/models/results/status.py +55 -0
  53. clue/models/results/validation.py +57 -0
  54. clue/models/selector.py +67 -0
  55. clue/models/utils.py +52 -0
  56. clue/models/validators.py +19 -0
  57. clue/patched.py +8 -0
  58. clue/plugin/__init__.py +1008 -0
  59. clue/plugin/helpers/__init__.py +0 -0
  60. clue/plugin/helpers/central_server.py +27 -0
  61. clue/plugin/helpers/email_render.py +228 -0
  62. clue/plugin/helpers/token.py +34 -0
  63. clue/plugin/helpers/trino.py +103 -0
  64. clue/plugin/interactive.py +270 -0
  65. clue/plugin/models.py +19 -0
  66. clue/plugin/utils.py +78 -0
  67. clue/remote/__init__.py +0 -0
  68. clue/remote/datatypes/__init__.py +130 -0
  69. clue/remote/datatypes/cache.py +62 -0
  70. clue/remote/datatypes/events.py +118 -0
  71. clue/remote/datatypes/hash.py +193 -0
  72. clue/remote/datatypes/queues/__init__.py +0 -0
  73. clue/remote/datatypes/queues/comms.py +62 -0
  74. clue/remote/datatypes/set.py +96 -0
  75. clue/remote/datatypes/user_quota_tracker.py +54 -0
  76. clue/security/__init__.py +211 -0
  77. clue/security/obo.py +95 -0
  78. clue/security/utils.py +34 -0
  79. clue/services/action_service.py +186 -0
  80. clue/services/auth_service.py +348 -0
  81. clue/services/config_service.py +38 -0
  82. clue/services/fetcher_service.py +203 -0
  83. clue/services/jwt_service.py +233 -0
  84. clue/services/lookup_service.py +786 -0
  85. clue/services/type_service.py +165 -0
  86. clue/services/user_service.py +152 -0
  87. clue_api-1.0.0.dev7.dist-info/METADATA +111 -0
  88. clue_api-1.0.0.dev7.dist-info/RECORD +91 -0
  89. clue_api-1.0.0.dev7.dist-info/WHEEL +4 -0
  90. clue_api-1.0.0.dev7.dist-info/entry_points.txt +8 -0
  91. clue_api-1.0.0.dev7.dist-info/licenses/LICENSE +11 -0
@@ -0,0 +1,1008 @@
1
+ import ipaddress
2
+ import json
3
+ import logging
4
+ import os
5
+ import time
6
+ from typing import Any, Callable, Literal, Self, Union, cast
7
+ from urllib import parse as ul
8
+
9
+ import gevent
10
+ import gevent.pool
11
+ from flask import Flask, Response, jsonify, make_response, request
12
+ from flask.globals import _cv_request
13
+ from gevent import Greenlet
14
+ from pydantic import BaseModel, TypeAdapter, ValidationError
15
+
16
+ from clue.cache import Cache
17
+ from clue.common.exceptions import (
18
+ AuthenticationException,
19
+ ClueException,
20
+ InvalidDataException,
21
+ NotFoundException,
22
+ TimeoutException,
23
+ UnprocessableException,
24
+ )
25
+ from clue.common.logging.format import BRL_DATE_FORMAT, BRL_LOG_FORMAT
26
+ from clue.models.actions import (
27
+ Action,
28
+ ActionBase,
29
+ ActionResult,
30
+ ActionSpec,
31
+ ExecuteRequest,
32
+ )
33
+ from clue.models.fetchers import FetcherDefinition, FetcherResult
34
+ from clue.models.network import QueryEntry
35
+ from clue.models.selector import Selector
36
+ from clue.plugin.helpers.token import get_username
37
+ from clue.plugin.models import BulkEntry
38
+ from clue.plugin.utils import Params
39
+
40
+
41
def default_validate_token():
    """Extract the bearer token from the ``Authorization`` header of the current request.

    A default validation function that pulls from the authorization header. Not used by default.

    When the header contains a space it is treated as ``"<scheme> <token>"`` and the second
    whitespace-separated field becomes the token. A header without any space is used verbatim.

    Returns:
        A ``(token, error)`` tuple: ``(token, None)`` when a token was found, otherwise
        ``(None, message)`` with a human-readable explanation.
    """
    header = request.headers.get("Authorization", None, type=str)

    token = header
    if header and " " in header:
        # A header that carries a scheme but no credentials splits into fewer than two fields.
        # Report that as a missing token instead of letting ``fields[1]`` raise IndexError.
        fields = header.split()
        token = fields[1] if len(fields) > 1 else None

    if token:
        return token, None

    return None, "No bearer token was provided. Please provide a Bearer token in the Authorization header"
51
+
52
+
53
def liveness(**_):
    """Default liveness probe: ignores any keyword arguments and answers with ``OK``."""
    ok_response = make_response("OK")
    return ok_response
56
+
57
+
58
def readyness(**_):
    """Default readyness probe: ignores any keyword arguments and answers with ``OK``."""
    ok_response = make_response("OK")
    return ok_response
61
+
62
+
63
def build_default_logger() -> logging.Logger:
    """Configure and return the default logger used when the caller does not provide one.

    The logger is named ``clue.plugin.default``, is set to ``INFO`` and writes through a
    ``logging.StreamHandler`` formatted with ``BRL_LOG_FORMAT`` / ``BRL_DATE_FORMAT``.

    Returns:
        The configured ``clue.plugin.default`` logger.
    """
    logger = logging.getLogger("clue.plugin.default")
    logger.setLevel(logging.INFO)

    # ``logging.getLogger`` hands back the same object for a given name, so only attach the console
    # handler the first time. Otherwise every call stacks one more handler on the logger and each
    # record is printed once per call made so far.
    if not logger.handlers:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        console.setFormatter(logging.Formatter(BRL_LOG_FORMAT, BRL_DATE_FORMAT))
        logger.addHandler(console)

    return logger
73
+
74
+
75
+ class CluePlugin:
76
+ """Helper class for creating clue plugins with proper server responses and behaviour.
77
+
78
+ Includes a default bulk lookup function that multithreads requests to the resource being queried.
79
+
80
+ Attributes:
81
+ alternate_bulk_lookup:
82
+ Provides an alternative implementation for bulk enrichment.
83
+
84
+ By default, clue plugins will split bulk enrichments into many parallel threads, allowing the plugin to
85
+ use the same enrich function implemented without the need for complex bulk enrichment responses. However,
86
+ in cases where it is necessary to streamline enrichment of bulk selectors (i.e. making hundreds of SQL
87
+ queries instead of one), this alternate lookup function can be used.
88
+ app:
89
+ The underlying Flask object representing the server receiving and responding to requests from the
90
+ central API.
91
+ app_name:
92
+ The name of this clue plugin. Used to configure the cache and for logging.
93
+ cache:
94
+ The instantiated cache for this clue plugin. Can be used directly to cache and retrieve additional data.
95
+ classification:
96
+ The classification level of selectors this plugin accepts.
97
+
98
+ Enrichment requests exceeding this classification level will not be processed by this plugin.
99
+ logger:
100
+ The logging instance used internally.
101
+ supported_types:
102
+ The list of types supported by this plugin for enrichment.
103
+ actions:
104
+ A list of action definitions this plugin supports.
105
+ setup_actions:
106
+ An optional function called when a list of supported actions is asked for.
107
+
108
+ Useful for runtime generation of actions - for example, returning a list of valid arguments that changes on
109
+ a per-user basis. Can be used instead of the actions attribute.
110
+ validate_token:
111
+ A user-provided function for validating the authentication token provided from the central API.
112
+
113
+ Can be used to ensure the audience matches the expected value, ensuring specific fields are present in the
114
+ JWT, etc.
115
+ enrich:
116
+ The main enrichment function.
117
+
118
+ Accepts the type and value of the selector, a list of parameters relevant to the enrichment, and the token
119
+ provided from the central API (assuming authentication is enabled). Returns QueryEntry object(s) denoting
120
+ the enrichments for the given selector.
121
+ run_action:
122
+ The main function for running actions.
123
+
124
+ Accepts the selected action definition as well as the ExecuteRequest. If the Action definition's parameters
125
+ were extended with a custom ExecuteRequest (i.e. to add additional user parameters) that instance will be
126
+ passed instead, and casting the argument will be necessary.
127
+ fetchers:
128
+ A list of fetcher definitions this plugin supports.
129
+
130
+ run_fetcher:
131
+ The main function for running fetchers.
132
+
133
+ Accepts the selected fetcher definition as well as the selector to execute the fetcher on. Returns a
134
+ completed FetcherResult.
135
+ liveness:
136
+ A liveness probe for kubernetes implementations of clue.
137
+ readyness:
138
+ A readyness probe for kubernetes implementations of clue.
139
+ """
140
+
141
+ alternate_bulk_lookup: Callable[[list[dict[str, str]], Params], dict[str, dict[str, BulkEntry]]] | None
142
+ """Provides an alternative implementation for bulk enrichment.
143
+
144
+ By default, clue plugins will split bulk enrichments into many parallel threads, allowing the plugin to
145
+ use the same enrich function implemented without the need for complex bulk enrichment responses. However,
146
+ in cases where it is necessary to streamline enrichment of bulk selectors (i.e. making hundreds of SQL
147
+ queries instead of one), this alternate lookup function can be used.
148
+ """
149
+
150
+ app: Flask
151
+ "The underlying Flask object representing the server receiving and responding to requests from the central API."
152
+
153
+ app_name: str
154
+ "The name of this clue plugin. Used to configure the cache and for logging."
155
+
156
+ cache: Cache | None
157
+ "The instantiated cache for this clue plugin. Can be used directly to cache and retrieve additional data."
158
+
159
+ classification: str
160
+ """The classification level of selectors this plugin accepts.
161
+
162
+ Enrichment requests exceeding this classification level will not be processed by this plugin.
163
+ """
164
+
165
+ logger: logging.Logger
166
+ "The logging instance used internally."
167
+
168
+ supported_types: set[str] | None
169
+ "The list of types supported by this plugin for enrichment."
170
+
171
+ actions: list[Action]
172
+ "A list of action definitions this plugin supports."
173
+
174
+ setup_actions: Callable[[list[Action], str | None], list[Action]] | None
175
+ """An optional function called when a list of supported actions is asked for.
176
+
177
+ Useful for runtime generation of actions - for example, returning a list of valid arguments that changes on a
178
+ per-user basis. Can be used instead of the actions attribute.
179
+ """
180
+
181
+ validate_token: Callable[[], tuple[str | None, str | None]] | None
182
+ """A user-provided function for validating the authentication token provided from the central API.
183
+
184
+ Can be used to ensure the audience matches the expected value, ensuring specific fields are present in the
185
+ JWT, etc.
186
+ """
187
+
188
+ enrich: Callable[[str, str, Params, str | None], Union[list[QueryEntry], QueryEntry]] | None
189
+ """The main enrichment function.
190
+
191
+ Accepts the type and value of the selector, a list of parameters relevant to the enrichment, and the token provided
192
+ from the central API (assuming authentication is enabled). Returns QueryEntry object(s) denoting the enrichments for
193
+ the given selector.
194
+ """
195
+
196
+ run_action: Callable[[Action, ExecuteRequest, str | None], ActionResult] | None
197
+ """The main function for running actions.
198
+
199
+ Accepts the selected action definition as well as the ExecuteRequest. If the Action definition's parameters were
200
+ extended with a custom ExecuteRequest (i.e. to add additional user parameters) that instance will be passed instead,
201
+ and casting the argument will be necessary.
202
+ """
203
+
204
+ fetchers: list[FetcherDefinition] | None
205
+ "A list of fetcher definitions this plugin supports."
206
+
207
+ run_fetcher: Callable[[FetcherDefinition, Selector, str | None], FetcherResult] | None
208
+ """The main function for running fetchers.
209
+
210
+ Accepts the selected fetcher definition as well as the selector to execute the fetcher on. Returns a completed
211
+ FetcherResult.
212
+ """
213
+
214
+ liveness: Callable[[], Response]
215
+ "A liveness probe for kubernetes implementations of clue."
216
+
217
+ readyness: Callable[[], Response]
218
+ "A readyness probe for kubernetes implementations of clue."
219
+
220
def __init__(
    self: Self,
    app_name: str,
    actions: list[Action] | None = None,
    alternate_bulk_lookup: Callable[[list[dict[str, str]], Params], dict[str, dict[str, BulkEntry]]] | None = None,
    cache_timeout: int = 5 * 60,  # five minute timeout
    classification: str = os.environ.get("CLASSIFICATION", "TLP:CLEAR"),
    enable_apm: bool = False,
    enable_cache: Union[bool, Literal["redis"], Literal["local"]] = True,
    enrich: Callable[[str, str, Params, str | None], Union[list[QueryEntry], QueryEntry]] | None = None,
    fetchers: list[FetcherDefinition] | None = None,
    liveness: Callable[[], Response] = liveness,
    local_cache_options: dict[str, Any] | None = None,
    logger: logging.Logger | None = None,
    readyness: Callable[[], Response] = readyness,
    run_action: Callable[[Action, ExecuteRequest, str | None], ActionResult] | None = None,
    run_fetcher: Callable[[FetcherDefinition, Selector, str | None], FetcherResult] | None = None,
    setup_actions: Callable[[list[Action], str | None], list[Action]] | None = None,
    supported_types: set[str] | None = None,
    validate_token: Callable[[], tuple[str | None, str | None]] | None = None,
) -> None:
    """Helper class for creating clue plugins with proper server responses and behaviour.

    Includes a default bulk lookup function that multithreads requests to the resource being queried.

    Args:
        app_name:
            The name of this clue plugin. Used to configure the cache and for logging.
        actions:
            A list of action definitions this plugin supports. When omitted, the plugin starts with
            its own empty list.
        alternate_bulk_lookup:
            Provides an alternative implementation for bulk enrichment.

            By default, clue plugins will split bulk enrichments into many parallel threads, allowing the plugin
            to use the same enrich function implemented without the need for complex bulk enrichment responses.
            However, in cases where it is necessary to streamline enrichment of bulk selectors (i.e. making
            hundreds of SQL queries instead of one), this alternate lookup function can be used.
        cache_timeout:
            How long the cache should store cached data before purging it. Passed to the cache as its
            ``timeout``; defaults to five minutes.
        classification:
            The classification level of selectors this plugin accepts.

            Enrichment requests exceeding this classification level will not be processed by this plugin.
            The default is read from the ``CLASSIFICATION`` environment variable (falling back to
            ``TLP:CLEAR``) once, when this function is defined, not on every call.
        enable_apm:
            When true, APM initialisation is attempted after the routes are registered.
        enable_cache:
            Controls the cache. ``True`` creates a cache whose type is taken from the ``CACHE_TYPE``
            environment variable (``"redis"`` when unset); ``"redis"`` or ``"local"`` selects that cache
            type explicitly; a falsy value disables caching and leaves ``self.cache`` as ``None``.
        enrich:
            The main enrichment function.

            Accepts the type and value of the selector, a list of parameters relevant to the enrichment, and the
            token provided from the central API (assuming authentication is enabled). Returns QueryEntry object(s)
            denoting the enrichments for the given selector.
        fetchers:
            A list of fetcher definitions this plugin supports.
        liveness:
            A liveness probe for kubernetes implementations of Clue.
        local_cache_options:
            Forwarded unchanged to the cache as ``local_cache_options``.
        logger:
            The logging instance used internally. A default logger is built when none is given.
        readyness:
            A readyness probe for kubernetes implementations of Clue.
        run_action:
            The main function for running actions.

            Accepts the selected action definition as well as the ExecuteRequest. If the Action definition's
            parameters were extended with a custom ExecuteRequest (i.e. to add additional user parameters) that
            instance will be passed instead, and casting the argument will be necessary.
        run_fetcher:
            The main function for running fetchers.

            Accepts the selected fetcher definition as well as the selector to execute the fetcher on. Returns a
            completed FetcherResult.
        setup_actions:
            An optional function called when a list of supported actions is asked for.

            Useful for runtime generation of actions - for example, returning a list of valid arguments that
            changes on a per-user basis. Can be used instead of the actions attribute.
        supported_types:
            The list of types supported by this plugin for enrichment.
        validate_token:
            A user-provided function for validating the authentication token provided from the central API.

            Can be used to ensure the audience matches the expected value, ensuring specific fields are present in
            the JWT, etc.
    """
    self.alternate_bulk_lookup = alternate_bulk_lookup
    self.app = Flask(__name__.split(".")[0])
    self.app_name = app_name
    self.classification = classification
    self.liveness = liveness
    self.readyness = readyness
    self.supported_types = supported_types

    # Build a fresh list per instance. A shared ``[]`` default would be the very same list object
    # for every plugin created without explicit actions, so a mutation on one would leak into all.
    self.actions = actions if actions is not None else []
    self.setup_actions = setup_actions

    self.app.url_map.strict_slashes = False

    # The logger must exist before the routes are initialised, since that step logs.
    self.logger = logger if logger else build_default_logger()

    self.enrich = enrich
    self.run_action = run_action
    self.validate_token = validate_token

    self.fetchers = fetchers
    self.run_fetcher = run_fetcher

    self.__init_routes()

    if enable_apm:
        self.__init_apm()

    if enable_cache:
        # We support either a boolean (the cache type then comes from the CACHE_TYPE environment
        # variable, defaulting to redis) or an explicit cache type ("redis" or "local").
        if isinstance(enable_cache, bool):
            self.cache = Cache(
                self.app_name,
                self.app,
                cast(Union[Literal["redis"], Literal["local"]], os.environ.get("CACHE_TYPE", "redis")),
                timeout=cache_timeout,
                local_cache_options=local_cache_options,
            )
        else:
            self.cache = Cache(
                self.app_name,
                self.app,
                enable_cache,
                timeout=cache_timeout,
                local_cache_options=local_cache_options,
            )
    else:
        self.cache = None

    # Quieten werkzeug and route its records through the handlers of this logger's parent.
    wlog = logging.getLogger("werkzeug")
    wlog.setLevel(logging.WARNING)
    if self.logger.parent:  # pragma: no cover
        for h in self.logger.parent.handlers:
            wlog.addHandler(h)

    self.logger.debug("Initialization complete!")
357
+
358
def __check_actions(self) -> list[Action] | None:
    """Produce the action list through the ``setup_actions`` hook, when one is configured.

    If a ``validate_token`` function is set it is called first and its token is handed to the hook;
    otherwise the hook receives ``None`` as the token.

    Returns:
        The list returned by ``setup_actions``, or ``None`` when no hook is configured.

    Raises:
        AuthenticationException: ``validate_token`` reported an error.
    """
    if not self.setup_actions:
        return None

    token = None
    if self.validate_token:
        token, error = self.validate_token()

        if error:
            self.logger.error("Error on token validation: %s", error)

            raise AuthenticationException(error)

    return self.setup_actions(self.actions or [], token)
373
+
374
def __init_apm(self):
    """Attach Elastic APM to the Flask app when ``APM_SERVER_URL`` is set in the environment.

    Does nothing when the variable is absent.
    """
    apm_server_url = os.environ.get("APM_SERVER_URL")

    if apm_server_url is not None:
        self.logger.debug("Initializing apm")

        # Imported here rather than at module level, so elasticapm is only loaded on this path.
        import elasticapm
        from elasticapm.contrib.flask import ElasticAPM

        self.logger.info(f"Exporting application metrics to: {apm_server_url}")

        apm_client = elasticapm.Client(server_url=apm_server_url, service_name=self.app_name)
        ElasticAPM(self.app, client=apm_client)
390
+
391
def __build_ctx(self):
    """Build the ``wrap_ctx`` helper used to run a callable inside a greenlet.

    A copy of the request context that is current when this method is called (if there is one) is
    captured, and ``wrap_ctx`` pushes that copy before invoking the wrapped callable.

    Returns:
        ``wrap_ctx(func, *args, **kwargs)``, which returns a ``(result, error)`` tuple.
    """
    # Capture the caller's request context now; the greenlets push this copy later.
    active_ctx = _cv_request.get(None)
    ctx_copy = None
    if active_ctx:
        ctx_copy = active_ctx.copy()

    def wrap_ctx(func: Callable, *args: Any, **kwargs) -> tuple[Any, Exception | None]:
        """Run ``func`` with the captured request context pushed.

        Returns ``(value, None)`` on success, ``([], None)`` when ``func`` raises
        NotFoundException and ``(None, exc)`` for any other ClueException. Exceptions that are not
        ClueException are not caught here.
        """
        if ctx_copy:
            ctx_copy.push()

        try:
            self.logger.debug("Executing enrichment function")

            outcome = func(*args, **kwargs)
        except NotFoundException:
            self.logger.warning("NotFoundException thrown in greenlet")

            return [], None
        except ClueException as exc:
            self.logger.exception("ClueException thrown in greenlet")

            return None, exc
        else:
            return outcome, None

    return wrap_ctx
416
+
417
def __default_bulk_lookup(  # noqa: C901
    self: Self,
    bulk_result: dict[str, dict[str, BulkEntry]],
    items: list[dict[str, str]],
    params: Params,
    token: str | None,
):
    """Default bulk lookup that harnesses greenlets to multithread the provided enrich function.

    One greenlet is spawned per item, each calling ``self.enrich`` through the context wrapper. The
    pool is then joined until ``params.deadline + params.max_timeout``, and every item gets a
    ``BulkEntry`` written into ``bulk_result``: the results on success, otherwise an error.

    Args:
        bulk_result:
            Mapping of type name to value to ``BulkEntry``; updated in place. It must already
            contain a dictionary for every type present in ``items``.
        items:
            The selectors to enrich, each a dictionary with ``"type"`` and ``"value"`` keys.
        params:
            Request parameters; forwarded to ``enrich`` and used for the join timeout.
        token:
            The token forwarded to ``enrich``; may be ``None``.
    """
    self.logger.debug("Using default bulk lookup")

    # Submit the different requested items to the threadpool executor
    wrap_ctx = self.__build_ctx()
    thread_pool = gevent.pool.Pool(min(len(items), (os.cpu_count() or 0) * 5 + 4))
    greenlets: list[tuple[str, str, Greenlet]] = []

    for entry in items:
        # Request results for the type/value tuple
        greenlets.append(
            (
                entry["type"],
                entry["value"],
                thread_pool.spawn(
                    wrap_ctx,
                    self.enrich,
                    entry["type"],
                    entry["value"],
                    params,
                    token,
                ),
            )
        )

    timeout = params.deadline + params.max_timeout - time.time()
    self.logger.debug("Joining threadpool (timeout=%s)", timeout)

    thread_pool.join(timeout=timeout)

    for type_name, value, greenlet in greenlets:
        # ``value`` is the (result, error) tuple from wrap_ctx; it is None when the greenlet has
        # not produced a return value (still running, or it raised).
        greenlet_result = greenlet.value

        if greenlet_result is not None and greenlet_result[0] is not None:
            results: Union[list[QueryEntry], QueryEntry] = greenlet_result[0]
            if not isinstance(results, list):
                results = [results]

            bulk_result[type_name][value] = BulkEntry(items=results)

            if self.cache:
                self.logger.info("Caching results for selector %s:%s", type_name, value)

                try:
                    self.cache.set(type_name, value, params, results)
                except KeyError:
                    self.logger.warning("Selector not present in bulk result, skipping cache step")
        else:
            # No result: assume a timeout unless wrap_ctx reported an error or the greenlet
            # itself died with an exception (the latter takes precedence).
            error = "Request Timed Out"
            if greenlet_result is not None and greenlet_result[1] is not None:
                error = str(greenlet_result[1])

            bulk_result[type_name][value] = BulkEntry(
                error=(error if not greenlet.exception else str(greenlet.exception))
            )

    # Count only the greenlets that are still alive. Taking the length of a list of booleans
    # would always report the total number of greenlets.
    self.logger.debug(
        "Completing bulk lookup (%s threads remaining)",
        sum(1 for _, _, pending in greenlets if not pending.dead),
    )
484
+
485
def __init_routes(self):
    """Register all plugin endpoints on the Flask application.

    Each endpoint is named after its view function's ``__name__``.
    """
    self.logger.debug("Initializing routes")

    # (rule, view function, HTTP method), registered in this order.
    api_routes = (
        ("/actions/", self.get_actions, "GET"),
        ("/actions/<action_id>/", self.execute_action, "POST"),
        ("/fetchers/", self.get_fetchers, "GET"),
        ("/fetchers/<fetcher_id>", self.execute_fetcher, "POST"),
        ("/types/", self.get_type_names, "GET"),
        ("/lookup/<type_name>/<value>/", self.lookup, "GET"),
        ("/lookup/", self.bulk_lookup, "POST"),
    )
    for rule, view, http_method in api_routes:
        self.app.add_url_rule(rule, view.__name__, view, methods=[http_method])

    # The health probes are registered without an explicit ``methods`` argument.
    health_routes = (
        ("/healthz/live", self.liveness),
        ("/healthz/ready", self.readyness),
    )
    for rule, probe in health_routes:
        self.app.add_url_rule(rule, probe.__name__, probe)
502
+
503
def make_api_response(self: Self, data: Any, err: str = "", status_code: int = 200) -> Response:
    """Wrap ``data`` in this API's standard JSON envelope and log the request outcome.

    When no ``err`` is supplied and ``data`` is a failed ``FetcherResult`` or ``ActionResult``, the
    error message is taken from the result (``error`` and ``summary`` respectively). Pydantic
    models are dumped to JSON-compatible data with ``None`` fields excluded.

    Args:
        data: The payload placed under ``api_response``.
        err: The message placed under ``api_error_message``.
        status_code: Used both as the HTTP status and as ``api_status_code``.

    Returns:
        The Flask response carrying the JSON envelope.
    """
    if not err and isinstance(data, FetcherResult) and data.outcome == "failure":
        err = data.error or err

    if not err and isinstance(data, ActionResult) and data.outcome == "failure":
        err = data.summary or err

    if isinstance(data, BaseModel):
        data = data.model_dump(mode="json", exclude_none=True)

    error_suffix = f": {err}" if err else ""
    self.logger.info("%s %s - %s%s", request.method, request.path, status_code, error_suffix)

    envelope = {
        "api_response": data,
        "api_error_message": err,
        "api_status_code": status_code,
    }

    return make_response(jsonify(envelope), status_code)
526
+
527
+ def get_type_names(self: Self) -> Response:
528
+ "Return supported type names."
529
+ return self.make_api_response({tname: self.classification for tname in sorted(self.supported_types or [])})
530
+
531
+ def lookup(self: Self, type_name: str, value: str) -> Response: # noqa: C901
532
+ """Run a lookup on a single type/value.
533
+
534
+ Variables:
535
+ type_name => Type to look up in the external system.
536
+ value => Value to lookup. *Must be double URL encoded.*
537
+
538
+ Query Params:
539
+ max_timeout => Maximum execution time for the call in seconds
540
+ limit => Maximum number of items to return
541
+ no_annotation => If specified, do not return the annotation data
542
+ include_raw => Include raw data
543
+
544
+ Returns:
545
+ # List of:
546
+ [
547
+ {
548
+ "count": <count of results from the external system>,
549
+ "link": <url to search results in external system>,
550
+ "classification": <access control>, # Classification of the returned data
551
+ "annotation": [
552
+ <Annotation entries >
553
+ ]
554
+ },
555
+ ...,
556
+ ]
557
+ """
558
+ if not self.enrich or not self.supported_types:
559
+ return self.make_api_response({}, err="Enrichment is not supported by this plugin.", status_code=400)
560
+
561
+ if type_name == "ip":
562
+ is_ipv4 = isinstance(ipaddress.ip_address(value), ipaddress.IPv4Address)
563
+ type_name = "ipv4" if is_ipv4 else "ipv6"
564
+
565
+ try:
566
+ params = Params.from_request()
567
+ except RuntimeError as e:
568
+ self.logger.exception("Error on params parsing.")
569
+
570
+ return self.make_api_response(None, str(e), 504)
571
+
572
+ value = ul.unquote(ul.unquote(value))
573
+ # Invalid types must either be ignored, or return a 422
574
+ if type_name not in self.supported_types:
575
+ return self.make_api_response(
576
+ None,
577
+ f"Invalid type name: {type_name} [Valid types: {', '.join(self.supported_types)}].",
578
+ 422,
579
+ )
580
+
581
+ token: str | None = None
582
+ if self.validate_token:
583
+ token, error = self.validate_token()
584
+
585
+ if error:
586
+ return self.make_api_response(None, f"Error on token validation: {error}", status_code=401)
587
+ try:
588
+ if self.cache and params.use_cache:
589
+ if result := self.cache.get(type_name, value, params):
590
+ self.logger.debug("Cache hit")
591
+
592
+ return self.make_api_response(
593
+ TypeAdapter(list[QueryEntry]).dump_python(result, mode="json", exclude_none=True),
594
+ status_code=200,
595
+ )
596
+ else:
597
+ self.logger.debug("Cache miss")
598
+ except Exception:
599
+ self.logger.exception("Unknown internal exception on cache check, continuing to standard enrichment")
600
+
601
+ try:
602
+ results = self.enrich(type_name, value, params, token)
603
+
604
+ if not isinstance(results, list):
605
+ results = [results]
606
+ except InvalidDataException as e:
607
+ return self.make_api_response(None, e.message, 400)
608
+ except NotFoundException:
609
+ return self.make_api_response([], "", 404)
610
+ except TimeoutException as e:
611
+ return self.make_api_response(None, e.message or "Request timed out", 408)
612
+ except UnprocessableException as e:
613
+ return self.make_api_response(None, e.message, 422)
614
+ except Exception as e:
615
+ self.logger.exception("Unknown internal exception")
616
+ return self.make_api_response(None, f"Something went wrong when enriching: {e}", 500)
617
+
618
+ if self.cache:
619
+ self.cache.set(type_name, value, params, results)
620
+
621
+ return self.make_api_response(
622
+ TypeAdapter(list[QueryEntry]).dump_python(results, mode="json", exclude_none=True),
623
+ status_code=200,
624
+ )
625
+
626
+ def bulk_lookup(self: Self) -> Response: # noqa: C901
627
+ """This is the default bulk support for Clue plugins.
628
+
629
+ It is a wrapper on top of the single item route that will use a threadpool to perform the
630
+ request simultaneously.
631
+
632
+ Variables:
633
+ None
634
+
635
+ Query Params:
636
+ max_timeout => Maximum execution time for the call in seconds
637
+ limit => Maximum number of items to return
638
+ no_annotation => If specified, do not return the annotation data
639
+ include_raw => Include raw data
640
+ deadline => The POSIX timestamp the plugin should aim to return by
641
+
642
+ Data Block:
643
+ [
644
+ {"type": "ip", "value": "127.0.0.1"},
645
+ ...
646
+ ]
647
+
648
+ Returns:
649
+ { # Dictionary of data source queried
650
+ "ip": {
651
+ "127.0.0.1":{
652
+ "error": null, # Error message returned by data source
653
+ "items": [ # list of results from the source
654
+ ...,
655
+ ],
656
+
657
+ },
658
+ ...
659
+ },
660
+ ...
661
+ }
662
+ """
663
+ if not (self.enrich or self.alternate_bulk_lookup) or not self.supported_types:
664
+ return self.make_api_response({}, err="Bulk enrichment is not supported by this plugin.", status_code=400)
665
+
666
+ try:
667
+ params = Params.from_request()
668
+ except RuntimeError as e:
669
+ return self.make_api_response(None, str(e), 504)
670
+
671
+ # Get and validate POST data
672
+ post_data = request.json
673
+ if not isinstance(post_data, list):
674
+ return self.make_api_response(None, "Request data is not in the correct format", 422)
675
+
676
+ self.logger.info(f"Starting bulk lookup on {len(post_data)} entries")
677
+ bulk_result: dict[str, dict[str, BulkEntry]] = {}
678
+
679
+ remaining_items: list[dict[str, str]] = []
680
+ "Valid, non-cached items that must be enriched"
681
+
682
+ for entry in post_data:
683
+ if "type" not in entry or "value" not in entry:
684
+ return self.make_api_response(None, "Request data is not in the correct format", 422)
685
+
686
+ type_name = entry["type"]
687
+ bulk_result.setdefault(type_name, {})
688
+ if type_name not in self.supported_types:
689
+ self.logger.warning("Invalid type name provided: %s", type_name)
690
+
691
+ bulk_result[entry["type"]][entry["value"]] = BulkEntry(
692
+ error=f"Invalid type name: {type_name}. [valid types: {', '.join(self.supported_types)}]"
693
+ )
694
+ continue
695
+
696
+ try:
697
+ if self.cache and params.use_cache:
698
+ if result := self.cache.get(entry["type"], entry["value"], params):
699
+ self.logger.debug("Cache hit")
700
+
701
+ bulk_result[entry["type"]][entry["value"]] = BulkEntry(items=result)
702
+ continue
703
+ else:
704
+ self.logger.debug("Cache miss")
705
+ except Exception:
706
+ self.logger.exception("Exception on caching - continuing to execution")
707
+
708
+ remaining_items.append(entry)
709
+
710
+ token: str | None = None
711
+ if self.validate_token:
712
+ if self.logger:
713
+ self.logger.debug("Executing plugin-provided token validator")
714
+
715
+ token, error = self.validate_token()
716
+
717
+ if error:
718
+ return self.make_api_response(None, f"Error on token validation: {error}", status_code=401)
719
+
720
+ if self.logger:
721
+ self.logger.debug("Token is valid")
722
+ else:
723
+ if self.logger:
724
+ self.logger.debug("No token validator provided")
725
+
726
+ # All results were cached
727
+ if len(remaining_items) == 0:
728
+ if self.logger:
729
+ self.logger.info("All values retrieved from cache")
730
+ # Alternate bulk lookup is provided
731
+ elif self.alternate_bulk_lookup:
732
+ if self.logger:
733
+ self.logger.debug("Executing plugin-provided alternate bulk lookup script")
734
+
735
+ try:
736
+ alternate_results = self.alternate_bulk_lookup(remaining_items, params)
737
+
738
+ for _type, _values in alternate_results.items():
739
+ for _value, _result in _values.items():
740
+ bulk_result[_type][_value] = _result
741
+ except InvalidDataException as e:
742
+ return self.make_api_response(None, e.message, 400)
743
+ except NotFoundException:
744
+ return self.make_api_response([], "", 404)
745
+ except TimeoutException as e:
746
+ return self.make_api_response(None, e.message or "Request timed out", 408)
747
+ except UnprocessableException as e:
748
+ return self.make_api_response(None, e.message, 422)
749
+ except Exception as e:
750
+ if self.logger:
751
+ self.logger.exception("Unknown internal exception")
752
+ return self.make_api_response(None, f"Something went wrong when enriching: {e}", 500)
753
+
754
+ if self.cache and len(remaining_items) > 0:
755
+ if self.logger:
756
+ self.logger.info("Caching results for %s selectors", len(remaining_items))
757
+
758
+ for entry in remaining_items:
759
+ try:
760
+ items = bulk_result[entry["type"]][entry["value"]].items
761
+ self.cache.set(entry["type"], entry["value"], params, items)
762
+ except KeyError:
763
+ if self.logger:
764
+ self.logger.warning("Selector not present in bulk result, skipping cache step")
765
+ # Default bulk lookup
766
+ else:
767
+ self.__default_bulk_lookup(bulk_result, remaining_items, params, token)
768
+
769
+ variance = params.deadline - time.time()
770
+
771
+ if self.logger:
772
+ if variance < 0:
773
+ self.logger.warning(f"Deadline missed by {-round(variance * 1000)}ms")
774
+ else:
775
+ self.logger.debug(f"Deadline met, {round(variance * 1000)}ms to spare")
776
+
777
+ return self.make_api_response(
778
+ TypeAdapter(dict[str, dict[str, BulkEntry]]).dump_python(bulk_result, mode="json", exclude_none=True)
779
+ )
780
+
781
def get_actions(self: Self) -> Response:
    """Gets all the possible actions for this plugin.

    The action list comes from `self.__check_actions()`; when that returns None, `self.actions`
    (or an empty list) is used instead. Every action is dumped to the fields declared on
    `ActionBase`, extended with the JSON schema of its parameters, and passed through `ActionSpec`.

    Variables:
    None

    Returns:
    {   # Dictionary of actions, keyed by action id
        "action1": {
            ...  # schema of the action
        },
        ...
    }

    If the action setup raises, an empty body is returned with status code 500.
    """
    try:
        actions = self.__check_actions()
    except Exception:
        self.logger.exception("Exception on setup actions:")

        return self.make_api_response({}, err="Error on action setup.", status_code=500)

    if actions is None:
        actions = self.actions or []

    # The token is only used to name the user in the debug log; the error half of the
    # validator's result is ignored here.
    if not self.validate_token or not (token := self.validate_token()[0]):
        self.logger.debug("Returning %s actions for unknown user", len(actions))
    else:
        self.logger.debug("Returning %s actions for user %s", len(actions), get_username(token))

    # Same set of field names for every action, so build it once.
    base_fields = set(ActionBase.model_fields.keys())

    results: dict[str, dict[str, Any]] = {}
    for action in actions:
        schema = action.model_dump(mode="json", include=base_fields, exclude_none=True)

        # Field definitions are read from the model class: accessing `model_fields` on an
        # instance is deprecated as of Pydantic 2.11.
        params_annotation = cast(type[Any], type(action).model_fields["params"].annotation)

        # The first type argument of the `params` annotation provides the schema published
        # under the "params" key.
        schema["params"] = cast(BaseModel, params_annotation.__args__[0]).model_json_schema()

        results[action.id] = ActionSpec.model_validate(schema).model_dump(mode="json", exclude_none=True)

    return self.make_api_response(results)
820
+
821
def execute_action(self: Self, action_id: str):  # noqa: C901
    """Executes the specified action.

    The JSON request body is validated against the first type argument of the action's `params`
    annotation and then passed to `self.run_action`, together with the token returned by
    `self.validate_token` (None when no validator is configured).

    Exceptions raised while reading, validating or running the request are not propagated: they
    are logged and converted into an ActionResult whose outcome is "failure".

    Args:
        action_id (str): The ID of the action to execute

    Returns:
        Response: A Response object with an ActionResult as the body. Error responses use status
            code 400 (no action support), 500 (action setup failed), 404 (unknown action id)
            or 401 (token validation failed).
    """
    if not self.run_action:
        return self.make_api_response({}, err=f"{self.app_name} does not support any actions.", status_code=400)

    try:
        actions = self.__check_actions()
    except Exception:
        self.logger.exception("Exception on setup actions:")

        return self.make_api_response({}, err="Error on action setup.", status_code=500)

    if actions is None:
        actions = self.actions or []

    action_to_run = next((action for action in actions if action.id == action_id), None)
    if not action_to_run:
        return self.make_api_response({}, err="Action does not exist", status_code=404)

    token: str | None = None
    if self.validate_token:
        self.logger.debug("Executing plugin-provided token validator")

        token, error = self.validate_token()

        if error:
            return self.make_api_response(None, f"Error on token validation: {error}", status_code=401)

        self.logger.debug("Token is valid")
    else:
        self.logger.warning("No token validation provided. The access token will not be provided to the action.")

    # Field definitions are read from the model class: accessing `model_fields` on an instance
    # is deprecated as of Pydantic 2.11.
    param_type: Any = type(action_to_run).model_fields["params"].annotation or Any

    try:
        raw_request = request.json
        if not raw_request:
            self.logger.warning("No request body specified.")

            # Returning from inside the try block still runs the `finally` clause below.
            return self.make_api_response(ActionResult(outcome="failure", summary="No request body specified."))

        # The action is handed to the validators through the validation context.
        action_request: ExecuteRequest = TypeAdapter(param_type.__args__[0]).validate_python(
            raw_request, context={"action": action_to_run}
        )

        self.logger.info(
            "Executing Action '%s' on %s selectors",
            action_id,
            len(action_request.selectors) if action_request.selectors else 1,
        )

        result = self.run_action(action_to_run, action_request, token)
    except json.JSONDecodeError as e:
        self.logger.warning("JSON decoding error during execution: %s", str(e))

        result = ActionResult(
            outcome="failure",
            summary=f"Invalid request format. Request body must be valid JSON. Error: {str(e)}",
        )
    except ValidationError as err:
        self.logger.warning("Validation error during execution: %s", str(err))

        result = ActionResult(outcome="failure", summary=f"Validation error on execution: {str(err)}")
    except ClueException as e:
        self.logger.exception("ClueException during execution:")

        result = ActionResult(outcome="failure", summary=f"Error encountered during execution: {e.message}")
    except Exception as e:
        self.logger.exception("%s during execution:", e.__class__.__name__)

        result = ActionResult(outcome="failure", summary=f"An unknown error occurred during execution: {str(e)}")
    finally:
        self.logger.info("Execution finished.")

    self.logger.info("Action result: %s", result.outcome)

    return self.make_api_response(result)
905
+
906
def get_fetchers(self: Self) -> Response:
    """Lists every fetcher exposed by this plugin.

    Variables:
    None

    Returns:
    {   # Dictionary of fetchers, keyed by fetcher id
        "fetcher1": {
            ...  # JSON dump of the fetcher, None values excluded
        },
        ...
    }

    An empty dictionary is returned when the plugin has no fetchers.
    """
    if self.fetchers:
        catalogue: dict[str, dict[str, Any]] = {
            fetcher.id: fetcher.model_dump(mode="json", exclude_none=True) for fetcher in self.fetchers
        }

        return self.make_api_response(catalogue)

    self.logger.debug("No fetchers to show")

    return self.make_api_response({})
931
+
932
def execute_fetcher(self: Self, fetcher_id: str):  # noqa: C901
    """Runs the fetcher identified by `fetcher_id` against the selector in the request body.

    The JSON body is validated as a Selector and passed to `self.run_fetcher` together with the
    token returned by `self.validate_token` (None when no validator is configured). Exceptions
    raised while reading, validating or running the request are logged and turned into a
    FetcherResult with outcome "failure".

    Args:
        fetcher_id (str): The ID of the fetcher to execute

    Returns:
        Response: A Response object with a FetcherResult as the body. Status code 400 is used
            for missing fetcher support, an empty body, or invalid input; 404 for an unknown
            fetcher id; 401 for a failed token validation; 500 for errors raised during
            execution.
    """
    if not (self.run_fetcher and self.fetchers):
        return self.make_api_response({}, err=f"{self.app_name} does not support any fetchers.", status_code=400)

    # Pick the first registered fetcher whose id matches.
    selected = None
    for candidate in self.fetchers:
        if candidate.id == fetcher_id:
            selected = candidate
            break

    if not selected:
        return self.make_api_response({}, err=f"Fetcher {fetcher_id} does not exist", status_code=404)

    token: str | None = None
    if self.validate_token:
        self.logger.debug("Executing plugin-provided token validator")

        token, token_error = self.validate_token()

        if token_error:
            return self.make_api_response(None, f"Error on token validation: {token_error}", status_code=401)

        self.logger.debug("Token is valid")

    def failed(message: str) -> FetcherResult:
        """Builds the FetcherResult used for every failure path."""
        return FetcherResult(outcome="failure", format="error", error=message)

    http_status = 200
    try:
        body = request.json
        if not body:
            # Returning from inside the try block still runs the `finally` clause below.
            return self.make_api_response(failed("No request body specified."), status_code=400)

        selector = Selector.model_validate(body)

        self.logger.info("Running fetcher '%s'", fetcher_id)

        fetched = self.run_fetcher(selected, selector, token)
    except json.JSONDecodeError as decode_error:
        self.logger.warning("JSON decoding error during execution: %s", str(decode_error))

        http_status = 400
        fetched = failed(f"Invalid request format. Request body must be valid JSON. Error: {str(decode_error)}")
    except ValidationError as validation_error:
        self.logger.warning("Validation error during execution: %s", str(validation_error))

        http_status = 400
        fetched = failed(str(validation_error))
    except ClueException as clue_error:
        self.logger.exception("ClueException during execution:")

        http_status = 500
        fetched = failed(f"Error encountered during execution: {clue_error.message}")
    except Exception as unknown_error:
        self.logger.exception("%s during execution:", unknown_error.__class__.__name__)

        http_status = 500
        fetched = failed(f"An unknown error occurred during execution: {str(unknown_error)}")
    finally:
        self.logger.info("Fetcher completed.")

    self.logger.info("Fetcher outcome: %s", fetched.outcome)

    if fetched.error:
        self.logger.info("Error Message: %s", fetched.error)

    return self.make_api_response(fetched, status_code=http_status)