clue-api 1.0.1.dev57__tar.gz → 1.0.1.dev61__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/PKG-INFO +1 -1
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/config.py +6 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/__init__.py +248 -58
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/pyproject.toml +1 -1
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/LICENSE +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/README.md +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/.gitignore +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/base.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/actions.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/auth.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/configs.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/fetchers.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/lookup.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/registration.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/static.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/app.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/cache/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/classification.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/classification.yml +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/dict_utils.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/exceptions.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/forge.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/json_utils.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/list_utils.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/logging/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/logging/audit.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/logging/format.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/regex.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/str_utils.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/swagger.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/uid.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/config.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/constants/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/constants/supported_types.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/cronjobs/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/cronjobs/plugins.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/error.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/extensions/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/extensions/config.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/gunicorn_config.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/healthz.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/helper/discover.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/helper/headers.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/helper/oauth.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/actions.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/fetchers.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/graph.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/model_list.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/network.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/base.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/graph.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/image.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/status.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/validation.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/selector.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/validators.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/patched.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/central_server.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/email_render.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/token.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/trino.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/interactive.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/models.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/utils.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/py.typed +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/cache.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/events.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/hash.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/queues/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/queues/comms.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/set.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/user_quota_tracker.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/security/__init__.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/security/obo.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/security/utils.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/action_service.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/auth_service.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/config_service.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/fetcher_service.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/jwt_service.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/lookup_service.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/type_service.py +0 -0
- {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/user_service.py +0 -0
|
@@ -455,6 +455,12 @@ class Config(BaseSettings):
|
|
|
455
455
|
|
|
456
456
|
if __name__ == "__main__":
|
|
457
457
|
# When executed, the config model will print the default values of the configuration
|
|
458
|
+
import json
|
|
459
|
+
|
|
458
460
|
import yaml
|
|
459
461
|
|
|
462
|
+
print("Schema: ") # noqa: T201
|
|
463
|
+
print(json.dumps(Config.model_json_schema(), indent=2)) # noqa: T201
|
|
464
|
+
|
|
465
|
+
print("\n\nConfig:") # noqa: T201
|
|
460
466
|
print(yaml.safe_dump(Config().model_dump(mode="json"))) # noqa: T201
|
|
@@ -40,24 +40,40 @@ from clue.plugin.helpers.token import get_username
|
|
|
40
40
|
from clue.plugin.models import BulkEntry
|
|
41
41
|
from clue.plugin.utils import Params
|
|
42
42
|
|
|
43
|
+
# Load environment variables from .env file if present
|
|
43
44
|
load_dotenv()
|
|
44
45
|
|
|
46
|
+
# List of function names that can be overridden using the @plugin.use decorator
|
|
47
|
+
# These functions define the core plugin behavior and can be customized per plugin
|
|
45
48
|
OVERRIDABLE_FUNCTIONS = [
|
|
46
|
-
"enrich",
|
|
47
|
-
"alternate_bulk_lookup",
|
|
48
|
-
"liveness",
|
|
49
|
-
"readyness",
|
|
50
|
-
"run_action",
|
|
51
|
-
"run_fetcher",
|
|
52
|
-
"setup_actions",
|
|
53
|
-
"validate_token",
|
|
49
|
+
"enrich", # Main enrichment function for processing selectors
|
|
50
|
+
"alternate_bulk_lookup", # Alternative bulk enrichment implementation
|
|
51
|
+
"liveness", # Kubernetes liveness probe endpoint
|
|
52
|
+
"readyness", # Kubernetes readiness probe endpoint
|
|
53
|
+
"run_action", # Function to execute plugin actions
|
|
54
|
+
"run_fetcher", # Function to execute plugin fetchers
|
|
55
|
+
"setup_actions", # Runtime action definition generation
|
|
56
|
+
"validate_token", # Custom authentication token validation
|
|
54
57
|
]
|
|
55
58
|
|
|
56
59
|
|
|
57
60
|
def default_validate_token():
|
|
58
|
-
"""A default validation function that
|
|
61
|
+
"""A default validation function that extracts Bearer tokens from the Authorization header.
|
|
62
|
+
|
|
63
|
+
This function is provided as a reference implementation but is not used by default.
|
|
64
|
+
Plugin developers can use this as a starting point for their own token validation.
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
tuple[str | None, str | None]: A tuple containing (token, error_message).
|
|
68
|
+
- If successful: (extracted_token, None)
|
|
69
|
+
- If failed: (None, error_description)
|
|
70
|
+
|
|
71
|
+
Note:
|
|
72
|
+
Expects Authorization header format: "Bearer <token>"
|
|
73
|
+
"""
|
|
59
74
|
token = request.headers.get("Authorization", None, type=str)
|
|
60
75
|
if token and " " in token:
|
|
76
|
+
# Split "Bearer <token>" and extract the token part
|
|
61
77
|
token = token.split()[1]
|
|
62
78
|
|
|
63
79
|
if token:
|
|
@@ -67,21 +83,46 @@ def default_validate_token():
|
|
|
67
83
|
|
|
68
84
|
|
|
69
85
|
def liveness(**_):
|
|
70
|
-
"Default liveness probe
|
|
86
|
+
"""Default liveness probe for Kubernetes health checks.
|
|
87
|
+
|
|
88
|
+
This endpoint indicates whether the application is running and alive.
|
|
89
|
+
Returns a simple "OK" response with 200 status code.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
Response: Flask response with "OK" message
|
|
93
|
+
"""
|
|
71
94
|
return make_response("OK")
|
|
72
95
|
|
|
73
96
|
|
|
74
97
|
def readyness(**_):
|
|
75
|
-
"Default
|
|
98
|
+
"""Default readiness probe for Kubernetes health checks.
|
|
99
|
+
|
|
100
|
+
This endpoint indicates whether the application is ready to serve traffic.
|
|
101
|
+
Returns a simple "OK" response with 200 status code.
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
Response: Flask response with "OK" message
|
|
105
|
+
"""
|
|
76
106
|
return make_response("OK")
|
|
77
107
|
|
|
78
108
|
|
|
79
109
|
def build_default_logger() -> logging.Logger:
|
|
80
|
-
"Configure a default logger
|
|
110
|
+
"""Configure a default logger with standard Clue formatting when none is provided.
|
|
111
|
+
|
|
112
|
+
Creates a logger with INFO level that outputs to console using the standard
|
|
113
|
+
Clue log format and date format for consistency across all plugins.
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
logging.Logger: Configured logger instance ready for use
|
|
117
|
+
|
|
118
|
+
Note:
|
|
119
|
+
Uses logger name "clue.plugin.default" to distinguish from user-provided loggers
|
|
120
|
+
"""
|
|
81
121
|
logger = logging.getLogger("clue.plugin.default")
|
|
82
122
|
logger.setLevel(logging.INFO)
|
|
83
123
|
console = logging.StreamHandler()
|
|
84
124
|
console.setLevel(logging.INFO)
|
|
125
|
+
# Apply standard Clue log formatting for consistency
|
|
85
126
|
console.setFormatter(logging.Formatter(CLUE_LOG_FORMAT, CLUE_DATE_FORMAT))
|
|
86
127
|
logger.addHandler(console)
|
|
87
128
|
|
|
@@ -316,18 +357,22 @@ class CluePlugin:
|
|
|
316
357
|
A readyness probe for kubernetes implementations of Clue.
|
|
317
358
|
"""
|
|
318
359
|
self.alternate_bulk_lookup = alternate_bulk_lookup
|
|
360
|
+
# Create Flask app using the module name (before first dot) as app name
|
|
319
361
|
self.app = Flask(__name__.split(".")[0])
|
|
320
362
|
self.app_name = app_name
|
|
321
363
|
|
|
364
|
+
# Classification is required for security - must be specified via env var or parameter
|
|
322
365
|
if classification is None:
|
|
323
366
|
raise ClueValueError(
|
|
324
|
-
"
|
|
367
|
+
"Classification must be specified, either via the CLASSIFICATION environment variable, or when "
|
|
325
368
|
"intializing the plugin."
|
|
326
369
|
)
|
|
327
370
|
|
|
328
371
|
self.classification = classification
|
|
329
372
|
self.liveness = liveness
|
|
330
373
|
self.readyness = readyness
|
|
374
|
+
|
|
375
|
+
# Convert comma-separated string to set for easier membership testing
|
|
331
376
|
if isinstance(supported_types, str):
|
|
332
377
|
self.supported_types = set(supported_types.split(","))
|
|
333
378
|
else:
|
|
@@ -336,6 +381,7 @@ class CluePlugin:
|
|
|
336
381
|
self.actions = actions
|
|
337
382
|
self.setup_actions = setup_actions
|
|
338
383
|
|
|
384
|
+
# Allow URLs with or without trailing slashes to match the same route
|
|
339
385
|
self.app.url_map.strict_slashes = False
|
|
340
386
|
|
|
341
387
|
self.logger = logger if logger else build_default_logger()
|
|
@@ -349,20 +395,25 @@ class CluePlugin:
|
|
|
349
395
|
|
|
350
396
|
self.__init_routes()
|
|
351
397
|
|
|
398
|
+
# Initialize Application Performance Monitoring if enabled
|
|
352
399
|
if enable_apm:
|
|
353
400
|
self.__init_apm()
|
|
354
401
|
|
|
402
|
+
# Set up caching based on configuration
|
|
355
403
|
if enable_cache:
|
|
356
|
-
#
|
|
404
|
+
# Support both boolean (use default cache type) and explicit cache type specification
|
|
357
405
|
if isinstance(enable_cache, bool):
|
|
406
|
+
# Use environment variable or default to redis
|
|
407
|
+
cache_type = cast(Union[Literal["redis"], Literal["local"]], os.environ.get("CACHE_TYPE", "redis"))
|
|
358
408
|
self.cache = Cache(
|
|
359
409
|
self.app_name,
|
|
360
410
|
self.app,
|
|
361
|
-
|
|
411
|
+
cache_type,
|
|
362
412
|
timeout=cache_timeout,
|
|
363
413
|
local_cache_options=local_cache_options,
|
|
364
414
|
)
|
|
365
415
|
else:
|
|
416
|
+
# Use explicitly specified cache type
|
|
366
417
|
self.cache = Cache(
|
|
367
418
|
self.app_name,
|
|
368
419
|
self.app,
|
|
@@ -373,13 +424,27 @@ class CluePlugin:
|
|
|
373
424
|
else:
|
|
374
425
|
self.cache = None
|
|
375
426
|
|
|
427
|
+
# Configure werkzeug (Flask's WSGI server) logging to reduce noise
|
|
428
|
+
# Set to WARNING level to suppress INFO messages about HTTP requests
|
|
376
429
|
wlog = logging.getLogger("werkzeug")
|
|
377
430
|
wlog.setLevel(logging.WARNING)
|
|
431
|
+
# If our logger has a parent, inherit its handlers for consistency
|
|
378
432
|
if self.logger.parent: # pragma: no cover
|
|
379
433
|
for h in self.logger.parent.handlers:
|
|
380
434
|
wlog.addHandler(h)
|
|
381
435
|
|
|
382
|
-
#
|
|
436
|
+
# Automatically inject the Flask "app" variable into the calling module's global namespace
|
|
437
|
+
# for compatibility with WSGI servers like gunicorn.
|
|
438
|
+
#
|
|
439
|
+
# This mechanism allows plugin developers to simply instantiate a CluePlugin without
|
|
440
|
+
# needing to explicitly expose the underlying Flask app. WSGI servers typically expect
|
|
441
|
+
# to find an 'app' variable in the module's global scope when using module:variable
|
|
442
|
+
# syntax (e.g., "mymodule:app").
|
|
443
|
+
#
|
|
444
|
+
# Example usage in a plugin module:
|
|
445
|
+
# plugin = CluePlugin("my-plugin", ...)
|
|
446
|
+
# # The 'app' variable is now automatically available for gunicorn
|
|
447
|
+
# # Command: gunicorn mymodule:app
|
|
383
448
|
current_frame = inspect.currentframe()
|
|
384
449
|
if current_frame:
|
|
385
450
|
caller_frame = current_frame.f_back
|
|
@@ -389,60 +454,98 @@ class CluePlugin:
|
|
|
389
454
|
self.logger.debug("Initialization complete!")
|
|
390
455
|
|
|
391
456
|
def __check_actions(self) -> list[Action] | None:
|
|
457
|
+
"""Validate token and retrieve dynamic actions if setup_actions is configured.
|
|
458
|
+
|
|
459
|
+
This method handles token validation when required and calls the setup_actions
|
|
460
|
+
function to get a potentially user-specific or dynamically generated list of actions.
|
|
461
|
+
|
|
462
|
+
Returns:
|
|
463
|
+
list[Action] | None: List of actions if setup_actions is configured, None otherwise
|
|
464
|
+
|
|
465
|
+
Raises:
|
|
466
|
+
AuthenticationException: If token validation fails
|
|
467
|
+
"""
|
|
392
468
|
if self.setup_actions:
|
|
469
|
+
# Validate token if token validation is configured
|
|
393
470
|
if self.validate_token:
|
|
394
471
|
token, error = self.validate_token()
|
|
395
472
|
|
|
396
473
|
if error:
|
|
397
474
|
self.logger.error("Error on token validation: %s", error)
|
|
398
|
-
|
|
399
475
|
raise AuthenticationException(error)
|
|
400
476
|
else:
|
|
401
477
|
token = None
|
|
402
478
|
|
|
479
|
+
# Call user-defined setup_actions with base actions and validated token
|
|
403
480
|
return self.setup_actions(self.actions or [], token)
|
|
404
481
|
|
|
405
482
|
return None
|
|
406
483
|
|
|
407
484
|
def __init_apm(self):
|
|
408
|
-
"
|
|
409
|
-
|
|
485
|
+
"""Initialize Application Performance Monitoring (APM) using Elastic APM.
|
|
486
|
+
|
|
487
|
+
Sets up ElasticAPM integration with Flask if APM_SERVER_URL environment
|
|
488
|
+
variable is configured. This enables automatic collection of performance
|
|
489
|
+
metrics, error tracking, and distributed tracing.
|
|
410
490
|
|
|
491
|
+
Environment Variables:
|
|
492
|
+
APM_SERVER_URL: URL of the Elastic APM server to send metrics to
|
|
493
|
+
"""
|
|
494
|
+
# Check if APM server URL is configured via environment variable
|
|
411
495
|
apm_server_url = os.environ.get("APM_SERVER_URL")
|
|
412
496
|
if apm_server_url is None:
|
|
413
497
|
return
|
|
414
498
|
|
|
415
|
-
self.logger.debug("Initializing
|
|
499
|
+
self.logger.debug("Initializing APM")
|
|
416
500
|
|
|
501
|
+
# Import ElasticAPM components (lazy import to avoid dependency issues)
|
|
417
502
|
import elasticapm
|
|
418
503
|
from elasticapm.contrib.flask import ElasticAPM
|
|
419
504
|
|
|
420
505
|
self.logger.info(f"Exporting application metrics to: {apm_server_url}")
|
|
421
506
|
|
|
507
|
+
# Initialize ElasticAPM with Flask app and configure client
|
|
422
508
|
ElasticAPM(self.app, client=elasticapm.Client(server_url=apm_server_url, service_name=self.app_name))
|
|
423
509
|
|
|
424
510
|
def __build_ctx(self):
|
|
425
|
-
"
|
|
426
|
-
|
|
511
|
+
"""Create a context wrapper function for preserving Flask request context in greenlets.
|
|
512
|
+
|
|
513
|
+
Flask request context is thread-local and doesn't automatically propagate to
|
|
514
|
+
greenlets. This function captures the current request context and returns a
|
|
515
|
+
wrapper that pushes it into each greenlet before execution.
|
|
516
|
+
|
|
517
|
+
Returns:
|
|
518
|
+
Callable: A wrapper function that preserves Flask context and handles exceptions
|
|
519
|
+
"""
|
|
520
|
+
# Capture the current Flask request context to propagate to greenlets
|
|
427
521
|
current_req_ctx = _cv_request.get(None)
|
|
428
522
|
reqctx = current_req_ctx.copy() if current_req_ctx else None
|
|
429
523
|
|
|
430
|
-
# Push the request context into the greenlet
|
|
431
524
|
def wrap_ctx(func: Callable, *args: Any, **kwargs) -> tuple[Any, Exception | None]:
|
|
525
|
+
"""Wrapper that pushes Flask context and handles enrichment function execution.
|
|
526
|
+
|
|
527
|
+
Args:
|
|
528
|
+
func: The enrichment function to execute
|
|
529
|
+
*args: Arguments to pass to the function
|
|
530
|
+
**kwargs: Keyword arguments to pass to the function
|
|
531
|
+
|
|
532
|
+
Returns:
|
|
533
|
+
tuple[Any, Exception | None]: (result, exception) tuple
|
|
534
|
+
"""
|
|
535
|
+
# Push the request context into this greenlet's scope
|
|
432
536
|
if reqctx:
|
|
433
537
|
reqctx.push()
|
|
434
538
|
|
|
435
539
|
try:
|
|
436
540
|
self.logger.debug("Executing enrichment function")
|
|
437
|
-
|
|
438
541
|
return func(*args, **kwargs), None
|
|
439
542
|
except NotFoundException:
|
|
543
|
+
# NotFoundException means no results found - return empty list, not an error
|
|
440
544
|
self.logger.warning("NotFoundException thrown in greenlet")
|
|
441
|
-
|
|
442
545
|
return [], None
|
|
443
546
|
except ClueException as e:
|
|
547
|
+
# Other Clue exceptions should be propagated as errors
|
|
444
548
|
self.logger.exception("ClueException thrown in greenlet")
|
|
445
|
-
|
|
446
549
|
return None, e
|
|
447
550
|
|
|
448
551
|
return wrap_ctx
|
|
@@ -454,58 +557,80 @@ class CluePlugin:
|
|
|
454
557
|
params: Params,
|
|
455
558
|
token: str | None,
|
|
456
559
|
):
|
|
457
|
-
"Default bulk lookup
|
|
560
|
+
"""Default bulk lookup implementation using greenlets for concurrent enrichment.
|
|
561
|
+
|
|
562
|
+
This method processes multiple enrichment requests concurrently by spawning
|
|
563
|
+
greenlets (lightweight threads) for each item. It uses the single-item enrich
|
|
564
|
+
function to process each request while maintaining Flask request context. Note
|
|
565
|
+
that this may lead to inefficient lookups (e.g. making ten requests to a database,
|
|
566
|
+
instead of a single bulk query)
|
|
567
|
+
|
|
568
|
+
Args:
|
|
569
|
+
bulk_result: Dictionary to populate with results, keyed by type then value
|
|
570
|
+
items: List of items to enrich, each containing 'type' and 'value' keys
|
|
571
|
+
params: Request parameters including timeouts and limits
|
|
572
|
+
token: Authentication token to pass to enrichment functions
|
|
573
|
+
"""
|
|
458
574
|
self.logger.debug("Using default bulk lookup")
|
|
459
575
|
|
|
460
|
-
#
|
|
576
|
+
# Create context wrapper to preserve Flask request context in greenlets
|
|
461
577
|
wrap_ctx = self.__build_ctx()
|
|
578
|
+
# Limit pool size to prevent resource exhaustion: min(items, cpu_count * 5 + 4)
|
|
462
579
|
thread_pool = gevent.pool.Pool(min(len(items), (os.cpu_count() or 0) * 5 + 4))
|
|
463
580
|
greenlets: list[tuple[str, str, Greenlet]] = []
|
|
464
581
|
|
|
582
|
+
# Spawn a greenlet for each enrichment request
|
|
465
583
|
for entry in items:
|
|
466
|
-
#
|
|
584
|
+
# Store type, value, and greenlet for later result processing
|
|
467
585
|
greenlets.append(
|
|
468
586
|
(
|
|
469
587
|
entry["type"],
|
|
470
588
|
entry["value"],
|
|
471
589
|
thread_pool.spawn(
|
|
472
|
-
wrap_ctx,
|
|
473
|
-
self.enrich,
|
|
474
|
-
entry["type"],
|
|
475
|
-
entry["value"],
|
|
476
|
-
params,
|
|
477
|
-
token,
|
|
590
|
+
wrap_ctx, # Context wrapper function
|
|
591
|
+
self.enrich, # User's enrichment function
|
|
592
|
+
entry["type"], # Selector type
|
|
593
|
+
entry["value"], # Selector value
|
|
594
|
+
params, # Request parameters
|
|
595
|
+
token, # Authentication token
|
|
478
596
|
),
|
|
479
597
|
)
|
|
480
598
|
)
|
|
481
599
|
|
|
600
|
+
# Calculate remaining time until deadline
|
|
482
601
|
timeout = params.deadline + params.max_timeout - time.time()
|
|
483
602
|
self.logger.debug("Joining threadpool (timeout=%s)", timeout)
|
|
484
603
|
|
|
604
|
+
# Wait for all greenlets to complete or timeout
|
|
485
605
|
thread_pool.join(timeout=timeout)
|
|
486
606
|
|
|
607
|
+
# Process results from all completed greenlets
|
|
487
608
|
for type_name, value, greenlet in greenlets:
|
|
488
609
|
greenlet_result = greenlet.value
|
|
489
610
|
|
|
611
|
+
# Check if greenlet completed successfully with results
|
|
490
612
|
if greenlet_result is not None and greenlet_result[0] is not None:
|
|
491
613
|
results: Union[list[QueryEntry], QueryEntry] = greenlet_result[0]
|
|
614
|
+
# Ensure results is always a list for consistent handling
|
|
492
615
|
if not isinstance(results, list):
|
|
493
616
|
results = [results]
|
|
494
617
|
|
|
495
618
|
bulk_result[type_name][value] = BulkEntry(items=results)
|
|
496
619
|
|
|
620
|
+
# Cache successful results if caching is enabled
|
|
497
621
|
if self.cache:
|
|
498
622
|
self.logger.info("Caching results for selector %s:%s", type_name, value)
|
|
499
|
-
|
|
500
623
|
try:
|
|
501
624
|
self.cache.set(type_name, value, params, results)
|
|
502
625
|
except KeyError:
|
|
503
626
|
self.logger.warning("Selector not present in bulk result, skipping cache step")
|
|
504
627
|
else:
|
|
628
|
+
# Handle errors: timeout, exceptions, or other failures
|
|
505
629
|
error = "Request Timed Out"
|
|
506
630
|
if greenlet_result is not None and greenlet_result[1] is not None:
|
|
507
631
|
error = str(greenlet_result[1])
|
|
508
632
|
|
|
633
|
+
# Use greenlet exception if available, otherwise use our error message
|
|
509
634
|
bulk_result[type_name][value] = BulkEntry(
|
|
510
635
|
error=(error if not greenlet.exception else str(greenlet.exception))
|
|
511
636
|
)
|
|
@@ -516,7 +641,19 @@ class CluePlugin:
|
|
|
516
641
|
)
|
|
517
642
|
|
|
518
643
|
def __init_routes(self):
|
|
519
|
-
"Set up
|
|
644
|
+
"""Set up all Flask routes for the plugin API endpoints.
|
|
645
|
+
|
|
646
|
+
Registers the following endpoints:
|
|
647
|
+
- GET /actions/: List available actions
|
|
648
|
+
- POST /actions/<action_id>/: Execute a specific action
|
|
649
|
+
- GET /fetchers/: List available fetchers
|
|
650
|
+
- POST /fetchers/<fetcher_id>: Execute a specific fetcher
|
|
651
|
+
- GET /types/: List supported types
|
|
652
|
+
- GET /lookup/<type_name>/<value>/: Single enrichment lookup
|
|
653
|
+
- POST /lookup/: Bulk enrichment lookup
|
|
654
|
+
- GET /healthz/live: Liveness probe
|
|
655
|
+
- GET /healthz/ready: Readiness probe
|
|
656
|
+
"""
|
|
520
657
|
self.logger.debug("Initializing routes")
|
|
521
658
|
|
|
522
659
|
self.app.add_url_rule("/actions/", self.get_actions.__name__, self.get_actions, methods=["GET"])
|
|
@@ -534,16 +671,38 @@ class CluePlugin:
|
|
|
534
671
|
self.app.add_url_rule("/healthz/ready", self.readyness.__name__, self.readyness)
|
|
535
672
|
|
|
536
673
|
def make_api_response(self: Self, data: Any, err: str = "", status_code: int = 200) -> Response:
|
|
537
|
-
"Create a
|
|
674
|
+
"""Create a standardized JSON response for all API endpoints.
|
|
675
|
+
|
|
676
|
+
This method ensures consistent response format across all plugin endpoints,
|
|
677
|
+
handles automatic error extraction from result objects, and logs all requests.
|
|
678
|
+
|
|
679
|
+
Args:
|
|
680
|
+
data: The response data (will be JSON serialized)
|
|
681
|
+
err: Error message (if any)
|
|
682
|
+
status_code: HTTP status code (default: 200)
|
|
683
|
+
|
|
684
|
+
Returns:
|
|
685
|
+
Response: Flask response with standardized JSON structure
|
|
686
|
+
|
|
687
|
+
Response Format:
|
|
688
|
+
{
|
|
689
|
+
"api_response": <data>,
|
|
690
|
+
"api_error_message": <error_string>,
|
|
691
|
+
"api_status_code": <status_code>
|
|
692
|
+
}
|
|
693
|
+
"""
|
|
694
|
+
# Extract error messages from specialized result objects
|
|
538
695
|
if isinstance(data, FetcherResult) and data.outcome == "failure" and not err:
|
|
539
696
|
err = data.error or err
|
|
540
697
|
|
|
541
698
|
if isinstance(data, ActionResult) and data.outcome == "failure" and not err:
|
|
542
699
|
err = data.summary or err
|
|
543
700
|
|
|
701
|
+
# Convert Pydantic models to dict for JSON serialization
|
|
544
702
|
if isinstance(data, BaseModel):
|
|
545
703
|
data = data.model_dump(mode="json", exclude_none=True)
|
|
546
704
|
|
|
705
|
+
# Log all API requests with method, path, status, and error (if any)
|
|
547
706
|
self.logger.info("%s %s - %s%s", request.method, request.path, status_code, f": {err}" if err else "")
|
|
548
707
|
|
|
549
708
|
return make_response(
|
|
@@ -558,7 +717,18 @@ class CluePlugin:
|
|
|
558
717
|
)
|
|
559
718
|
|
|
560
719
|
def get_type_names(self: Self) -> Response:
|
|
561
|
-
"Return supported
|
|
720
|
+
"""Return the list of supported selector types with their classifications.
|
|
721
|
+
|
|
722
|
+
Returns:
|
|
723
|
+
Response: JSON response mapping each supported type to its classification level
|
|
724
|
+
|
|
725
|
+
Response Format:
|
|
726
|
+
{
|
|
727
|
+
"type1": "classification_level",
|
|
728
|
+
"type2": "classification_level",
|
|
729
|
+
...
|
|
730
|
+
}
|
|
731
|
+
"""
|
|
562
732
|
return self.make_api_response({tname: self.classification for tname in sorted(self.supported_types or [])})
|
|
563
733
|
|
|
564
734
|
def lookup(self: Self, type_name: str, value: str) -> Response: # noqa: C901
|
|
@@ -591,6 +761,7 @@ class CluePlugin:
|
|
|
591
761
|
if not self.enrich or not self.supported_types:
|
|
592
762
|
return self.make_api_response({}, err="Enrichment is not supported by this plugin.", status_code=400)
|
|
593
763
|
|
|
764
|
+
# Normalize generic "ip" type to specific "ipv4" or "ipv6" based on address format
|
|
594
765
|
if type_name == "ip":
|
|
595
766
|
is_ipv4 = isinstance(ipaddress.ip_address(value), ipaddress.IPv4Address)
|
|
596
767
|
type_name = "ipv4" if is_ipv4 else "ipv6"
|
|
@@ -602,8 +773,9 @@ class CluePlugin:
|
|
|
602
773
|
|
|
603
774
|
return self.make_api_response(None, str(e), 504)
|
|
604
775
|
|
|
776
|
+
# Double URL decode the value (required by API specification)
|
|
605
777
|
value = ul.unquote(ul.unquote(value))
|
|
606
|
-
#
|
|
778
|
+
# Validate that the requested type is supported by this plugin
|
|
607
779
|
if type_name not in self.supported_types:
|
|
608
780
|
return self.make_api_response(
|
|
609
781
|
None,
|
|
@@ -799,6 +971,7 @@ class CluePlugin:
|
|
|
799
971
|
else:
|
|
800
972
|
self.__default_bulk_lookup(bulk_result, remaining_items, params, token)
|
|
801
973
|
|
|
974
|
+
# Calculate how close we came to the deadline (positive = time remaining, negative = overrun)
|
|
802
975
|
variance = params.deadline - time.time()
|
|
803
976
|
|
|
804
977
|
if self.logger:
|
|
@@ -842,11 +1015,14 @@ class CluePlugin:
|
|
|
842
1015
|
|
|
843
1016
|
results: dict[str, dict[str, Any]] = {}
|
|
844
1017
|
for action in actions:
|
|
1018
|
+
# Extract base action fields (id, name, description, etc.)
|
|
845
1019
|
schema = action.model_dump(mode="json", include=set(ActionBase.model_fields.keys()), exclude_none=True)
|
|
1020
|
+
# Generate JSON schema for the action's parameter type
|
|
846
1021
|
schema["params"] = cast(
|
|
847
1022
|
BaseModel, cast(type[Any], action.model_fields["params"].annotation).__args__[0]
|
|
848
1023
|
).model_json_schema()
|
|
849
1024
|
|
|
1025
|
+
# Convert to ActionSpec format and add to results
|
|
850
1026
|
results[action.id] = ActionSpec.model_validate(schema).model_dump(mode="json", exclude_none=True)
|
|
851
1027
|
|
|
852
1028
|
return self.make_api_response(results)
|
|
@@ -890,6 +1066,7 @@ class CluePlugin:
|
|
|
890
1066
|
else:
|
|
891
1067
|
self.logger.warning("No token validation provided. The access token will not be provided to the action.")
|
|
892
1068
|
|
|
1069
|
+
# Extract the parameter type from the action definition for validation
|
|
893
1070
|
param_type: Any = action_to_run.model_fields["params"].annotation or Any
|
|
894
1071
|
|
|
895
1072
|
try:
|
|
@@ -899,6 +1076,7 @@ class CluePlugin:
|
|
|
899
1076
|
|
|
900
1077
|
return self.make_api_response(ActionResult(outcome="failure", summary="No request body specified."))
|
|
901
1078
|
|
|
1079
|
+
# Validate request body against the action's parameter schema
|
|
902
1080
|
action_request: ExecuteRequest = TypeAdapter(param_type.__args__[0]).validate_python(
|
|
903
1081
|
raw_request, context={"action": action_to_run}
|
|
904
1082
|
)
|
|
@@ -937,26 +1115,34 @@ class CluePlugin:
|
|
|
937
1115
|
return self.make_api_response(result)
|
|
938
1116
|
|
|
939
1117
|
def get_fetchers(self: Self) -> Response:
|
|
940
|
-
"""
|
|
1118
|
+
"""Get all available fetchers for this plugin.
|
|
941
1119
|
|
|
942
|
-
|
|
943
|
-
|
|
1120
|
+
Returns a dictionary of fetcher definitions, each containing the fetcher's
|
|
1121
|
+
schema including supported types, output format, and other metadata.
|
|
944
1122
|
|
|
945
1123
|
Returns:
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
1124
|
+
Response: JSON response containing fetcher definitions
|
|
1125
|
+
|
|
1126
|
+
Response Format:
|
|
1127
|
+
{
|
|
1128
|
+
"fetcher1": {
|
|
1129
|
+
"id": "fetcher1",
|
|
1130
|
+
"name": "Fetcher Name",
|
|
1131
|
+
"description": "Description",
|
|
1132
|
+
"supported_types": ["type1", "type2"],
|
|
1133
|
+
"output_format": "format",
|
|
1134
|
+
...
|
|
1135
|
+
},
|
|
1136
|
+
...
|
|
1137
|
+
}
|
|
952
1138
|
"""
|
|
953
1139
|
if not self.fetchers:
|
|
954
1140
|
self.logger.debug("No fetchers to show")
|
|
955
|
-
|
|
956
1141
|
return self.make_api_response({})
|
|
957
1142
|
|
|
958
1143
|
results: dict[str, dict[str, Any]] = {}
|
|
959
1144
|
for fetcher in self.fetchers:
|
|
1145
|
+
# Serialize fetcher definition to JSON-compatible dict
|
|
960
1146
|
schema = fetcher.model_dump(mode="json", exclude_none=True)
|
|
961
1147
|
results[fetcher.id] = schema
|
|
962
1148
|
|
|
@@ -974,6 +1160,7 @@ class CluePlugin:
|
|
|
974
1160
|
if not self.run_fetcher or not self.fetchers:
|
|
975
1161
|
return self.make_api_response({}, err=f"{self.app_name} does not support any fetchers.", status_code=400)
|
|
976
1162
|
|
|
1163
|
+
# Find the requested fetcher by ID
|
|
977
1164
|
fetcher_to_run = next((fetcher for fetcher in self.fetchers if fetcher.id == fetcher_id), None)
|
|
978
1165
|
if not fetcher_to_run:
|
|
979
1166
|
return self.make_api_response({}, err=f"Fetcher {fetcher_id} does not exist", status_code=404)
|
|
@@ -997,6 +1184,7 @@ class CluePlugin:
|
|
|
997
1184
|
status_code=400,
|
|
998
1185
|
)
|
|
999
1186
|
|
|
1187
|
+
# Validate request body as a Selector object
|
|
1000
1188
|
raw_request = Selector.model_validate(request.json)
|
|
1001
1189
|
|
|
1002
1190
|
self.logger.info("Running fetcher '%s'", fetcher_id)
|
|
@@ -1048,14 +1236,14 @@ class CluePlugin:
|
|
|
1048
1236
|
functions defined in OVERRIDABLE_FUNCTIONS.
|
|
1049
1237
|
|
|
1050
1238
|
Supported function names and their purposes:
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1239
|
+
- enrich: Main enrichment function for processing selectors
|
|
1240
|
+
- alternate_bulk_lookup: Alternative bulk enrichment implementation
|
|
1241
|
+
- liveness: Kubernetes liveness probe endpoint
|
|
1242
|
+
- readyness: Kubernetes readiness probe endpoint
|
|
1243
|
+
- run_action: Function to execute plugin actions
|
|
1244
|
+
- run_fetcher: Function to execute plugin fetchers
|
|
1245
|
+
- setup_actions: Runtime action definition generation
|
|
1246
|
+
- validate_token: Custom authentication token validation
|
|
1059
1247
|
|
|
1060
1248
|
Args:
|
|
1061
1249
|
func: The function to register. The function name determines which plugin
|
|
@@ -1086,9 +1274,11 @@ class CluePlugin:
|
|
|
1086
1274
|
", ".join(OVERRIDABLE_FUNCTIONS),
|
|
1087
1275
|
)
|
|
1088
1276
|
|
|
1277
|
+
# Warn if overwriting an existing function
|
|
1089
1278
|
if getattr(self, function_name) is not None:
|
|
1090
1279
|
self.logger.warning("plugin.uses decorator is overwriting existing function: %s", function_name)
|
|
1091
1280
|
|
|
1281
|
+
# Dynamically set the function as an attribute of this plugin instance
|
|
1092
1282
|
setattr(self, function_name, func)
|
|
1093
1283
|
|
|
1094
1284
|
return func
|
|
@@ -147,7 +147,7 @@ log_cli_level = "WARN"
|
|
|
147
147
|
[tool.poetry]
|
|
148
148
|
package-mode = true
|
|
149
149
|
name = "clue-api"
|
|
150
|
-
version = "1.0.1.dev57"
|
|
150
|
+
version = "1.0.1.dev61"
|
|
151
151
|
description = "Clue distributed enrichment service"
|
|
152
152
|
authors = ["Canadian Centre for Cyber Security <contact@cyber.gc.ca>"]
|
|
153
153
|
license = "MIT"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|