clue-api 1.0.1.dev57__tar.gz → 1.0.1.dev61__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (92)
  1. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/PKG-INFO +1 -1
  2. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/config.py +6 -0
  3. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/__init__.py +248 -58
  4. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/pyproject.toml +1 -1
  5. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/LICENSE +0 -0
  6. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/README.md +0 -0
  7. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/.gitignore +0 -0
  8. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/__init__.py +0 -0
  9. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/__init__.py +0 -0
  10. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/base.py +0 -0
  11. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/__init__.py +0 -0
  12. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/actions.py +0 -0
  13. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/auth.py +0 -0
  14. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/configs.py +0 -0
  15. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/fetchers.py +0 -0
  16. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/lookup.py +0 -0
  17. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/registration.py +0 -0
  18. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/api/v1/static.py +0 -0
  19. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/app.py +0 -0
  20. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/cache/__init__.py +0 -0
  21. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/__init__.py +0 -0
  22. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/classification.py +0 -0
  23. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/classification.yml +0 -0
  24. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/dict_utils.py +0 -0
  25. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/exceptions.py +0 -0
  26. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/forge.py +0 -0
  27. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/json_utils.py +0 -0
  28. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/list_utils.py +0 -0
  29. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/logging/__init__.py +0 -0
  30. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/logging/audit.py +0 -0
  31. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/logging/format.py +0 -0
  32. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/regex.py +0 -0
  33. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/str_utils.py +0 -0
  34. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/swagger.py +0 -0
  35. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/common/uid.py +0 -0
  36. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/config.py +0 -0
  37. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/constants/__init__.py +0 -0
  38. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/constants/supported_types.py +0 -0
  39. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/cronjobs/__init__.py +0 -0
  40. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/cronjobs/plugins.py +0 -0
  41. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/error.py +0 -0
  42. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/extensions/__init__.py +0 -0
  43. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/extensions/config.py +0 -0
  44. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/gunicorn_config.py +0 -0
  45. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/healthz.py +0 -0
  46. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/helper/discover.py +0 -0
  47. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/helper/headers.py +0 -0
  48. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/helper/oauth.py +0 -0
  49. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/__init__.py +0 -0
  50. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/actions.py +0 -0
  51. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/fetchers.py +0 -0
  52. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/graph.py +0 -0
  53. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/model_list.py +0 -0
  54. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/network.py +0 -0
  55. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/__init__.py +0 -0
  56. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/base.py +0 -0
  57. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/graph.py +0 -0
  58. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/image.py +0 -0
  59. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/status.py +0 -0
  60. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/results/validation.py +0 -0
  61. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/selector.py +0 -0
  62. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/models/validators.py +0 -0
  63. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/patched.py +0 -0
  64. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/__init__.py +0 -0
  65. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/central_server.py +0 -0
  66. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/email_render.py +0 -0
  67. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/token.py +0 -0
  68. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/helpers/trino.py +0 -0
  69. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/interactive.py +0 -0
  70. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/models.py +0 -0
  71. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/plugin/utils.py +0 -0
  72. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/py.typed +0 -0
  73. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/__init__.py +0 -0
  74. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/__init__.py +0 -0
  75. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/cache.py +0 -0
  76. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/events.py +0 -0
  77. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/hash.py +0 -0
  78. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/queues/__init__.py +0 -0
  79. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/queues/comms.py +0 -0
  80. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/set.py +0 -0
  81. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/remote/datatypes/user_quota_tracker.py +0 -0
  82. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/security/__init__.py +0 -0
  83. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/security/obo.py +0 -0
  84. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/security/utils.py +0 -0
  85. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/action_service.py +0 -0
  86. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/auth_service.py +0 -0
  87. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/config_service.py +0 -0
  88. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/fetcher_service.py +0 -0
  89. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/jwt_service.py +0 -0
  90. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/lookup_service.py +0 -0
  91. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/type_service.py +0 -0
  92. {clue_api-1.0.1.dev57 → clue_api-1.0.1.dev61}/clue/services/user_service.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: clue-api
3
- Version: 1.0.1.dev57
3
+ Version: 1.0.1.dev61
4
4
  Summary: Clue distributed enrichment service
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -455,6 +455,12 @@ class Config(BaseSettings):
455
455
 
456
456
  if __name__ == "__main__":
457
457
  # When executed, the config model will print the default values of the configuration
458
+ import json
459
+
458
460
  import yaml
459
461
 
462
+ print("Schema: ") # noqa: T201
463
+ print(json.dumps(Config.model_json_schema(), indent=2)) # noqa: T201
464
+
465
+ print("\n\nConfig:") # noqa: T201
460
466
  print(yaml.safe_dump(Config().model_dump(mode="json"))) # noqa: T201
@@ -40,24 +40,40 @@ from clue.plugin.helpers.token import get_username
40
40
  from clue.plugin.models import BulkEntry
41
41
  from clue.plugin.utils import Params
42
42
 
43
+ # Load environment variables from .env file if present
43
44
  load_dotenv()
44
45
 
46
+ # List of function names that can be overridden using the @plugin.use decorator
47
+ # These functions define the core plugin behavior and can be customized per plugin
45
48
  OVERRIDABLE_FUNCTIONS = [
46
- "enrich",
47
- "alternate_bulk_lookup",
48
- "liveness",
49
- "readyness",
50
- "run_action",
51
- "run_fetcher",
52
- "setup_actions",
53
- "validate_token",
49
+ "enrich", # Main enrichment function for processing selectors
50
+ "alternate_bulk_lookup", # Alternative bulk enrichment implementation
51
+ "liveness", # Kubernetes liveness probe endpoint
52
+ "readyness", # Kubernetes readiness probe endpoint
53
+ "run_action", # Function to execute plugin actions
54
+ "run_fetcher", # Function to execute plugin fetchers
55
+ "setup_actions", # Runtime action definition generation
56
+ "validate_token", # Custom authentication token validation
54
57
  ]
55
58
 
56
59
 
57
60
  def default_validate_token():
58
- """A default validation function that pulls from the authorization header. Not used by default."""
61
+ """A default validation function that extracts Bearer tokens from the Authorization header.
62
+
63
+ This function is provided as a reference implementation but is not used by default.
64
+ Plugin developers can use this as a starting point for their own token validation.
65
+
66
+ Returns:
67
+ tuple[str | None, str | None]: A tuple containing (token, error_message).
68
+ - If successful: (extracted_token, None)
69
+ - If failed: (None, error_description)
70
+
71
+ Note:
72
+ Expects Authorization header format: "Bearer <token>"
73
+ """
59
74
  token = request.headers.get("Authorization", None, type=str)
60
75
  if token and " " in token:
76
+ # Split "Bearer <token>" and extract the token part
61
77
  token = token.split()[1]
62
78
 
63
79
  if token:
@@ -67,21 +83,46 @@ def default_validate_token():
67
83
 
68
84
 
69
85
  def liveness(**_):
70
- "Default liveness probe"
86
+ """Default liveness probe for Kubernetes health checks.
87
+
88
+ This endpoint indicates whether the application is running and alive.
89
+ Returns a simple "OK" response with 200 status code.
90
+
91
+ Returns:
92
+ Response: Flask response with "OK" message
93
+ """
71
94
  return make_response("OK")
72
95
 
73
96
 
74
97
  def readyness(**_):
75
- "Default readyness probe"
98
+ """Default readiness probe for Kubernetes health checks.
99
+
100
+ This endpoint indicates whether the application is ready to serve traffic.
101
+ Returns a simple "OK" response with 200 status code.
102
+
103
+ Returns:
104
+ Response: Flask response with "OK" message
105
+ """
76
106
  return make_response("OK")
77
107
 
78
108
 
79
109
  def build_default_logger() -> logging.Logger:
80
- "Configure a default logger if one is not provided."
110
+ """Configure a default logger with standard Clue formatting when none is provided.
111
+
112
+ Creates a logger with INFO level that outputs to console using the standard
113
+ Clue log format and date format for consistency across all plugins.
114
+
115
+ Returns:
116
+ logging.Logger: Configured logger instance ready for use
117
+
118
+ Note:
119
+ Uses logger name "clue.plugin.default" to distinguish from user-provided loggers
120
+ """
81
121
  logger = logging.getLogger("clue.plugin.default")
82
122
  logger.setLevel(logging.INFO)
83
123
  console = logging.StreamHandler()
84
124
  console.setLevel(logging.INFO)
125
+ # Apply standard Clue log formatting for consistency
85
126
  console.setFormatter(logging.Formatter(CLUE_LOG_FORMAT, CLUE_DATE_FORMAT))
86
127
  logger.addHandler(console)
87
128
 
@@ -316,18 +357,22 @@ class CluePlugin:
316
357
  A readyness probe for kubernetes implementations of Clue.
317
358
  """
318
359
  self.alternate_bulk_lookup = alternate_bulk_lookup
360
+ # Create Flask app using the module name (before first dot) as app name
319
361
  self.app = Flask(__name__.split(".")[0])
320
362
  self.app_name = app_name
321
363
 
364
+ # Classification is required for security - must be specified via env var or parameter
322
365
  if classification is None:
323
366
  raise ClueValueError(
324
- "classification must be specified, either via the CLASSIFICATION environment variable, or when "
367
+ "Classification must be specified, either via the CLASSIFICATION environment variable, or when "
325
368
  "intializing the plugin."
326
369
  )
327
370
 
328
371
  self.classification = classification
329
372
  self.liveness = liveness
330
373
  self.readyness = readyness
374
+
375
+ # Convert comma-separated string to set for easier membership testing
331
376
  if isinstance(supported_types, str):
332
377
  self.supported_types = set(supported_types.split(","))
333
378
  else:
@@ -336,6 +381,7 @@ class CluePlugin:
336
381
  self.actions = actions
337
382
  self.setup_actions = setup_actions
338
383
 
384
+ # Allow URLs with or without trailing slashes to match the same route
339
385
  self.app.url_map.strict_slashes = False
340
386
 
341
387
  self.logger = logger if logger else build_default_logger()
@@ -349,20 +395,25 @@ class CluePlugin:
349
395
 
350
396
  self.__init_routes()
351
397
 
398
+ # Initialize Application Performance Monitoring if enabled
352
399
  if enable_apm:
353
400
  self.__init_apm()
354
401
 
402
+ # Set up caching based on configuration
355
403
  if enable_cache:
356
- # We support either using a boolean to use the redis default caching, or
404
+ # Support both boolean (use default cache type) and explicit cache type specification
357
405
  if isinstance(enable_cache, bool):
406
+ # Use environment variable or default to redis
407
+ cache_type = cast(Union[Literal["redis"], Literal["local"]], os.environ.get("CACHE_TYPE", "redis"))
358
408
  self.cache = Cache(
359
409
  self.app_name,
360
410
  self.app,
361
- cast(Union[Literal["redis"], Literal["local"]], os.environ.get("CACHE_TYPE", "redis")),
411
+ cache_type,
362
412
  timeout=cache_timeout,
363
413
  local_cache_options=local_cache_options,
364
414
  )
365
415
  else:
416
+ # Use explicitly specified cache type
366
417
  self.cache = Cache(
367
418
  self.app_name,
368
419
  self.app,
@@ -373,13 +424,27 @@ class CluePlugin:
373
424
  else:
374
425
  self.cache = None
375
426
 
427
+ # Configure werkzeug (Flask's WSGI server) logging to reduce noise
428
+ # Set to WARNING level to suppress INFO messages about HTTP requests
376
429
  wlog = logging.getLogger("werkzeug")
377
430
  wlog.setLevel(logging.WARNING)
431
+ # If our logger has a parent, inherit its handlers for consistency
378
432
  if self.logger.parent: # pragma: no cover
379
433
  for h in self.logger.parent.handlers:
380
434
  wlog.addHandler(h)
381
435
 
382
- # Injects the "app" variable back into the calling module, for use with gunicorn/flask
436
+ # Automatically inject the Flask "app" variable into the calling module's global namespace
437
+ # for compatibility with WSGI servers like gunicorn.
438
+ #
439
+ # This mechanism allows plugin developers to simply instantiate a CluePlugin without
440
+ # needing to explicitly expose the underlying Flask app. WSGI servers typically expect
441
+ # to find an 'app' variable in the module's global scope when using module:variable
442
+ # syntax (e.g., "mymodule:app").
443
+ #
444
+ # Example usage in a plugin module:
445
+ # plugin = CluePlugin("my-plugin", ...)
446
+ # # The 'app' variable is now automatically available for gunicorn
447
+ # # Command: gunicorn mymodule:app
383
448
  current_frame = inspect.currentframe()
384
449
  if current_frame:
385
450
  caller_frame = current_frame.f_back
@@ -389,60 +454,98 @@ class CluePlugin:
389
454
  self.logger.debug("Initialization complete!")
390
455
 
391
456
  def __check_actions(self) -> list[Action] | None:
457
+ """Validate token and retrieve dynamic actions if setup_actions is configured.
458
+
459
+ This method handles token validation when required and calls the setup_actions
460
+ function to get a potentially user-specific or dynamically generated list of actions.
461
+
462
+ Returns:
463
+ list[Action] | None: List of actions if setup_actions is configured, None otherwise
464
+
465
+ Raises:
466
+ AuthenticationException: If token validation fails
467
+ """
392
468
  if self.setup_actions:
469
+ # Validate token if token validation is configured
393
470
  if self.validate_token:
394
471
  token, error = self.validate_token()
395
472
 
396
473
  if error:
397
474
  self.logger.error("Error on token validation: %s", error)
398
-
399
475
  raise AuthenticationException(error)
400
476
  else:
401
477
  token = None
402
478
 
479
+ # Call user-defined setup_actions with base actions and validated token
403
480
  return self.setup_actions(self.actions or [], token)
404
481
 
405
482
  return None
406
483
 
407
484
  def __init_apm(self):
408
- "Initializes the APM connection if enabled"
409
- # Setup APMs
485
+ """Initialize Application Performance Monitoring (APM) using Elastic APM.
486
+
487
+ Sets up ElasticAPM integration with Flask if APM_SERVER_URL environment
488
+ variable is configured. This enables automatic collection of performance
489
+ metrics, error tracking, and distributed tracing.
410
490
 
491
+ Environment Variables:
492
+ APM_SERVER_URL: URL of the Elastic APM server to send metrics to
493
+ """
494
+ # Check if APM server URL is configured via environment variable
411
495
  apm_server_url = os.environ.get("APM_SERVER_URL")
412
496
  if apm_server_url is None:
413
497
  return
414
498
 
415
- self.logger.debug("Initializing apm")
499
+ self.logger.debug("Initializing APM")
416
500
 
501
+ # Import ElasticAPM components (lazy import to avoid dependency issues)
417
502
  import elasticapm
418
503
  from elasticapm.contrib.flask import ElasticAPM
419
504
 
420
505
  self.logger.info(f"Exporting application metrics to: {apm_server_url}")
421
506
 
507
+ # Initialize ElasticAPM with Flask app and configure client
422
508
  ElasticAPM(self.app, client=elasticapm.Client(server_url=apm_server_url, service_name=self.app_name))
423
509
 
424
510
  def __build_ctx(self):
425
- "Returns a wrap_ctx function to push the flask context into the greenlets"
426
- # Make a copy of the current context to pass it in the greenlets
511
+ """Create a context wrapper function for preserving Flask request context in greenlets.
512
+
513
+ Flask request context is thread-local and doesn't automatically propagate to
514
+ greenlets. This function captures the current request context and returns a
515
+ wrapper that pushes it into each greenlet before execution.
516
+
517
+ Returns:
518
+ Callable: A wrapper function that preserves Flask context and handles exceptions
519
+ """
520
+ # Capture the current Flask request context to propagate to greenlets
427
521
  current_req_ctx = _cv_request.get(None)
428
522
  reqctx = current_req_ctx.copy() if current_req_ctx else None
429
523
 
430
- # Push the request context into the greenlet
431
524
  def wrap_ctx(func: Callable, *args: Any, **kwargs) -> tuple[Any, Exception | None]:
525
+ """Wrapper that pushes Flask context and handles enrichment function execution.
526
+
527
+ Args:
528
+ func: The enrichment function to execute
529
+ *args: Arguments to pass to the function
530
+ **kwargs: Keyword arguments to pass to the function
531
+
532
+ Returns:
533
+ tuple[Any, Exception | None]: (result, exception) tuple
534
+ """
535
+ # Push the request context into this greenlet's scope
432
536
  if reqctx:
433
537
  reqctx.push()
434
538
 
435
539
  try:
436
540
  self.logger.debug("Executing enrichment function")
437
-
438
541
  return func(*args, **kwargs), None
439
542
  except NotFoundException:
543
+ # NotFoundException means no results found - return empty list, not an error
440
544
  self.logger.warning("NotFoundException thrown in greenlet")
441
-
442
545
  return [], None
443
546
  except ClueException as e:
547
+ # Other Clue exceptions should be propagated as errors
444
548
  self.logger.exception("ClueException thrown in greenlet")
445
-
446
549
  return None, e
447
550
 
448
551
  return wrap_ctx
@@ -454,58 +557,80 @@ class CluePlugin:
454
557
  params: Params,
455
558
  token: str | None,
456
559
  ):
457
- "Default bulk lookup that harnesses greenlets to multithread the provided enrich function"
560
+ """Default bulk lookup implementation using greenlets for concurrent enrichment.
561
+
562
+ This method processes multiple enrichment requests concurrently by spawning
563
+ greenlets (lightweight threads) for each item. It uses the single-item enrich
564
+ function to process each request while maintaining Flask request context. Note
565
+ that this may lead to inefficient lookups (e.g. making ten requests to a database,
566
+ instead of a single bulk query)
567
+
568
+ Args:
569
+ bulk_result: Dictionary to populate with results, keyed by type then value
570
+ items: List of items to enrich, each containing 'type' and 'value' keys
571
+ params: Request parameters including timeouts and limits
572
+ token: Authentication token to pass to enrichment functions
573
+ """
458
574
  self.logger.debug("Using default bulk lookup")
459
575
 
460
- # Submit the different requested items to the threadpool executor
576
+ # Create context wrapper to preserve Flask request context in greenlets
461
577
  wrap_ctx = self.__build_ctx()
578
+ # Limit pool size to prevent resource exhaustion: min(items, cpu_count * 5 + 4)
462
579
  thread_pool = gevent.pool.Pool(min(len(items), (os.cpu_count() or 0) * 5 + 4))
463
580
  greenlets: list[tuple[str, str, Greenlet]] = []
464
581
 
582
+ # Spawn a greenlet for each enrichment request
465
583
  for entry in items:
466
- # Request results for the type/value tuple
584
+ # Store type, value, and greenlet for later result processing
467
585
  greenlets.append(
468
586
  (
469
587
  entry["type"],
470
588
  entry["value"],
471
589
  thread_pool.spawn(
472
- wrap_ctx,
473
- self.enrich,
474
- entry["type"],
475
- entry["value"],
476
- params,
477
- token,
590
+ wrap_ctx, # Context wrapper function
591
+ self.enrich, # User's enrichment function
592
+ entry["type"], # Selector type
593
+ entry["value"], # Selector value
594
+ params, # Request parameters
595
+ token, # Authentication token
478
596
  ),
479
597
  )
480
598
  )
481
599
 
600
+ # Calculate remaining time until deadline
482
601
  timeout = params.deadline + params.max_timeout - time.time()
483
602
  self.logger.debug("Joining threadpool (timeout=%s)", timeout)
484
603
 
604
+ # Wait for all greenlets to complete or timeout
485
605
  thread_pool.join(timeout=timeout)
486
606
 
607
+ # Process results from all completed greenlets
487
608
  for type_name, value, greenlet in greenlets:
488
609
  greenlet_result = greenlet.value
489
610
 
611
+ # Check if greenlet completed successfully with results
490
612
  if greenlet_result is not None and greenlet_result[0] is not None:
491
613
  results: Union[list[QueryEntry], QueryEntry] = greenlet_result[0]
614
+ # Ensure results is always a list for consistent handling
492
615
  if not isinstance(results, list):
493
616
  results = [results]
494
617
 
495
618
  bulk_result[type_name][value] = BulkEntry(items=results)
496
619
 
620
+ # Cache successful results if caching is enabled
497
621
  if self.cache:
498
622
  self.logger.info("Caching results for selector %s:%s", type_name, value)
499
-
500
623
  try:
501
624
  self.cache.set(type_name, value, params, results)
502
625
  except KeyError:
503
626
  self.logger.warning("Selector not present in bulk result, skipping cache step")
504
627
  else:
628
+ # Handle errors: timeout, exceptions, or other failures
505
629
  error = "Request Timed Out"
506
630
  if greenlet_result is not None and greenlet_result[1] is not None:
507
631
  error = str(greenlet_result[1])
508
632
 
633
+ # Use greenlet exception if available, otherwise use our error message
509
634
  bulk_result[type_name][value] = BulkEntry(
510
635
  error=(error if not greenlet.exception else str(greenlet.exception))
511
636
  )
@@ -516,7 +641,19 @@ class CluePlugin:
516
641
  )
517
642
 
518
643
  def __init_routes(self):
519
- "Set up the routes for the flask server."
644
+ """Set up all Flask routes for the plugin API endpoints.
645
+
646
+ Registers the following endpoints:
647
+ - GET /actions/: List available actions
648
+ - POST /actions/<action_id>/: Execute a specific action
649
+ - GET /fetchers/: List available fetchers
650
+ - POST /fetchers/<fetcher_id>: Execute a specific fetcher
651
+ - GET /types/: List supported types
652
+ - GET /lookup/<type_name>/<value>/: Single enrichment lookup
653
+ - POST /lookup/: Bulk enrichment lookup
654
+ - GET /healthz/live: Liveness probe
655
+ - GET /healthz/ready: Readiness probe
656
+ """
520
657
  self.logger.debug("Initializing routes")
521
658
 
522
659
  self.app.add_url_rule("/actions/", self.get_actions.__name__, self.get_actions, methods=["GET"])
@@ -534,16 +671,38 @@ class CluePlugin:
534
671
  self.app.add_url_rule("/healthz/ready", self.readyness.__name__, self.readyness)
535
672
 
536
673
  def make_api_response(self: Self, data: Any, err: str = "", status_code: int = 200) -> Response:
537
- "Create a standard response for this API."
674
+ """Create a standardized JSON response for all API endpoints.
675
+
676
+ This method ensures consistent response format across all plugin endpoints,
677
+ handles automatic error extraction from result objects, and logs all requests.
678
+
679
+ Args:
680
+ data: The response data (will be JSON serialized)
681
+ err: Error message (if any)
682
+ status_code: HTTP status code (default: 200)
683
+
684
+ Returns:
685
+ Response: Flask response with standardized JSON structure
686
+
687
+ Response Format:
688
+ {
689
+ "api_response": <data>,
690
+ "api_error_message": <error_string>,
691
+ "api_status_code": <status_code>
692
+ }
693
+ """
694
+ # Extract error messages from specialized result objects
538
695
  if isinstance(data, FetcherResult) and data.outcome == "failure" and not err:
539
696
  err = data.error or err
540
697
 
541
698
  if isinstance(data, ActionResult) and data.outcome == "failure" and not err:
542
699
  err = data.summary or err
543
700
 
701
+ # Convert Pydantic models to dict for JSON serialization
544
702
  if isinstance(data, BaseModel):
545
703
  data = data.model_dump(mode="json", exclude_none=True)
546
704
 
705
+ # Log all API requests with method, path, status, and error (if any)
547
706
  self.logger.info("%s %s - %s%s", request.method, request.path, status_code, f": {err}" if err else "")
548
707
 
549
708
  return make_response(
@@ -558,7 +717,18 @@ class CluePlugin:
558
717
  )
559
718
 
560
719
  def get_type_names(self: Self) -> Response:
561
- "Return supported type names."
720
+ """Return the list of supported selector types with their classifications.
721
+
722
+ Returns:
723
+ Response: JSON response mapping each supported type to its classification level
724
+
725
+ Response Format:
726
+ {
727
+ "type1": "classification_level",
728
+ "type2": "classification_level",
729
+ ...
730
+ }
731
+ """
562
732
  return self.make_api_response({tname: self.classification for tname in sorted(self.supported_types or [])})
563
733
 
564
734
  def lookup(self: Self, type_name: str, value: str) -> Response: # noqa: C901
@@ -591,6 +761,7 @@ class CluePlugin:
591
761
  if not self.enrich or not self.supported_types:
592
762
  return self.make_api_response({}, err="Enrichment is not supported by this plugin.", status_code=400)
593
763
 
764
+ # Normalize generic "ip" type to specific "ipv4" or "ipv6" based on address format
594
765
  if type_name == "ip":
595
766
  is_ipv4 = isinstance(ipaddress.ip_address(value), ipaddress.IPv4Address)
596
767
  type_name = "ipv4" if is_ipv4 else "ipv6"
@@ -602,8 +773,9 @@ class CluePlugin:
602
773
 
603
774
  return self.make_api_response(None, str(e), 504)
604
775
 
776
+ # Double URL decode the value (required by API specification)
605
777
  value = ul.unquote(ul.unquote(value))
606
- # Invalid types must either be ignored, or return a 422
778
+ # Validate that the requested type is supported by this plugin
607
779
  if type_name not in self.supported_types:
608
780
  return self.make_api_response(
609
781
  None,
@@ -799,6 +971,7 @@ class CluePlugin:
799
971
  else:
800
972
  self.__default_bulk_lookup(bulk_result, remaining_items, params, token)
801
973
 
974
+ # Calculate how close we came to the deadline (positive = time remaining, negative = overrun)
802
975
  variance = params.deadline - time.time()
803
976
 
804
977
  if self.logger:
@@ -842,11 +1015,14 @@ class CluePlugin:
842
1015
 
843
1016
  results: dict[str, dict[str, Any]] = {}
844
1017
  for action in actions:
1018
+ # Extract base action fields (id, name, description, etc.)
845
1019
  schema = action.model_dump(mode="json", include=set(ActionBase.model_fields.keys()), exclude_none=True)
1020
+ # Generate JSON schema for the action's parameter type
846
1021
  schema["params"] = cast(
847
1022
  BaseModel, cast(type[Any], action.model_fields["params"].annotation).__args__[0]
848
1023
  ).model_json_schema()
849
1024
 
1025
+ # Convert to ActionSpec format and add to results
850
1026
  results[action.id] = ActionSpec.model_validate(schema).model_dump(mode="json", exclude_none=True)
851
1027
 
852
1028
  return self.make_api_response(results)
@@ -890,6 +1066,7 @@ class CluePlugin:
890
1066
  else:
891
1067
  self.logger.warning("No token validation provided. The access token will not be provided to the action.")
892
1068
 
1069
+ # Extract the parameter type from the action definition for validation
893
1070
  param_type: Any = action_to_run.model_fields["params"].annotation or Any
894
1071
 
895
1072
  try:
@@ -899,6 +1076,7 @@ class CluePlugin:
899
1076
 
900
1077
  return self.make_api_response(ActionResult(outcome="failure", summary="No request body specified."))
901
1078
 
1079
+ # Validate request body against the action's parameter schema
902
1080
  action_request: ExecuteRequest = TypeAdapter(param_type.__args__[0]).validate_python(
903
1081
  raw_request, context={"action": action_to_run}
904
1082
  )
@@ -937,26 +1115,34 @@ class CluePlugin:
937
1115
  return self.make_api_response(result)
938
1116
 
939
1117
  def get_fetchers(self: Self) -> Response:
940
- """Gets all the fetchers for this plugin.
1118
+ """Get all available fetchers for this plugin.
941
1119
 
942
- Variables:
943
- None
1120
+ Returns a dictionary of fetcher definitions, each containing the fetcher's
1121
+ schema including supported types, output format, and other metadata.
944
1122
 
945
1123
  Returns:
946
- { # Dictionary of fetchers
947
- "fetcher1": {
948
- ... # schema of the fetcher
949
- },
950
- ...
951
- }
1124
+ Response: JSON response containing fetcher definitions
1125
+
1126
+ Response Format:
1127
+ {
1128
+ "fetcher1": {
1129
+ "id": "fetcher1",
1130
+ "name": "Fetcher Name",
1131
+ "description": "Description",
1132
+ "supported_types": ["type1", "type2"],
1133
+ "output_format": "format",
1134
+ ...
1135
+ },
1136
+ ...
1137
+ }
952
1138
  """
953
1139
  if not self.fetchers:
954
1140
  self.logger.debug("No fetchers to show")
955
-
956
1141
  return self.make_api_response({})
957
1142
 
958
1143
  results: dict[str, dict[str, Any]] = {}
959
1144
  for fetcher in self.fetchers:
1145
+ # Serialize fetcher definition to JSON-compatible dict
960
1146
  schema = fetcher.model_dump(mode="json", exclude_none=True)
961
1147
  results[fetcher.id] = schema
962
1148
 
@@ -974,6 +1160,7 @@ class CluePlugin:
974
1160
  if not self.run_fetcher or not self.fetchers:
975
1161
  return self.make_api_response({}, err=f"{self.app_name} does not support any fetchers.", status_code=400)
976
1162
 
1163
+ # Find the requested fetcher by ID
977
1164
  fetcher_to_run = next((fetcher for fetcher in self.fetchers if fetcher.id == fetcher_id), None)
978
1165
  if not fetcher_to_run:
979
1166
  return self.make_api_response({}, err=f"Fetcher {fetcher_id} does not exist", status_code=404)
@@ -997,6 +1184,7 @@ class CluePlugin:
997
1184
  status_code=400,
998
1185
  )
999
1186
 
1187
+ # Validate request body as a Selector object
1000
1188
  raw_request = Selector.model_validate(request.json)
1001
1189
 
1002
1190
  self.logger.info("Running fetcher '%s'", fetcher_id)
@@ -1048,14 +1236,14 @@ class CluePlugin:
1048
1236
  functions defined in OVERRIDABLE_FUNCTIONS.
1049
1237
 
1050
1238
  Supported function names and their purposes:
1051
- - `enrich`: Main enrichment function for processing selectors
1052
- - `alternate_bulk_lookup`: Alternative bulk enrichment implementation
1053
- - `liveness`: Kubernetes liveness probe endpoint
1054
- - `readyness`: Kubernetes readiness probe endpoint
1055
- - `run_action`: Function to execute plugin actions
1056
- - `run_fetcher`: Function to execute plugin fetchers
1057
- - `setup_actions`: Runtime action definition generation
1058
- - `validate_token`: Custom authentication token validation
1239
+ - enrich: Main enrichment function for processing selectors
1240
+ - alternate_bulk_lookup: Alternative bulk enrichment implementation
1241
+ - liveness: Kubernetes liveness probe endpoint
1242
+ - readyness: Kubernetes readiness probe endpoint
1243
+ - run_action: Function to execute plugin actions
1244
+ - run_fetcher: Function to execute plugin fetchers
1245
+ - setup_actions: Runtime action definition generation
1246
+ - validate_token: Custom authentication token validation
1059
1247
 
1060
1248
  Args:
1061
1249
  func: The function to register. The function name determines which plugin
@@ -1086,9 +1274,11 @@ class CluePlugin:
1086
1274
  ", ".join(OVERRIDABLE_FUNCTIONS),
1087
1275
  )
1088
1276
 
1277
+ # Warn if overwriting an existing function
1089
1278
  if getattr(self, function_name) is not None:
1090
1279
  self.logger.warning("plugin.uses decorator is overwriting existing function: %s", function_name)
1091
1280
 
1281
+ # Dynamically set the function as an attribute of this plugin instance
1092
1282
  setattr(self, function_name, func)
1093
1283
 
1094
1284
  return func
@@ -147,7 +147,7 @@ log_cli_level = "WARN"
147
147
  [tool.poetry]
148
148
  package-mode = true
149
149
  name = "clue-api"
150
- version = "1.0.1.dev57"
150
+ version = "1.0.1.dev61"
151
151
  description = "Clue distributed enrichment service"
152
152
  authors = ["Canadian Centre for Cyber Security <contact@cyber.gc.ca>"]
153
153
  license = "MIT"
File without changes
File without changes