clue-api 1.5.0.dev231__tar.gz → 1.5.0.dev238__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (99)
  1. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/PKG-INFO +2 -1
  2. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/__init__.py +9 -1
  3. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/v1/static.py +16 -23
  4. clue_api-1.5.0.dev238/clue/api/v1/sync.py +155 -0
  5. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/app.py +2 -0
  6. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/forge.py +2 -8
  7. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/logging/__init__.py +24 -1
  8. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/config.py +4 -0
  9. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/extensions/config.py +2 -8
  10. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/config.py +62 -11
  11. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/fetchers.py +4 -1
  12. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/network.py +287 -150
  13. clue_api-1.5.0.dev238/clue/models/schema.py +99 -0
  14. clue_api-1.5.0.dev238/clue/models/sync.py +90 -0
  15. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/datatypes/__init__.py +6 -1
  16. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/services/lookup_service.py +15 -16
  17. clue_api-1.5.0.dev238/clue/services/mongo_service.py +358 -0
  18. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/services/user_service.py +2 -2
  19. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/pyproject.toml +7 -3
  20. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/LICENSE +0 -0
  21. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/README.md +0 -0
  22. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/.gitignore +0 -0
  23. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/__init__.py +0 -0
  24. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/base.py +0 -0
  25. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/v1/__init__.py +0 -0
  26. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/v1/actions.py +0 -0
  27. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/v1/auth.py +0 -0
  28. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/v1/configs.py +0 -0
  29. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/v1/fetchers.py +0 -0
  30. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/v1/lookup.py +0 -0
  31. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/api/v1/registration.py +0 -0
  32. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/cache/__init__.py +0 -0
  33. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/__init__.py +0 -0
  34. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/bytes_utils.py +0 -0
  35. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/classification.py +0 -0
  36. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/classification.yml +0 -0
  37. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/dict_utils.py +0 -0
  38. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/exceptions.py +0 -0
  39. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/json_utils.py +0 -0
  40. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/list_utils.py +0 -0
  41. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/logging/audit.py +0 -0
  42. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/logging/format.py +0 -0
  43. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/regex.py +0 -0
  44. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/str_utils.py +0 -0
  45. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/swagger.py +0 -0
  46. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/common/uid.py +0 -0
  47. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/constants/__init__.py +0 -0
  48. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/constants/env.py +0 -0
  49. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/constants/supported_types.py +0 -0
  50. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/cronjobs/__init__.py +0 -0
  51. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/cronjobs/plugins.py +0 -0
  52. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/error.py +0 -0
  53. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/extensions/__init__.py +0 -0
  54. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/gunicorn_config.py +0 -0
  55. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/healthz.py +0 -0
  56. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/helper/discover.py +0 -0
  57. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/helper/headers.py +0 -0
  58. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/helper/oauth.py +0 -0
  59. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/__init__.py +0 -0
  60. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/actions.py +0 -0
  61. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/graph.py +0 -0
  62. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/model_list.py +0 -0
  63. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/results/__init__.py +0 -0
  64. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/results/base.py +0 -0
  65. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/results/file.py +0 -0
  66. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/results/graph.py +0 -0
  67. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/results/image.py +0 -0
  68. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/results/status.py +0 -0
  69. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/results/validation.py +0 -0
  70. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/selector.py +0 -0
  71. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/models/validators.py +0 -0
  72. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/patched.py +0 -0
  73. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/__init__.py +0 -0
  74. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/celery_app.py +0 -0
  75. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/helpers/__init__.py +0 -0
  76. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/helpers/central_server.py +0 -0
  77. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/helpers/email_render.py +0 -0
  78. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/helpers/token.py +0 -0
  79. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/helpers/trino.py +0 -0
  80. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/models.py +0 -0
  81. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/plugin/utils.py +0 -0
  82. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/py.typed +0 -0
  83. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/__init__.py +0 -0
  84. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/datatypes/cache.py +0 -0
  85. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/datatypes/events.py +0 -0
  86. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/datatypes/hash.py +0 -0
  87. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/datatypes/queues/__init__.py +0 -0
  88. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/datatypes/queues/comms.py +0 -0
  89. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/datatypes/set.py +0 -0
  90. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/remote/datatypes/user_quota_tracker.py +0 -0
  91. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/security/__init__.py +0 -0
  92. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/security/obo.py +0 -0
  93. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/security/utils.py +0 -0
  94. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/services/action_service.py +0 -0
  95. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/services/auth_service.py +0 -0
  96. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/services/config_service.py +0 -0
  97. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/services/fetcher_service.py +0 -0
  98. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/services/jwt_service.py +0 -0
  99. {clue_api-1.5.0.dev231 → clue_api-1.5.0.dev238}/clue/services/type_service.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: clue-api
3
- Version: 1.5.0.dev231
3
+ Version: 1.5.0.dev238
4
4
  Summary: Clue distributed enrichment service
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -39,6 +39,7 @@ Requires-Dist: prometheus-client (>=0.20.0,<0.21.0) ; extra == "server"
39
39
  Requires-Dist: pydantic (>=2.7.1,<3.0.0)
40
40
  Requires-Dist: pydantic-settings[yaml] (>=2.3.4,<3.0.0)
41
41
  Requires-Dist: pyjwt (>=2.8.0,<3.0.0) ; extra == "server"
42
+ Requires-Dist: pymongo (>=4.16.0,<5.0.0) ; extra == "server"
42
43
  Requires-Dist: pyroute2 (>=0.7.12,<0.8.0) ; extra == "server"
43
44
  Requires-Dist: python-baseconv (>=1.2.2,<2.0.0) ; extra == "server"
44
45
  Requires-Dist: pytz (>=2024.1,<2025.0) ; extra == "server"
@@ -1,9 +1,10 @@
1
1
  from sys import exc_info
2
2
  from traceback import format_tb
3
- from typing import Any, Union
3
+ from typing import Any, Union, cast
4
4
 
5
5
  from flask import Blueprint, Response, make_response, request
6
6
  from prometheus_client import Counter
7
+ from pydantic import BaseModel
7
8
 
8
9
  from clue.common.forge import APP_NAME
9
10
  from clue.common.logging import get_logger, log_with_traceback
@@ -33,6 +34,13 @@ def _make_api_response(
33
34
  err = "".join(["\n"] + format_tb(trace) + ["%s: %s\n" % (err.__class__.__name__, str(err))]).rstrip("\n")
34
35
  log_with_traceback(trace, "Exception", is_exception=True)
35
36
 
37
+ if isinstance(data, BaseModel):
38
+ data = data.model_dump(mode="json", by_alias=True, exclude_none=True)
39
+ elif isinstance(data, list) and len(data) > 0 and isinstance(data[0], BaseModel):
40
+ data = [
41
+ entry.model_dump(mode="json", by_alias=True, exclude_none=True) for entry in cast(list[BaseModel], data)
42
+ ]
43
+
36
44
  resp = make_response(
37
45
  ClueResponse(response=data, error_message=err, warning=warnings, status_code=status_code).model_dump(
38
46
  mode="json", by_alias=True, exclude_none=True
@@ -1,3 +1,4 @@
1
+ import os
1
2
  from pathlib import Path
2
3
 
3
4
  from flask import request
@@ -12,10 +13,12 @@ from clue.security.utils import is_path_traversal
12
13
 
13
14
  SUB_API = "static"
14
15
  static_api = make_subapi_blueprint(SUB_API, api_version=1)
15
- static_api._doc = "Fetch static documentation"
16
+ static_api._doc = "Fetch static documentation" # type: ignore
16
17
 
17
18
  CORS(static_api, origins=config.ui.cors_origins, supports_credentials=True)
18
19
 
20
+ DOCUMENTATION_FOLDER = (Path(os.environ.get("CLUE_DOCUMENTATION_PATH", Path.cwd() / "docs"))).resolve()
21
+
19
22
  logger = get_logger(__file__)
20
23
 
21
24
 
@@ -39,23 +42,15 @@ def serve_documentation(**kwargs) -> dict[str, str]:
39
42
  """
40
43
  docs_filter = request.args.get("filter")
41
44
 
42
- documentation_folder = Path.cwd() / "docs"
43
-
44
45
  returned_files = {}
45
46
 
46
- if docs_filter is None:
47
- for file in documentation_folder.rglob("*"):
48
- if file.is_file():
49
- content = file.read_text(encoding="utf-8")
50
- returned_files[file.name] = content
51
- else:
52
- for file in documentation_folder.rglob("*"):
53
- if file.is_file() and docs_filter in file.name:
54
- try:
55
- content = file.read_text(encoding="utf-8")
56
- returned_files[file.name] = content
57
- except FileNotFoundError:
58
- return not_found(err="The file was not found")
47
+ for file in DOCUMENTATION_FOLDER.rglob("*"):
48
+ if file.is_file():
49
+ if docs_filter and docs_filter not in file.name:
50
+ continue
51
+
52
+ content = file.read_text(encoding="utf-8")
53
+ returned_files[file.name] = content
59
54
 
60
55
  return ok(returned_files)
61
56
 
@@ -78,17 +73,15 @@ def serve_documentation_file(filename: str, **kwargs) -> dict[str, str]:
78
73
  {"markdown": "Markdown documentation of howler-docs.md"}
79
74
 
80
75
  """
81
- documentation_folder = (Path.cwd() / "docs").resolve()
82
-
83
- docs_path = (documentation_folder / filename).resolve()
76
+ docs_path = (DOCUMENTATION_FOLDER / filename).resolve()
84
77
 
85
- if is_path_traversal(documentation_folder, docs_path):
78
+ if is_path_traversal(DOCUMENTATION_FOLDER, docs_path):
86
79
  return not_found(err="The file does not exist or is typed incorrectly within the relative path.")
87
80
 
88
- if documentation_folder.exists():
89
- content = documentation_folder.read_text(encoding="utf-8")
81
+ if docs_path.exists():
82
+ content = docs_path.read_text(encoding="utf-8")
90
83
 
91
84
  return ok({"markdown": content})
92
85
 
93
- logger.info("File %s does not exist", documentation_folder)
86
+ logger.info("File %s does not exist", docs_path)
94
87
  return not_found(err="The file does not exist or is typed incorrectly.")
@@ -0,0 +1,155 @@
1
+ from typing import Any
2
+
3
+ from flask import request
4
+ from flask_cors import CORS
5
+ from pydantic import TypeAdapter, ValidationError
6
+
7
+ from clue.api import bad_request, forbidden, internal_error, make_subapi_blueprint, ok
8
+ from clue.common.logging import get_logger
9
+ from clue.common.swagger import generate_swagger_docs
10
+ from clue.config import config
11
+ from clue.models.sync import ChangeRow
12
+ from clue.security import api_login
13
+ from clue.services import mongo_service
14
+
15
+ SUB_API = "sync"
16
+ sync_api = make_subapi_blueprint(SUB_API, api_version=1)
17
+ sync_api._doc = "Replication functionality for RxDB" # type: ignore
18
+
19
+ CORS(sync_api, origins=config.ui.cors_origins, supports_credentials=True)
20
+
21
+ logger = get_logger(__file__)
22
+
23
+
24
+ @generate_swagger_docs()
25
+ @sync_api.route("/<collection>", methods=["GET"])
26
+ @api_login()
27
+ def pull(collection: str, user: dict[str, Any] | None = None, **kwargs) -> dict[str, str]:
28
+ """Pull replicated changes from a collection since a specified checkpoint.
29
+
30
+ Variables:
31
+ collection => The name of the collection to pull from.
32
+
33
+ Optional Arguments:
34
+ updated_at: int => Timestamp of the last checkpoint. [Default: 0]
35
+ id: string => Document ID of the last checkpoint for pagination.
36
+ limit: int => Maximum number of records to return per batch. [Default: 10]
37
+ omit_deleted => If present, omit deleted records from the results.
38
+
39
+ Result Example:
40
+ [ # List of SelectorDocument records since the given checkpoint
41
+ {
42
+ "id": "<document id>",
43
+ "updated_at": 1234567890,
44
+ "_deleted": false,
45
+ ...
46
+ },
47
+ ...
48
+ ]
49
+ """
50
+ if not user:
51
+ return forbidden(err="You must be logged in as a valid user.")
52
+
53
+ if collection not in mongo_service.ALLOWED_COLLECTIONS:
54
+ return bad_request(err=f"Unknown collection: {collection}")
55
+
56
+ updated_at = request.args.get("updated_at", 0, type=int)
57
+ id: str | None = request.args.get("id", None)
58
+ limit = request.args.get("limit", 10, type=int)
59
+ omit_deleted = "omit_deleted" in request.args
60
+
61
+ return ok(
62
+ mongo_service.pull(user["uname"], collection, id, updated_at, batch_size=limit, omit_deleted=omit_deleted)
63
+ )
64
+
65
+
66
+ @generate_swagger_docs()
67
+ @sync_api.route("/<collection>/stream", methods=["GET"])
68
+ @api_login()
69
+ def stream(collection: str, user: dict[str, Any] | None = None, **kwargs):
70
+ """Stream replicated changes from a collection as server-sent events.
71
+
72
+ Variables:
73
+ collection => The name of the collection to stream from.
74
+
75
+ Arguments:
76
+ None
77
+
78
+ Result Example:
79
+ {
80
+ # A continuous text/event-stream (SSE) of JSON-encoded change events
81
+ "id": "<event id>",
82
+ "documents": [{...}, ...],
83
+ "checkpoint": {
84
+ "id": "<id>",
85
+ "updated_at": 1234567890
86
+ }
87
+ }
88
+ """
89
+ if not user:
90
+ return forbidden(err="You must be logged in as a valid user.")
91
+
92
+ if collection not in mongo_service.ALLOWED_COLLECTIONS:
93
+ return bad_request(err=f"Unknown collection: {collection}")
94
+
95
+ logger.info("Initializing event source stream")
96
+
97
+ return mongo_service.event_stream(user["uname"], collection)
98
+
99
+
100
+ @generate_swagger_docs()
101
+ @sync_api.route("/<collection>", methods=["POST"])
102
+ @api_login()
103
+ def push(collection: str, user: dict[str, Any] | None = None, **kwargs) -> dict[str, str]:
104
+ """Push replicated changes to a collection.
105
+
106
+ Variables:
107
+ collection => The name of the collection to push to.
108
+
109
+ Arguments:
110
+ None
111
+
112
+ Data Block:
113
+ [ # List of change rows for RxDB replication
114
+ {
115
+ "newDocumentState": { # Required. The new state of the document.
116
+ "id": "<document id>",
117
+ "updated_at": 1234567890,
118
+ "_deleted": false,
119
+ ...
120
+ },
121
+ "assumedMasterState": { # Optional. The assumed current server state for conflict detection.
122
+ "id": "<document id>",
123
+ "updated_at": 1234567890,
124
+ ...
125
+ }
126
+ },
127
+ ...
128
+ ]
129
+
130
+ Result Example:
131
+ [ # List of conflicting SelectorDocuments that were not applied
132
+ {
133
+ "id": "<document id>",
134
+ "updated_at": 1234567890,
135
+ "_deleted": false,
136
+ ...
137
+ },
138
+ ...
139
+ ]
140
+ """
141
+ if not user:
142
+ return forbidden(err="You must be logged in as a valid user.")
143
+
144
+ if collection not in mongo_service.ALLOWED_COLLECTIONS:
145
+ return bad_request(err=f"Unknown collection: {collection}")
146
+
147
+ try:
148
+ change_rows = TypeAdapter(list[ChangeRow]).validate_python(request.json, strict=True, by_alias=True)
149
+
150
+ return ok(mongo_service.push(user["uname"], collection, change_rows))
151
+ except ValidationError:
152
+ logger.exception("Validation exception on push")
153
+ return bad_request(err="Invalid replication data.")
154
+ except Exception:
155
+ return internal_error(err="Failed to process replication data.")
@@ -59,6 +59,7 @@ from clue.api.v1.fetchers import fetchers_api
59
59
  from clue.api.v1.lookup import lookup_api
60
60
  from clue.api.v1.registration import registration_api
61
61
  from clue.api.v1.static import static_api
62
+ from clue.api.v1.sync import sync_api
62
63
  from clue.common.logging import get_logger
63
64
  from clue.cronjobs import setup_jobs as setup_cron_jobs
64
65
  from clue.error import errors
@@ -132,6 +133,7 @@ app.register_blueprint(fetchers_api)
132
133
  app.register_blueprint(lookup_api)
133
134
  app.register_blueprint(registration_api)
134
135
  app.register_blueprint(static_api)
136
+ app.register_blueprint(sync_api)
135
137
 
136
138
 
137
139
  logger.info("Checking extensions for initialization and additional routes")
@@ -1,7 +1,6 @@
1
1
  # This file contains the loaders for the different components of the system
2
2
  from __future__ import annotations
3
3
 
4
- import logging
5
4
  import os
6
5
  from pathlib import Path
7
6
  from string import Template
@@ -10,7 +9,7 @@ from typing import TYPE_CHECKING
10
9
  from flask_caching import Cache
11
10
 
12
11
  from clue.common.dict_utils import recursive_update
13
- from clue.common.logging.format import CLUE_DATE_FORMAT, CLUE_LOG_FORMAT
12
+ from clue.common.logging import get_module_logger
14
13
  from clue.common.str_utils import default_string_value
15
14
 
16
15
  APP_NAME: str = default_string_value(env_name="APP_NAME", default="clue") # type: ignore[assignment]
@@ -23,12 +22,7 @@ cache = Cache(config={"CACHE_TYPE": "SimpleCache"})
23
22
 
24
23
  classification_engines: dict[Path, Classification] = {}
25
24
 
26
- logger = logging.getLogger(f"{APP_NAME}.common.forge")
27
- logger.setLevel(logging.INFO)
28
- console = logging.StreamHandler()
29
- console.setLevel(logging.INFO)
30
- console.setFormatter(logging.Formatter(CLUE_LOG_FORMAT, CLUE_DATE_FORMAT))
31
- logger.addHandler(console)
25
+ logger = get_module_logger(f"{APP_NAME}.common.forge")
32
26
 
33
27
 
34
28
  def __get_yml_path(yml_config: str | None = None) -> Path | None: # noqa: C901
@@ -29,6 +29,29 @@ LOG_LEVEL_MAP = {
29
29
  DEBUG = False
30
30
 
31
31
 
32
+ def get_module_logger(name: str, level: int = logging.INFO) -> logging.Logger:
33
+ """Create and return a logger with a pre-configured console handler.
34
+
35
+ A StreamHandler is added only when the logger has no handlers yet, so
36
+ calling this function multiple times for the same name is safe.
37
+
38
+ Args:
39
+ name: The dotted logger name (e.g. ``"clue.models.config"``).
40
+ level: The logging level to set on both the logger and the handler.
41
+
42
+ Returns:
43
+ The configured :class:`logging.Logger` instance.
44
+ """
45
+ _logger = logging.getLogger(name)
46
+ _logger.setLevel(level)
47
+ if not _logger.handlers:
48
+ _console = logging.StreamHandler()
49
+ _console.setLevel(level)
50
+ _console.setFormatter(logging.Formatter(CLUE_LOG_FORMAT, CLUE_DATE_FORMAT))
51
+ _logger.addHandler(_console)
52
+ return _logger
53
+
54
+
32
55
  class JsonFormatter(logging.Formatter):
33
56
  """A custom implementation of logging.Formatter that supports json logs as well as traceback for exceptions.
34
57
 
@@ -61,7 +84,7 @@ class JsonFormatter(logging.Formatter):
61
84
 
62
85
  return self._style.format(record)
63
86
 
64
- def formatException(self, exc_info): # noqa: N802
87
+ def formatException(self, exc_info): # type: ignore # noqa: N802
65
88
  """Formats the exception using traceback
66
89
 
67
90
  Args:
@@ -1,3 +1,7 @@
1
+ from dotenv import load_dotenv
2
+
3
+ load_dotenv()
4
+
1
5
  import os
2
6
 
3
7
  from clue.common import forge
@@ -1,4 +1,3 @@
1
- import logging
2
1
  from typing import Any
3
2
 
4
3
  from pydantic import BaseModel, ImportString, model_validator
@@ -8,14 +7,9 @@ from pydantic_settings import (
8
7
  YamlConfigSettingsSource,
9
8
  )
10
9
 
11
- from clue.common.logging import CLUE_DATE_FORMAT, CLUE_LOG_FORMAT
10
+ from clue.common.logging import get_module_logger
12
11
 
13
- logger = logging.getLogger("clue.extensions.config")
14
- logger.setLevel(logging.INFO)
15
- console = logging.StreamHandler()
16
- console.setLevel(logging.INFO)
17
- console.setFormatter(logging.Formatter(CLUE_LOG_FORMAT, CLUE_DATE_FORMAT))
18
- logger.addHandler(console)
12
+ logger = get_module_logger("clue.extensions.config")
19
13
 
20
14
 
21
15
  class Modules(BaseModel):
@@ -1,10 +1,9 @@
1
1
  # ruff: noqa: D101
2
- import logging
3
2
  import os
4
3
  from email.utils import parseaddr
5
4
  from enum import Enum
6
5
  from pathlib import Path
7
- from typing import Self
6
+ from typing import Annotated, Any, Self
8
7
  from uuid import uuid4
9
8
 
10
9
  from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
@@ -18,7 +17,7 @@ from pydantic_settings import (
18
17
 
19
18
  from clue.common import forge
20
19
  from clue.common.exceptions import ClueValueError
21
- from clue.common.logging.format import CLUE_DATE_FORMAT, CLUE_LOG_FORMAT
20
+ from clue.common.logging import get_module_logger
22
21
  from clue.common.str_utils import default_string_value
23
22
 
24
23
  AUTO_PROPERTY_TYPE = ["access", "classification", "type", "role", "remove_role", "group"]
@@ -28,6 +27,8 @@ DEFAULT_USER_NAME_FIELDS = ["name", "displayName"]
28
27
  APP_NAME = default_string_value(env_name="APP_NAME", default="clue").replace("-dev", "") # type: ignore[union-attr]
29
28
  CLASSIFICATION = forge.get_classification()
30
29
 
30
+ logger = get_module_logger("clue.models.config")
31
+
31
32
 
32
33
  class PasswordRequirement(BaseModel):
33
34
  lower: bool = Field(description="Password must contain lowercase letters", default=False)
@@ -213,7 +214,52 @@ class APMServer(BaseModel):
213
214
  class Metrics(BaseModel):
214
215
  apm_server: APMServer = APMServer()
215
216
  export_interval: int = Field(description="How often should we be exporting metrics?", default=5)
216
- redis: RedisServer = RedisServer()
217
+
218
+
219
+ class MongoDB(BaseModel):
220
+ host: str = Field(description="Hostname of the MongoDB instance", default="mongodb")
221
+ port: int = Field(description="Port of the MongoDB instance", default=27017, ge=1, le=65535)
222
+ user: str | None = Field(description="Username to use to connect to the MongoDB instance", default=None)
223
+ password: str | None = Field(description="Password to use to connect to the MongoDB instance", default=None)
224
+ database: str = Field(description="The database to use in the mongodb instance", default="clue")
225
+ max_retries: int = Field(
226
+ description="Controls the maximum number of retries to use when an initial connection fails", default=2
227
+ )
228
+ connect_timeout: int = Field(
229
+ description="Controls how long (in milliseconds) to wait when connecting a new socket", default=3000
230
+ )
231
+ server_selection_timeout: int = Field(
232
+ description="Controls how long (in milliseconds) to wait for a suitable server to be found", default=3000
233
+ )
234
+
235
+ def __repr__(self):
236
+ auth = ""
237
+ if self.user and self.password:
238
+ auth = f"{self.user}:***@"
239
+
240
+ return f"mongodb://{auth}{self.host}:{self.port}"
241
+
242
+ def connection(self) -> dict[str, Any]:
243
+ """Generate MongoDB connection string and authentication parameters.
244
+
245
+ Returns:
246
+ dict[str, str | int]: A dictionary of connection parameters including host, port,
247
+ connection timeouts, and optionally username and password if available.
248
+ """
249
+ params: dict[str, str | int] = {
250
+ "host": self.host,
251
+ "port": self.port,
252
+ "connectTimeoutMS": self.connect_timeout,
253
+ "serverSelectionTimeoutMS": self.server_selection_timeout,
254
+ }
255
+
256
+ if self.user and self.password:
257
+ params["username"] = self.user
258
+ params["password"] = self.password
259
+ else:
260
+ logger.warning("No authentication used for mongodb.")
261
+
262
+ return params
217
263
 
218
264
 
219
265
  class Core(BaseModel):
@@ -225,6 +271,9 @@ class Core(BaseModel):
225
271
  redis: RedisServer = RedisServer()
226
272
  "Configuration for Redis instances"
227
273
 
274
+ mongodb: MongoDB = MongoDB()
275
+ "Configuration for MongoDB instance"
276
+
228
277
 
229
278
  class LogLevel(str, Enum):
230
279
  DEBUG = "DEBUG"
@@ -363,6 +412,7 @@ class OBOService(BaseModel):
363
412
 
364
413
  class UI(BaseModel):
365
414
  cors_origins: list[str] = Field(default=[], description="List of valid deployments")
415
+ replication: bool = Field(default=True, description="Should server-side replication be enabled?")
366
416
 
367
417
 
368
418
  class API(BaseModel):
@@ -386,6 +436,13 @@ class API(BaseModel):
386
436
  )
387
437
 
388
438
 
439
+ class Retention(BaseModel):
440
+ enabled: Annotated[bool, Field(description="Should records be cached for users?")] = True
441
+ default_ttl: Annotated[
442
+ int, Field(description="The number of seconds a record with no set expiry should be cached")
443
+ ] = 3600
444
+
445
+
389
446
  root_path = Path("/etc") / APP_NAME
390
447
 
391
448
  config_locations = [
@@ -396,13 +453,6 @@ config_locations = [
396
453
  if os.getenv("AZURE_TEST_CONFIG", None) is not None:
397
454
  import re
398
455
 
399
- logger = logging.getLogger("clue.models.config")
400
- logger.setLevel(logging.INFO)
401
- console = logging.StreamHandler()
402
- console.setLevel(logging.INFO)
403
- console.setFormatter(logging.Formatter(CLUE_LOG_FORMAT, CLUE_DATE_FORMAT))
404
- logger.addHandler(console)
405
-
406
456
  logger.info("Azure build environment detected, adding additional config path")
407
457
 
408
458
  work_dir_parent = Path("/__w")
@@ -436,6 +486,7 @@ class Config(BaseSettings):
436
486
  auth: Auth = Auth()
437
487
  core: Core = Core()
438
488
  logging: Logging = Logging()
489
+ retention: Retention = Retention()
439
490
 
440
491
  model_config = SettingsConfigDict(
441
492
  yaml_file=config_locations,
@@ -106,7 +106,10 @@ class FetcherResult(BaseModel, Generic[DATA]):
106
106
  description="Did the fetcher succeed or fail, or is it pending?"
107
107
  )
108
108
  data: DATA | None = Field(description="The output of the fetcher.", default=None)
109
- error: str | None = Field(description="If the fetcher failed, contains the relevant error message.", default=None)
109
+ error: str | None = Field(
110
+ description="If the fetcher failed, contains the relevant error message.",
111
+ default=None,
112
+ )
110
113
  format: str | None = Field(
111
114
  description="What is the format of the output? Used to indicate what component to use when rendering "
112
115
  "the output.",