clue-api 1.0.0.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clue/.gitignore +21 -0
- clue/__init__.py +0 -0
- clue/api/__init__.py +211 -0
- clue/api/base.py +99 -0
- clue/api/v1/__init__.py +82 -0
- clue/api/v1/actions.py +92 -0
- clue/api/v1/auth.py +243 -0
- clue/api/v1/configs.py +83 -0
- clue/api/v1/fetchers.py +94 -0
- clue/api/v1/lookup.py +221 -0
- clue/api/v1/registration.py +109 -0
- clue/api/v1/static.py +94 -0
- clue/app.py +166 -0
- clue/cache/__init__.py +129 -0
- clue/common/__init__.py +0 -0
- clue/common/classification.py +1006 -0
- clue/common/classification.yml +130 -0
- clue/common/dict_utils.py +130 -0
- clue/common/exceptions.py +199 -0
- clue/common/forge.py +152 -0
- clue/common/json_utils.py +10 -0
- clue/common/list_utils.py +11 -0
- clue/common/logging/__init__.py +291 -0
- clue/common/logging/audit.py +157 -0
- clue/common/logging/format.py +42 -0
- clue/common/regex.py +31 -0
- clue/common/str_utils.py +213 -0
- clue/common/swagger.py +139 -0
- clue/common/uid.py +47 -0
- clue/config.py +60 -0
- clue/constants/__init__.py +0 -0
- clue/constants/supported_types.py +38 -0
- clue/cronjobs/__init__.py +30 -0
- clue/cronjobs/plugins.py +32 -0
- clue/error.py +129 -0
- clue/gunicorn_config.py +29 -0
- clue/healthz.py +74 -0
- clue/helper/discover.py +53 -0
- clue/helper/headers.py +30 -0
- clue/helper/oauth.py +128 -0
- clue/models/__init__.py +0 -0
- clue/models/actions.py +243 -0
- clue/models/config.py +456 -0
- clue/models/fetchers.py +136 -0
- clue/models/graph.py +162 -0
- clue/models/model_list.py +52 -0
- clue/models/network.py +430 -0
- clue/models/results/__init__.py +34 -0
- clue/models/results/base.py +10 -0
- clue/models/results/graph.py +26 -0
- clue/models/results/image.py +22 -0
- clue/models/results/status.py +55 -0
- clue/models/results/validation.py +57 -0
- clue/models/selector.py +67 -0
- clue/models/utils.py +52 -0
- clue/models/validators.py +19 -0
- clue/patched.py +8 -0
- clue/plugin/__init__.py +1008 -0
- clue/plugin/helpers/__init__.py +0 -0
- clue/plugin/helpers/central_server.py +27 -0
- clue/plugin/helpers/email_render.py +228 -0
- clue/plugin/helpers/token.py +34 -0
- clue/plugin/helpers/trino.py +103 -0
- clue/plugin/interactive.py +270 -0
- clue/plugin/models.py +19 -0
- clue/plugin/utils.py +78 -0
- clue/remote/__init__.py +0 -0
- clue/remote/datatypes/__init__.py +130 -0
- clue/remote/datatypes/cache.py +62 -0
- clue/remote/datatypes/events.py +118 -0
- clue/remote/datatypes/hash.py +193 -0
- clue/remote/datatypes/queues/__init__.py +0 -0
- clue/remote/datatypes/queues/comms.py +62 -0
- clue/remote/datatypes/set.py +96 -0
- clue/remote/datatypes/user_quota_tracker.py +54 -0
- clue/security/__init__.py +211 -0
- clue/security/obo.py +95 -0
- clue/security/utils.py +34 -0
- clue/services/action_service.py +186 -0
- clue/services/auth_service.py +348 -0
- clue/services/config_service.py +38 -0
- clue/services/fetcher_service.py +203 -0
- clue/services/jwt_service.py +233 -0
- clue/services/lookup_service.py +786 -0
- clue/services/type_service.py +165 -0
- clue/services/user_service.py +152 -0
- clue_api-1.0.0.dev7.dist-info/METADATA +111 -0
- clue_api-1.0.0.dev7.dist-info/RECORD +91 -0
- clue_api-1.0.0.dev7.dist-info/WHEEL +4 -0
- clue_api-1.0.0.dev7.dist-info/entry_points.txt +8 -0
- clue_api-1.0.0.dev7.dist-info/licenses/LICENSE +11 -0
clue/common/str_utils.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
from copy import copy
|
|
4
|
+
from typing import Optional, Self, Union
|
|
5
|
+
|
|
6
|
+
from clue.common.exceptions import ClueAttributeError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def remove_bidir_unicode_controls(in_str):
    """Strip bidirectional embedding/override controls and direction marks from a string.

    The characters removed are U+202A, U+202B, U+202D, U+202E, U+200E and U+200F.
    The pop-directional-formatting character (U+202C) is left untouched.

    Args:
        in_str: The value to clean. Expected to be a str.

    Returns:
        The cleaned string. If the input cannot be processed as a string
        (for example bytes or None), it is returned unchanged.
    """
    bidir_controls = ("\u202e", "\u202b", "\u202d", "\u202a", "\u200e", "\u200f")

    try:
        return "".join(char for char in in_str if char not in bidir_controls)
    except Exception:
        # Best effort: anything that is not an iterable of str is handed back as-is.
        return in_str
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def wrap_bidir_unicode_string(uni_str):
    """Wrap a string in an LRE (Left-to-Right Embedding) block when it contains bidir controls.

    This guarantees the string can be concatenated to other strings without
    affecting their left-to-right direction.

    Args:
        uni_str: The string to wrap. Empty values and bytes are returned unchanged.

    Returns:
        The original value when it is empty, is bytes, or contains no bidir
        controls; otherwise the string with every unclosed embedding/override
        closed and the whole text wrapped in U+202A ... U+202C.
    """
    if len(uni_str) == 0 or isinstance(uni_str, bytes):  # Not str, return it unchanged
        return uni_str

    # No unicode bidir controls found, return string unchanged
    if re.search(r"[\u202E\u202B\u202D\u202A\u200E\u200F]", uni_str) is None:
        return uni_str

    # Count embedding/override blocks that are opened but never popped.
    block_openers = ("\u202a", "\u202b", "\u202d", "\u202e")
    unclosed = 0
    for char in uni_str:
        if char in block_openers:
            unclosed += 1
        elif char == "\u202c" and unclosed > 0:
            unclosed -= 1

    # Close all dangling blocks, then add the final LTR wrapper that
    # neutralizes any marks (U+200E and U+200F).
    return "\u202a" + uni_str + "\u202c" * unclosed + "\u202c"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# According to wikipedia, RFC 3629 restricted UTF-8 to end at U+10FFFF.
|
|
64
|
+
# This removed the 6, 5 and (irritatingly) half of the 4 byte sequences.
|
|
65
|
+
#
|
|
66
|
+
# The start byte for 2-byte sequences should be a value between 0xc0 and
|
|
67
|
+
# 0xdf but the values 0xc0 and 0xc1 are invalid as they could only be
|
|
68
|
+
# the result of an overlong encoding of basic ASCII characters. There
|
|
69
|
+
# are similar restrictions on the valid values for 3 and 4-byte sequences.
|
|
70
|
+
# Matches one or more consecutive, well-formed UTF-8 sequences in a bytes value.
# The whole run is a capturing group, so `_valid_utf8.split(data)` alternates
# between non-matching bytes (even indexes) and valid runs (odd indexes).
# Note that the 1-byte class only accepts tab, newline and printable ASCII.
_valid_utf8 = re.compile(
    rb"""((?:
    [\x09\x0a\x20-\x7e]|             # 1-byte (ASCII excluding control chars).
    [\xc2-\xdf][\x80-\xbf]|          # 2-bytes (excluding overlong sequences).
    [\xe0][\xa0-\xbf][\x80-\xbf]|    # 3-bytes (excluding overlong sequences).

    [\xe1-\xec][\x80-\xbf]{2}|       # 3-bytes.
    [\xed][\x80-\x9f][\x80-\xbf]|    # 3-bytes (up to invalid code points).
    [\xee-\xef][\x80-\xbf]{2}|       # 3-bytes (after invalid code points).

    [\xf0][\x90-\xbf][\x80-\xbf]{2}| # 4-bytes (excluding overlong sequences).
    [\xf1-\xf3][\x80-\xbf]{3}|       # 4-bytes.
    [\xf4][\x80-\x8f][\x80-\xbf]{2}  # 4-bytes (up to U+10FFFF).
    )+)""",
    re.VERBOSE,
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _escape(t, reversible=True):
|
|
89
|
+
if t[0] % 2:
|
|
90
|
+
return t[1].replace(b"\\", b"\\\\") if reversible else t[1]
|
|
91
|
+
else:
|
|
92
|
+
return b"".join((b"\\x%02x" % x) for x in t[1])
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def dotdump(s: Union[bytes, str]):
    """Render a value as printable ASCII, substituting a period for every other byte.

    A str is encoded first (default encoding of ``str.encode``), so each
    non-ASCII character yields one period per encoded byte.

    https://www.cs.cmu.edu/~pattis/15-1XX/common/handouts/ascii.html

    Args:
        s: The bytes or str to render.

    Returns:
        str: Bytes 32 through 126 as their ASCII character, everything else as ".".
    """
    raw = s.encode() if isinstance(s, str) else s

    rendered = []
    for byte in raw:
        rendered.append(chr(byte) if 32 <= byte <= 126 else ".")

    return "".join(rendered)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def escape_str(s, reversible=True, force_str=False):
    """Escape a value so that the result only contains valid UTF-8 text.

    Args:
        s: The value to escape. bytes and str are escaped; any other type is
            returned as-is (or converted with ``str()`` when force_str is set).
        reversible: Forwarded to escape_str_strict.
        force_str: When True, values that are neither bytes nor str are
            converted with ``str()`` instead of being returned unchanged.

    Returns:
        The escaped string, or the original/stringified value for other types.
    """
    if isinstance(s, bytes):
        return escape_str_strict(s, reversible)

    if not isinstance(s, str):
        return str(s) if force_str else s

    try:
        # The UTF-16 round trip with "surrogatepass" re-encodes the text before
        # it is converted to UTF-8 bytes for strict escaping.
        as_utf8 = s.encode("utf-16", "surrogatepass").decode("utf-16").encode("utf-8")
        return escape_str_strict(as_utf8, reversible)
    except Exception:
        # Fall back to backslash-escaping whatever could not be encoded.
        fallback = s.encode("utf-8", errors="backslashreplace")
        return escape_str_strict(fallback, reversible)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Returns a string (str) with only valid UTF-8 byte sequences.
|
|
125
|
+
def escape_str_strict(s: bytes, reversible: bool = True) -> str:
    """Strictly escape a bytes value into a str.

    The input is split on runs of valid UTF-8; each piece is handed to
    ``_escape`` together with its position in the split.

    Args:
        s: The raw bytes to escape.
        reversible: Forwarded to ``_escape``.

    Returns:
        str: The escaped text, decoded as UTF-8.
    """
    pieces = _valid_utf8.split(s)
    escaped_pieces = [_escape(indexed_piece, reversible) for indexed_piece in enumerate(pieces)]

    return b"".join(escaped_pieces).decode("utf-8")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def safe_str(s, force_str=False):
    """Create a safe, escaped string using the non-reversible form of escape_str.

    Args:
        s: The value to escape.
        force_str: Forwarded to escape_str.

    Returns:
        Whatever escape_str returns for the value with ``reversible=False``.
    """
    return escape_str(s, force_str=force_str, reversible=False)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def is_safe_str(s: str) -> bool:
    """Check whether a string is already safe, i.e. unchanged by non-reversible escaping.

    Args:
        s: The string to check.

    Returns:
        bool: True when escaping a copy of the string yields the same string.
    """
    escaped = escape_str(copy(s), reversible=False)
    return escaped == s
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# This method is not strictly necessary; it exists mostly to stop people from rolling their own.
|
|
142
|
+
def unescape_str(s):
    """Unescape a backslash-escaped string back into raw bytes.

    The previous implementation relied on the Python 2 only ``string_escape``
    codec, which does not exist on Python 3, so every call failed. This version
    decodes the same kind of escapes (``\\xNN``, ``\\\\``, ``\\n``, ...).

    Args:
        s: The escaped value, as str or bytes. A str is encoded as UTF-8 first.

    Returns:
        bytes: The unescaped data. Bytes are returned because the unescaped
        content is not guaranteed to be valid UTF-8.
    """
    import codecs

    raw = s.encode("utf-8") if isinstance(s, str) else s

    # escape_decode returns a (decoded_bytes, consumed_length) pair.
    return codecs.escape_decode(raw)[0]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def default_string_value(
    *values: Optional[str], env_name: Optional[str] = None, default: Optional[str] = None
) -> Optional[str]:
    """Return a string value based on potential values, an environment variable, or a default.

    Args:
        *values: Candidate values, checked in order; the first truthy one wins.
        env_name: Name of an environment variable to consult when no candidate is truthy.
        default: Value used when neither a candidate nor the environment provides one.

    Returns:
        The first truthy candidate, else the non-empty environment value, else the default.
    """
    # The fallback is resolved up front, before the candidates are inspected.
    if env_name:
        fallback = os.getenv(env_name, default or "") or default
    else:
        fallback = default

    for candidate in values:
        if candidate:
            return candidate

    return fallback
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def get_parent_key(key: str) -> str:
    """Get the parent key of a dotted key in the format a.b.c.

    Args:
        key: The dotted key, e.g. "a.b.c".

    Returns:
        str: The key without its last dotted segment ("a.b"). Keys that have no
        non-empty parent/child pair around a dot are returned unchanged.
    """
    found = re.match(r"^(.+)\..+?$", key)
    if found is None:
        return key

    # Anything after the match (a trailing newline at most) is kept as-is.
    return found.group(1) + key[found.end():]
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def sanitize_lucene_query(query: str):
    """Escape a string so that it is safe to search on via lucene.

    Every Lucene special character is prefixed with a backslash:
    ``+ - ! ( ) { } [ ] ^ " ~ * ? : \\ /`` as well as the two-character
    operators ``&&`` and ``||``. The ``+`` operator was previously left
    unescaped.

    Args:
        query: The raw text to escape.

    Returns:
        str: The escaped text.
    """
    query = re.sub(r'([+\-!(){}\[\]^"~*?:\\/])', r"\\\1", query)

    # The boolean operators are two characters wide; one backslash in front of
    # the pair is enough to escape them.
    return query.replace("&&", "\\&&").replace("||", "\\||")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class NamedConstants(object):
|
|
169
|
+
"""A class containing a list of named constants, as well as a reverse map for those constants to their name"""
|
|
170
|
+
|
|
171
|
+
def __init__(self: Self, name: str, string_value_list: list[tuple[str, str]]):
|
|
172
|
+
self._name = name
|
|
173
|
+
self._value_map = dict(string_value_list)
|
|
174
|
+
self._reverse_map = dict([(s[1], s[0]) for s in string_value_list])
|
|
175
|
+
|
|
176
|
+
# we also import the list as attributes so things like
|
|
177
|
+
# tab completion and introspection still work.
|
|
178
|
+
for s, v in self._value_map.items():
|
|
179
|
+
setattr(self, s, v)
|
|
180
|
+
|
|
181
|
+
def name_for_value(self, v):
|
|
182
|
+
"""Get the name of a given value"""
|
|
183
|
+
return self._reverse_map[v]
|
|
184
|
+
|
|
185
|
+
def contains_value(self, v):
|
|
186
|
+
"""Chgeck if this instance contains the given value"""
|
|
187
|
+
return v in self._reverse_map
|
|
188
|
+
|
|
189
|
+
def __getitem__(self, s):
|
|
190
|
+
return self._value_map[s]
|
|
191
|
+
|
|
192
|
+
def __getattr__(self, s):
|
|
193
|
+
# We implement our own getattr mainly to provide the better exception.
|
|
194
|
+
return self._value_map[s]
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class StringTable(NamedConstants):
    """A NamedConstants variant whose lookups return the name itself and reject unknown names.

    Both item access and attribute access return the requested name when it is
    a known constant name, and raise ClueAttributeError otherwise.
    """

    def contains_string(self, s):
        """Check if this instance contains the given value (membership in the reverse map)"""
        return s in self._reverse_map

    def __getitem__(self, s):
        if s not in self._value_map:
            raise ClueAttributeError("Invalid value for %s (%s)" % (self._name, s))

        return s

    def __getattr__(self, s):
        # We implement our own getattr mainly to provide the better exception.
        if s not in self._value_map:
            raise ClueAttributeError("Invalid value for %s (%s)" % (self._name, s))

        return s
|
clue/common/swagger.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import re
|
|
3
|
+
from functools import wraps
|
|
4
|
+
from typing import Any, Callable, Optional, cast
|
|
5
|
+
|
|
6
|
+
from flasgger import utils
|
|
7
|
+
from pydantic import TypeAdapter
|
|
8
|
+
|
|
9
|
+
from clue.models.network import (
|
|
10
|
+
Annotation,
|
|
11
|
+
ClueResponse,
|
|
12
|
+
QueryEntry,
|
|
13
|
+
QueryResult,
|
|
14
|
+
)
|
|
15
|
+
from clue.services import type_service
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def monkey_patched_parse(obj: object, *args: tuple[Any], **kwargs) -> tuple[Optional[str], Optional[str], None]:
    """Parse the docstring of a python object into a short and a long description.

    Extra positional and keyword arguments are accepted and ignored.

    Args:
        obj (object): The object to inspect.

    Returns:
        tuple[str, str, None]: The first docstring line, the whole docstring wrapped in a
            markdown code fence, and a trailing None. The first two are None when the
            object has no docstring.
    """
    doc = inspect.getdoc(obj)

    if not doc:
        return None, None, None

    first_line = doc.splitlines()[0]
    fenced_doc = f"```\n{doc}\n```"

    return first_line, fenced_doc, None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
utils.parse_docstring = monkey_patched_parse
|
|
41
|
+
|
|
42
|
+
# JSON schemas of the response models, with internal references pointing at
# "#/definitions/<model>".
DEFINITIONS = {
    "QueryResult": QueryResult.model_json_schema(ref_template="#/definitions/{model}"),
    "QueryEntry": QueryEntry.model_json_schema(ref_template="#/definitions/{model}"),
    "Annotation": Annotation.model_json_schema(ref_template="#/definitions/{model}"),
}

# One generic error response per status code (keyed by the integer code), each
# with the ClueResponse schema and an example payload for that code.
RESPONSES = {
    status_code: {
        "description": "Something went wrong with your request",
        "schema": {
            **ClueResponse.model_json_schema(),
            "example": ClueResponse(error_message="Example error", status_code=status_code).model_dump(),
        },
    }
    for status_code in [400, 401, 403, 404]
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def generate_swagger_docs(responses: dict[int, str] = {}) -> Callable:  # noqa: C901
    """Generates a decorator that allows to create swagger doc for an endpoint.

    The decorator attaches a ``specs_dict`` attribute to the endpoint function,
    built from its signature (path parameters), its docstring (query
    parameters) and its return annotation (schema of the 200 response).

    Query parameters are read from any docstring section whose first line ends
    with "arguments:"; each ``name: type => description`` or
    ``name => description`` line in it becomes one parameter.

    Args:
        responses (dict[int, str], optional): A dict of the possible responses, with the HTTP code as the key and the
            description of the response as the value. Defaults to {}. The dict is only read, never modified.

    Returns:
        Callable: The decorator
    """

    def decorator(function: Callable) -> Callable:
        func_signature = inspect.signature(function)
        func_doc = inspect.getdoc(function)

        # getmodule() may return None; fall back to the bare function name
        # instead of leaving func_path undefined.
        module = inspect.getmodule(function)
        module_name = module.__name__ if module else None
        func_path = f"{module_name}.{function.__name__}" if module_name else function.__name__

        path_params = [
            {
                "name": param,
                "in": "path",
                "type": "string",
                "enum": list(type_service.SUPPORTED_TYPES.keys()) if param == "type_name" else None,
            }
            for param in func_signature.parameters
            if param != "kwargs" and not param.startswith("_")
        ]

        query_params: list[dict[str, Any]] = []
        for section in (func_doc or "").split("\n\n"):
            lines = section.splitlines()
            # An empty section (two consecutive blank lines) has no header line.
            if not lines or not lines[0].lower().endswith("arguments:"):
                continue

            for raw_line in lines[1:]:
                # Lines without "=>" are continuation text, not a parameter.
                # This must be checked before the description is stripped.
                if "=>" not in raw_line:
                    continue

                declaration = re.split(r"\s*=>", raw_line, maxsplit=1)[0].strip()
                if not declaration or declaration.lower() == "none":
                    continue

                name, separator, param_type = declaration.partition(": ")
                query_params.append({"name": name, "in": "query", "type": param_type if separator else None})

        tags: list[str] = []
        if module:
            tags.append(module.__name__.split(".")[-1].capitalize())

        cast(Any, function).specs_dict = {
            "parameters": [*path_params, *query_params],
            "definitions": DEFINITIONS,
            "responses": {
                "200": {
                    "description": responses.get(200, "Request succeeded"),
                    "schema": (
                        TypeAdapter(func_signature.return_annotation).json_schema(ref_template="#/definitions/{model}")
                        if func_signature.return_annotation != inspect.Signature.empty
                        else None
                    ),
                },
                **RESPONSES,
            },
            "summary": "test",
            "tags": tags,
            "operationId": func_path,
        }

        @wraps(function)
        def wrapper(*args, **kwargs):
            return function(*args, **kwargs)

        return wrapper

    return decorator
|
clue/common/uid.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import Any, Literal, Optional
|
|
4
|
+
|
|
5
|
+
# The lengths accepted by get_id_from_data: how many leading hex characters of
# the SHA-256 digest are kept before encoding.
TINY = 8
SHORT = 16
MEDIUM = NORMAL = 32
LONG = 64
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_random_id() -> str:
    """Generate a random unique id from a uuid4, encoded in base62.

    Returns:
        str: Base62 encoded uuid4
    """
    import baseconv

    random_value = uuid.uuid4().int
    return baseconv.base62.encode(random_value)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_id_from_data(data: Any, prefix: Optional[str] = None, length: Literal[8, 16, 32, 64] = MEDIUM):  # type: ignore[assignment]
    """Generate a deterministic id from data: a truncated sha256 hex digest, encoded in base62.

    Args:
        data (Any): The data to use to generate the id. It is converted with ``str()`` before hashing.
        prefix (Optional[str], optional): When a str, the id is returned as ``<prefix>_<id>``. Defaults to None.
        length (Literal[8, 16, 32, 64], optional): Number of leading hex characters of the digest that
            are kept before the base62 encoding. Defaults to 32.

    Raises:
        ValueError: Raised when an invalid length is provided

    Returns:
        str: The generated base62 encoded truncated sha256 hash.
    """
    import baseconv

    possible_len = [TINY, SHORT, MEDIUM, LONG]
    if length not in possible_len:
        raise ValueError(f"Invalid hash length of {length}. Possible values are: {str(possible_len)}.")

    truncated_digest = hashlib.sha256(str(data).encode()).hexdigest()[:length]
    encoded = baseconv.base62.encode(int(truncated_digest, 16))

    if not isinstance(prefix, str):
        return encoded

    return f"{prefix}_{encoded}"
|
clue/config.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from clue.common import forge
|
|
4
|
+
from clue.models.config import Config
|
|
5
|
+
|
|
6
|
+
# The configuration object, built once when this module is imported.
config: Config = Config()

#################################################################
# Configuration

# Classification engine obtained from forge.
CLASSIFICATION = forge.get_classification()

# Shortcuts to frequently used API settings.
AUDIT = config.api.audit

SECRET_KEY = config.api.secret_key
DEBUG = config.api.debug
MAX_CLASSIFICATION = CLASSIFICATION.UNRESTRICTED

USER_TYPES = {"admin", "user"}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_version() -> str:
    """Get the version of the Clue API from the CLUE_VERSION environment variable.

    Returns:
        str: The clue version, or "1.0.0.dev0" when the variable is not set
    """
    return os.getenv("CLUE_VERSION", "1.0.0.dev0")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_commit() -> str:
    """Get the commit of the currently deployed Clue API from the COMMIT_HASH environment variable.

    Returns:
        str: The commit of the currently deployed image, or a placeholder text when the variable is not set
    """
    return os.getenv("COMMIT_HASH", "this is not the commit you are looking for")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_branch() -> str:
    """Get the branch of the current Clue image from the BRANCH environment variable.

    Returns:
        str: The current branch, or a placeholder text when the variable is not set
    """
    return os.getenv("BRANCH", "this is not the branch you are looking for")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_redis():
    """Get the Redis client used by Clue, for the host and port set in the core redis config.

    Returns:
        The Redis client instance.
    """
    # Imported here rather than at module level, as in the original code.
    from clue.remote.datatypes import get_client

    redis_settings = config.core.redis
    return get_client(redis_settings.host, redis_settings.port, False)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
cache = forge.cache
|
|
File without changes
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from clue.common.regex import (
|
|
2
|
+
DOMAIN_ONLY_REGEX,
|
|
3
|
+
EMAIL_PATH_REGEX,
|
|
4
|
+
EMAIL_REGEX,
|
|
5
|
+
HBS_AGENT_ID_REGEX,
|
|
6
|
+
IPV4_ONLY_REGEX,
|
|
7
|
+
IPV6_ONLY_REGEX,
|
|
8
|
+
MD5_REGEX,
|
|
9
|
+
PORT_REGEX,
|
|
10
|
+
SHA1_REGEX,
|
|
11
|
+
SHA256_REGEX,
|
|
12
|
+
URI_ONLY,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Every supported type name, mapped to the regex associated with it, or to None
# when the type has no regex.
SUPPORTED_TYPES = {
    "ipv4": IPV4_ONLY_REGEX,
    "ipv6": IPV6_ONLY_REGEX,
    # We don't auto-detect ip types, as it's redundant with ipv4/v6. This is just a convenience/backwards compat thing
    "ip": None,
    "domain": DOMAIN_ONLY_REGEX,
    "port": PORT_REGEX,
    "url": URI_ONLY,
    "userid": None,
    "user_agent": None,
    "email_address": EMAIL_REGEX,
    "email_id": None,
    "email_path": EMAIL_PATH_REGEX,
    "md5": MD5_REGEX,
    "sha1": SHA1_REGEX,
    "sha256": SHA256_REGEX,
    "hbs_oid": None,
    "hbs_agent_id": HBS_AGENT_ID_REGEX,
    "telemetry": None,
    "howler_id": None,
    "hostname": None,
}

# NOTE(review): presumably the types whose values are compared ignoring case - confirm against the callers.
CASE_INSENSITIVE_TYPES = ["ip", "domain", "port"]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from apscheduler.schedulers.background import BackgroundScheduler
|
|
6
|
+
from pytz import timezone
|
|
7
|
+
|
|
8
|
+
from clue.common.logging import get_logger
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__file__)
|
|
11
|
+
|
|
12
|
+
scheduler = BackgroundScheduler(timezone=timezone(os.getenv("SCHEDULER_TZ", "America/Toronto")))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def setup_jobs():
    """Import every module of the cronjobs package, register its job, then start the scheduler.

    Each ``.py`` file next to this one (except ``__init__.py``) is imported and
    its ``setup_job`` function is called with the shared scheduler. A module
    that fails to import or to set up is logged and skipped.
    """
    cronjobs_dir = Path(__file__).parent

    job_files = []
    for candidate in cronjobs_dir.iterdir():
        if candidate.suffix == ".py" and candidate.name != "__init__.py":
            job_files.append(candidate)

    for job_file in job_files:
        try:
            job_module = importlib.import_module(f"clue.cronjobs.{job_file.stem}")

            job_module.setup_job(scheduler)
        except Exception as e:
            # One broken job must not prevent the others from being scheduled.
            logger.critical("Error when initializing %s - %s", job_file, e)

    scheduler.start()
|
clue/cronjobs/plugins.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from apscheduler.schedulers.base import BaseScheduler
|
|
2
|
+
from gevent.queue import Queue
|
|
3
|
+
|
|
4
|
+
from clue.api.v1.registration import EXTERNAL_PLUGIN_SET
|
|
5
|
+
from clue.common.logging import get_logger
|
|
6
|
+
from clue.config import config
|
|
7
|
+
from clue.models.config import ExternalSource
|
|
8
|
+
|
|
9
|
+
logger = get_logger(__file__)
|
|
10
|
+
|
|
11
|
+
config_updates = Queue()
|
|
12
|
+
|
|
13
|
+
__scheduler_instance: BaseScheduler | None = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def update_external_source_list():
    """Refresh config.api.external_sources with the plugins that have been registered through the API.

    The built-in sources are kept; every other entry is replaced by the current
    members of EXTERNAL_PLUGIN_SET, validated as ExternalSource models.
    """
    # Validate the registered plugins first, so the config is left untouched if one is invalid.
    registered_plugins: list[ExternalSource] = [
        ExternalSource.model_validate(member) for member in EXTERNAL_PLUGIN_SET.members()
    ]

    config.api.external_sources = [source for source in config.api.external_sources if source.built_in is True]
    config.api.external_sources.extend(registered_plugins)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def setup_job(sched: BaseScheduler):
    """Store the provided scheduler and schedule the external source refresh on it.

    The refresh (update_external_source_list) is added as an interval job that
    runs every minute.

    Arguments:
        sched: The scheduler instance to set.
    """
    global __scheduler_instance

    __scheduler_instance = sched
    sched.add_job(update_external_source_list, trigger="interval", minutes=1)

    logger.debug("Plugin job setup complete.")
|
clue/error.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
from http.client import HTTPException
|
|
2
|
+
from sys import exc_info
|
|
3
|
+
from traceback import format_tb
|
|
4
|
+
from typing import Union
|
|
5
|
+
|
|
6
|
+
from flask import Blueprint, request
|
|
7
|
+
from werkzeug.exceptions import BadRequest, Forbidden, InternalServerError, Unauthorized
|
|
8
|
+
|
|
9
|
+
from clue.api import bad_request, forbidden, internal_error, not_found, unauthorized
|
|
10
|
+
from clue.common.exceptions import (
|
|
11
|
+
AccessDeniedException,
|
|
12
|
+
AuthenticationException,
|
|
13
|
+
ClueException,
|
|
14
|
+
)
|
|
15
|
+
from clue.common.logging import get_logger, log_with_traceback
|
|
16
|
+
from clue.common.logging.audit import AUDIT
|
|
17
|
+
from clue.config import config
|
|
18
|
+
|
|
19
|
+
errors = Blueprint("errors", __name__)
|
|
20
|
+
|
|
21
|
+
logger = get_logger(__file__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
######################################
|
|
25
|
+
# Custom Error page
|
|
26
|
+
@errors.app_errorhandler(400)
def handle_400(e: Union[HTTPException, ClueException]):
    """Handles HTTP 400 Bad Request errors.

    If the error is an instance of BadRequest, a fixed message about the
    missing or malformed JSON data block is returned. Otherwise the string
    representation of the error is included in the response.

    Arguments:
        e: The error to handle.

    Returns:
        A Response object representing a Bad Request HTTP error.
    """
    if isinstance(e, BadRequest):
        # The message previously ended with a stray single quote.
        error_message = "No data block provided or data block not in JSON format."
    else:
        error_message = str(e)

    return bad_request(err=error_message)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@errors.app_errorhandler(401)
def handle_401(e: Union[HTTPException, ClueException]):
    """Handles HTTP 401 Unauthorized errors.

    The message is the description of an Unauthorized error, or the string
    representation of any other error. The XSRF-TOKEN cookie is cleared on the
    response.

    Arguments:
        e: The error to handle.

    Returns:
        A Response object representing an Unauthorized HTTP error, also containing the oauth_providers data.
    """
    msg = e.description if isinstance(e, Unauthorized) else str(e)

    provider_names = list(config.auth.oauth.providers.keys())
    res = unauthorized({"oauth_providers": provider_names}, err=msg)

    # An empty value with max_age=0 expires the cookie.
    res.set_cookie("XSRF-TOKEN", "", max_age=0)

    return res
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@errors.app_errorhandler(403)
def handle_403(e: Union[HTTPException, ClueException]):
    """Handles HTTP 403 Forbidden errors.

    The message is the description of a Forbidden error, or the string
    representation of any other error. When AUDIT is truthy, the denied request
    is also written to the audit log with the caller's IP address.

    Arguments:
        e: The error to handle.

    Returns:
        A Response object representing a Forbidden HTTP error.
    """
    error_message = e.description if isinstance(e, Forbidden) else str(e)

    trace = exc_info()[2]
    if AUDIT:
        # No user name is resolved here; the audit line always records "(None)".
        uname = "(None)"
        ip = request.remote_addr

        log_with_traceback(trace, f"Access Denied. (U:{uname} - IP:{ip}) [{error_message}]", audit=True)

    return forbidden(err=f"Access Denied ({request.path}) [{error_message}]")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@errors.app_errorhandler(404)
def handle_404(_):
    """Handles HTTP 404 Not Found errors by reporting the requested path as a missing API."""
    missing_path = request.path
    return not_found(err=f"Api does not exist ({missing_path})")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@errors.app_errorhandler(500)
def handle_500(e: InternalServerError):
    """Handles HTTP 500 Internal Server errors.

    An AccessDeniedException or AuthenticationException wrapped by the error is
    handed to the 403 or 401 handler. Any other error is logged with its
    traceback and returned as a formatted message.

    Arguments:
        e: The error to handle.

    Returns:
        A Response object representing an Internal Server HTTP error.
    """
    wrapped = e.original_exception

    if isinstance(wrapped, AccessDeniedException):
        return handle_403(wrapped)

    if isinstance(wrapped, AuthenticationException):
        return handle_401(wrapped)

    # Report the wrapped exception when there is one, else the 500 error itself.
    oe = wrapped or e

    trace = exc_info()[2]
    log_with_traceback(trace, "Exception", is_exception=True)

    # NOTE(review): the formatted traceback is passed on as the error message - confirm
    # that exposing it in the response is intended.
    message_parts = ["\n"]
    message_parts.extend(format_tb(exc_info()[2]))
    message_parts.append("%s: %s\n" % (oe.__class__.__name__, str(oe)))

    return internal_error(err="".join(message_parts).rstrip("\n"))
|