clue-api 1.3.0.dev32__tar.gz → 1.3.0.dev34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/PKG-INFO +1 -2
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/app.py +4 -0
- clue_api-1.3.0.dev34/clue/constants/env.py +5 -0
- clue_api-1.3.0.dev34/clue/helper/discover.py +51 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/datatypes/__init__.py +3 -1
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/services/lookup_service.py +45 -37
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/services/type_service.py +2 -1
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/pyproject.toml +4 -8
- clue_api-1.3.0.dev32/clue/helper/discover.py +0 -53
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/LICENSE +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/README.md +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/.gitignore +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/base.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/v1/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/v1/actions.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/v1/auth.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/v1/configs.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/v1/fetchers.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/v1/lookup.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/v1/registration.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/api/v1/static.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/cache/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/classification.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/classification.yml +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/dict_utils.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/exceptions.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/forge.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/json_utils.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/list_utils.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/logging/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/logging/audit.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/logging/format.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/regex.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/str_utils.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/swagger.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/common/uid.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/config.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/constants/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/constants/supported_types.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/cronjobs/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/cronjobs/plugins.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/error.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/extensions/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/extensions/config.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/gunicorn_config.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/healthz.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/helper/headers.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/helper/oauth.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/actions.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/config.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/fetchers.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/graph.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/model_list.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/network.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/results/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/results/base.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/results/graph.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/results/image.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/results/status.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/results/validation.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/selector.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/models/validators.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/patched.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/plugin/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/plugin/helpers/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/plugin/helpers/central_server.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/plugin/helpers/email_render.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/plugin/helpers/token.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/plugin/helpers/trino.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/plugin/models.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/plugin/utils.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/py.typed +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/datatypes/cache.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/datatypes/events.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/datatypes/hash.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/datatypes/queues/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/datatypes/queues/comms.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/datatypes/set.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/remote/datatypes/user_quota_tracker.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/security/__init__.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/security/obo.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/security/utils.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/services/action_service.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/services/auth_service.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/services/config_service.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/services/fetcher_service.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/services/jwt_service.py +0 -0
- {clue_api-1.3.0.dev32 → clue_api-1.3.0.dev34}/clue/services/user_service.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: clue-api
|
|
3
|
-
Version: 1.3.0.dev32
|
|
3
|
+
Version: 1.3.0.dev34
|
|
4
4
|
Summary: Clue distributed enrichment service
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -30,7 +30,6 @@ Requires-Dist: flask (<3.0.0)
|
|
|
30
30
|
Requires-Dist: flask-caching (>=2.1.0,<3.0.0)
|
|
31
31
|
Requires-Dist: flask-cors (>=4.0.1,<7.0.0) ; extra == "server"
|
|
32
32
|
Requires-Dist: gevent (>=24.2.1,<25.0.0)
|
|
33
|
-
Requires-Dist: geventhttpclient (>=2.3.1,<3.0.0)
|
|
34
33
|
Requires-Dist: gunicorn (>=22,<24)
|
|
35
34
|
Requires-Dist: imgkit (>=1.2.3,<2.0.0)
|
|
36
35
|
Requires-Dist: passlib (>=1.7.4,<2.0.0) ; extra == "server"
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
from clue.common.logging import get_logger
|
|
6
|
+
from clue.config import config
|
|
7
|
+
from clue.constants.env import TESTING
|
|
8
|
+
|
|
9
|
+
logger = get_logger(__file__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_apps_list() -> list[dict[str, str]]:
|
|
13
|
+
"""Get a list of apps from the discovery service
|
|
14
|
+
|
|
15
|
+
Returns:
|
|
16
|
+
list[dict[str, str]]: A list of other apps
|
|
17
|
+
"""
|
|
18
|
+
apps: list[dict[str, Any]] = []
|
|
19
|
+
|
|
20
|
+
if TESTING:
|
|
21
|
+
return apps
|
|
22
|
+
|
|
23
|
+
if config.api.discover_url:
|
|
24
|
+
try:
|
|
25
|
+
resp = requests.get(config.api.discover_url, headers={"accept": "application/json"}, timeout=5)
|
|
26
|
+
|
|
27
|
+
if not resp.ok:
|
|
28
|
+
logger.warning(
|
|
29
|
+
"Invalid response %s from server for apps discovery: %s", resp.status_code, config.api.discover_url
|
|
30
|
+
)
|
|
31
|
+
return apps
|
|
32
|
+
|
|
33
|
+
data = resp.json()
|
|
34
|
+
for app in data["applications"]["application"]:
|
|
35
|
+
url = app["instance"][0]["hostName"]
|
|
36
|
+
|
|
37
|
+
if "clue" not in url:
|
|
38
|
+
apps.append(
|
|
39
|
+
{
|
|
40
|
+
"alt": app["instance"][0]["metadata"]["alternateText"],
|
|
41
|
+
"name": app["name"],
|
|
42
|
+
"img_d": app["instance"][0]["metadata"]["imageDark"],
|
|
43
|
+
"img_l": app["instance"][0]["metadata"]["imageLight"],
|
|
44
|
+
"route": url,
|
|
45
|
+
"classification": app["instance"][0]["metadata"]["classification"],
|
|
46
|
+
}
|
|
47
|
+
)
|
|
48
|
+
except Exception:
|
|
49
|
+
logger.exception(f"Failed to get apps from discover URL: {config.api.discover_url}")
|
|
50
|
+
|
|
51
|
+
return sorted(apps, key=lambda k: k["name"])
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
|
+
import sys
|
|
5
6
|
import time
|
|
6
7
|
from datetime import datetime
|
|
7
8
|
|
|
@@ -10,6 +11,7 @@ from packaging.version import parse
|
|
|
10
11
|
|
|
11
12
|
from clue.common.logging import get_logger
|
|
12
13
|
from clue.common.uid import get_random_id
|
|
14
|
+
from clue.constants.env import TESTING
|
|
13
15
|
|
|
14
16
|
logger = get_logger(__file__)
|
|
15
17
|
|
|
@@ -91,7 +93,7 @@ def get_client(host, port, private, password=None):
|
|
|
91
93
|
|
|
92
94
|
if password:
|
|
93
95
|
logger.debug("Connecting to redis with password")
|
|
94
|
-
|
|
96
|
+
elif "pytest" not in sys.modules and not TESTING:
|
|
95
97
|
logger.warning("Connecting to redis without authentication.")
|
|
96
98
|
|
|
97
99
|
ssl_kwargs = {}
|
|
@@ -7,17 +7,14 @@ import time
|
|
|
7
7
|
from datetime import datetime, timedelta, timezone
|
|
8
8
|
from hashlib import sha256
|
|
9
9
|
from typing import Any, Optional
|
|
10
|
-
from urllib.parse import urlparse
|
|
11
10
|
|
|
12
|
-
import
|
|
13
|
-
from elasticapm.traces import Transaction, execution_context
|
|
11
|
+
from elasticapm.traces import Transaction, capture_span, execution_context
|
|
14
12
|
from flask import Request, request
|
|
15
13
|
from gevent import Greenlet
|
|
16
14
|
from gevent.pool import Pool
|
|
17
|
-
from geventhttpclient import HTTPClient
|
|
18
|
-
from geventhttpclient.response import HTTPResponse
|
|
19
15
|
from pydantic import BaseModel, ValidationError
|
|
20
|
-
from requests import Response
|
|
16
|
+
from requests import Response, Session
|
|
17
|
+
from requests.adapters import HTTPAdapter, Retry
|
|
21
18
|
|
|
22
19
|
from clue.common.exceptions import (
|
|
23
20
|
AuthenticationException,
|
|
@@ -35,32 +32,40 @@ from clue.models.selector import Selector
|
|
|
35
32
|
from clue.services import auth_service, type_service, user_service
|
|
36
33
|
|
|
37
34
|
logger = get_logger(__file__)
|
|
38
|
-
CLIENTS: dict[str,
|
|
35
|
+
CLIENTS: dict[str, Session] = {}
|
|
39
36
|
|
|
40
37
|
|
|
41
|
-
def get_client(base_url: str, timeout: float) ->
|
|
42
|
-
"""Gets or creates
|
|
38
|
+
def get_client(base_url: str, timeout: float) -> Session:
|
|
39
|
+
"""Gets or creates a requests session for the provided base_url.
|
|
43
40
|
|
|
44
41
|
Args:
|
|
45
42
|
base_url (str): The base url of the desired client.
|
|
46
43
|
timeout (float): The connection and network timeout to use (is multiplied by 3).
|
|
47
44
|
|
|
48
45
|
Returns:
|
|
49
|
-
|
|
46
|
+
Session: The requests Session instance matching the provided base_url.
|
|
50
47
|
"""
|
|
51
48
|
client_hash = sha256(base_url.encode())
|
|
52
49
|
client_hash.update(str(timeout).encode())
|
|
53
50
|
client_key = client_hash.hexdigest()
|
|
54
51
|
|
|
55
52
|
if client_key not in CLIENTS:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
53
|
+
session = Session()
|
|
54
|
+
# Configure connection pool with HTTPAdapter
|
|
55
|
+
pool_connections = math.floor(int(os.environ.get("EXECUTOR_THREADS", 32)) / 2)
|
|
56
|
+
|
|
57
|
+
retry_strategy = Retry(
|
|
58
|
+
total=2, backoff_factor=0.5, status_forcelist=[429, 500, 502, 503, 504], allowed_methods=["GET", "POST"]
|
|
59
|
+
)
|
|
60
|
+
adapter = HTTPAdapter(
|
|
61
|
+
pool_connections=pool_connections, pool_maxsize=pool_connections, max_retries=retry_strategy
|
|
62
62
|
)
|
|
63
63
|
|
|
64
|
+
session.mount("http://", adapter)
|
|
65
|
+
session.mount("https://", adapter)
|
|
66
|
+
|
|
67
|
+
CLIENTS[client_key] = session
|
|
68
|
+
|
|
64
69
|
return CLIENTS[client_key]
|
|
65
70
|
|
|
66
71
|
|
|
@@ -162,7 +167,7 @@ def parse_query_params(request: Request, limit: int = 10, timeout: float = 5.0):
|
|
|
162
167
|
|
|
163
168
|
def generate_params(
|
|
164
169
|
limit: int, timeout: float, no_annotation: bool = False, include_raw: bool = False, no_cache: bool = False
|
|
165
|
-
):
|
|
170
|
+
) -> dict[str, str | int | float | bool]:
|
|
166
171
|
"""Generates HTTP request parameters for a call to a source.
|
|
167
172
|
|
|
168
173
|
Args:
|
|
@@ -176,7 +181,7 @@ def generate_params(
|
|
|
176
181
|
str: A string of HTTP params formatted so that it can be appended to a url
|
|
177
182
|
(in the format "?param1=value1&param2=value2")
|
|
178
183
|
"""
|
|
179
|
-
params = {
|
|
184
|
+
params: dict[str, str | int | float | bool] = {
|
|
180
185
|
"limit": limit,
|
|
181
186
|
"max_timeout": max(timeout * 0.95, 0.5),
|
|
182
187
|
"deadline": (datetime.now(timezone.utc) + timedelta(seconds=max(timeout * 0.95, 0.5))).timestamp(),
|
|
@@ -191,7 +196,7 @@ def generate_params(
|
|
|
191
196
|
if no_cache:
|
|
192
197
|
params["no_cache"] = True
|
|
193
198
|
|
|
194
|
-
return
|
|
199
|
+
return params
|
|
195
200
|
|
|
196
201
|
|
|
197
202
|
def process_exception(source_name: str, rsp: Response | None, exception: Exception):
|
|
@@ -252,7 +257,7 @@ def parse_response(source: ExternalSource, user: dict[str, Any], api_response: A
|
|
|
252
257
|
Returns:
|
|
253
258
|
list[QueryEntry]: The list of results contained in the response.
|
|
254
259
|
"""
|
|
255
|
-
with
|
|
260
|
+
with capture_span(source.name, "parsing"):
|
|
256
261
|
if isinstance(api_response, dict):
|
|
257
262
|
api_response = [api_response]
|
|
258
263
|
|
|
@@ -294,7 +299,7 @@ def parse_bulk_response(
|
|
|
294
299
|
if source.production:
|
|
295
300
|
logger.debug(f"Skipping validation for production source {source.name}")
|
|
296
301
|
|
|
297
|
-
with
|
|
302
|
+
with capture_span(f"{source.name}-bulk", "parsing"):
|
|
298
303
|
for type in api_response:
|
|
299
304
|
bulk_result.setdefault(type, {})
|
|
300
305
|
for value in api_response[type]:
|
|
@@ -379,7 +384,7 @@ def query_external(
|
|
|
379
384
|
|
|
380
385
|
finish_result = functools.partial(build_result, type_name, value, source)
|
|
381
386
|
|
|
382
|
-
with
|
|
387
|
+
with capture_span(query_external.__name__, span_type="greenlet"):
|
|
383
388
|
if type_name not in type_service.all_supported_types(user, access_token=access_token).get(source.name, {}):
|
|
384
389
|
return finish_result(error="invalid_type")
|
|
385
390
|
|
|
@@ -404,17 +409,19 @@ def query_external(
|
|
|
404
409
|
# perform the lookup, ensuring access controls are applied
|
|
405
410
|
url = f"{source.url}/lookup/{type_name}/{value}/"
|
|
406
411
|
response: Any = None
|
|
407
|
-
rsp:
|
|
412
|
+
rsp: Response | None = None
|
|
408
413
|
start = time.perf_counter()
|
|
409
414
|
try:
|
|
410
|
-
with
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
415
|
+
with capture_span(url, "http"):
|
|
416
|
+
rsp = get_client(source.url, timeout).get(
|
|
417
|
+
url,
|
|
418
|
+
params=generate_params(limit, timeout, no_annotation, include_raw, no_cache),
|
|
414
419
|
headers=generate_headers(access_token, clue_access_token),
|
|
420
|
+
timeout=(timeout, timeout * 3),
|
|
415
421
|
)
|
|
422
|
+
rsp.raise_for_status()
|
|
416
423
|
|
|
417
|
-
response = json
|
|
424
|
+
response = rsp.json()
|
|
418
425
|
except Exception as exception:
|
|
419
426
|
return finish_result(
|
|
420
427
|
error=process_exception(source.name, rsp, exception),
|
|
@@ -571,7 +578,7 @@ def bulk_query_external( # noqa: C901
|
|
|
571
578
|
if apm_transaction:
|
|
572
579
|
execution_context.set_transaction(apm_transaction)
|
|
573
580
|
|
|
574
|
-
with
|
|
581
|
+
with capture_span(bulk_query_external.__name__, span_type="greenlet"):
|
|
575
582
|
supported_types = type_service.all_supported_types(user, access_token=access_token).get(source.name, {})
|
|
576
583
|
bulk_result: dict[str, dict[str, QueryResult]] = {}
|
|
577
584
|
|
|
@@ -617,22 +624,23 @@ def bulk_query_external( # noqa: C901
|
|
|
617
624
|
url = f"{source.url}/lookup/"
|
|
618
625
|
response: Any = None
|
|
619
626
|
start = time.perf_counter()
|
|
620
|
-
rsp:
|
|
627
|
+
rsp: Response | None = None
|
|
621
628
|
try:
|
|
622
|
-
with
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
body=json.dumps([entry.model_dump(exclude_none=True, exclude_unset=True) for entry in data]),
|
|
629
|
+
with capture_span(url, "http"):
|
|
630
|
+
rsp = get_client(source.url, timeout).post(
|
|
631
|
+
url,
|
|
632
|
+
params=generate_params(limit, timeout, no_annotation, include_raw, no_cache),
|
|
633
|
+
json=[entry.model_dump(exclude_none=True, exclude_unset=True) for entry in data],
|
|
628
634
|
headers=generate_headers(access_token, clue_access_token),
|
|
635
|
+
timeout=(timeout * 3, timeout * 3),
|
|
629
636
|
)
|
|
637
|
+
rsp.raise_for_status()
|
|
630
638
|
|
|
631
639
|
if not rsp:
|
|
632
640
|
raise ClueRuntimeError(f"An error occurred when connecting to {source.name}.") # noqa: TRY301
|
|
633
641
|
|
|
634
642
|
logger.debug(f"{rsp.status_code}: {url}")
|
|
635
|
-
response = json
|
|
643
|
+
response = rsp.json()
|
|
636
644
|
except Exception as exception:
|
|
637
645
|
error = process_exception(source.name, rsp, exception)
|
|
638
646
|
finally:
|
|
@@ -7,6 +7,7 @@ from requests import exceptions
|
|
|
7
7
|
|
|
8
8
|
from clue.common.logging import get_logger
|
|
9
9
|
from clue.config import CLASSIFICATION, DEBUG, cache, config
|
|
10
|
+
from clue.constants.env import DISABLE_CACHE
|
|
10
11
|
from clue.constants.supported_types import SUPPORTED_TYPES
|
|
11
12
|
from clue.helper.headers import generate_headers
|
|
12
13
|
from clue.models.config import ExternalSource
|
|
@@ -57,7 +58,7 @@ def get_supported_types(source_url: str, access_token: str | None = None, obo_ac
|
|
|
57
58
|
"""
|
|
58
59
|
url = f"{source_url}{'' if source_url.endswith('/') else '/'}types/"
|
|
59
60
|
|
|
60
|
-
if result := CACHE.get(url):
|
|
61
|
+
if not DISABLE_CACHE and (result := CACHE.get(url)):
|
|
61
62
|
logger.info("Cache hit for url %s", url)
|
|
62
63
|
return result
|
|
63
64
|
|
|
@@ -8,7 +8,7 @@ line-length = 120
|
|
|
8
8
|
# coverage.py settings #
|
|
9
9
|
########################
|
|
10
10
|
[tool.coverage.run]
|
|
11
|
-
omit = ["clue/gunicorn_config.py", "
|
|
11
|
+
omit = ["clue/gunicorn_config.py", "test/utils/*"]
|
|
12
12
|
branch = true
|
|
13
13
|
sigterm = true
|
|
14
14
|
data_file = ".coverage.server"
|
|
@@ -76,9 +76,6 @@ line-length = 120
|
|
|
76
76
|
indent-width = 4
|
|
77
77
|
target-version = "py312"
|
|
78
78
|
|
|
79
|
-
[tool.ruff.format]
|
|
80
|
-
exclude = ["clue/patched.py"]
|
|
81
|
-
|
|
82
79
|
[tool.ruff.lint]
|
|
83
80
|
select = [
|
|
84
81
|
"E",
|
|
@@ -112,7 +109,6 @@ ignore = [
|
|
|
112
109
|
"TRY300",
|
|
113
110
|
]
|
|
114
111
|
exclude = [
|
|
115
|
-
"clue/patched.py",
|
|
116
112
|
"plugins/assemblyline_lookup/app.py",
|
|
117
113
|
"plugins/**/test*.py",
|
|
118
114
|
]
|
|
@@ -146,7 +142,7 @@ log_cli_level = "WARN"
|
|
|
146
142
|
[tool.poetry]
|
|
147
143
|
package-mode = true
|
|
148
144
|
name = "clue-api"
|
|
149
|
-
version = "1.3.0.dev32"
|
|
145
|
+
version = "1.3.0.dev34"
|
|
150
146
|
description = "Clue distributed enrichment service"
|
|
151
147
|
authors = ["Canadian Centre for Cyber Security <contact@cyber.gc.ca>"]
|
|
152
148
|
license = "MIT"
|
|
@@ -184,7 +180,6 @@ flask-caching = "^2.1.0"
|
|
|
184
180
|
gunicorn = ">=22,<24"
|
|
185
181
|
gevent = "^24.2.1"
|
|
186
182
|
pydantic = "^2.7.1"
|
|
187
|
-
geventhttpclient = "^2.3.1"
|
|
188
183
|
pydantic-settings = { extras = ["yaml"], version = "^2.3.4" }
|
|
189
184
|
redis = { version = "^5.0.3" }
|
|
190
185
|
beautifulsoup4 = "^4.13.3"
|
|
@@ -232,7 +227,7 @@ name = "pypi"
|
|
|
232
227
|
priority = "supplemental"
|
|
233
228
|
|
|
234
229
|
[tool.poetry.scripts]
|
|
235
|
-
server = "clue.
|
|
230
|
+
server = "clue.app:main"
|
|
236
231
|
test = "build_scripts.run_tests:main"
|
|
237
232
|
last_success = "build_scripts.last_success:main"
|
|
238
233
|
check_changes = "build_scripts.check_changes:main"
|
|
@@ -261,6 +256,7 @@ types-pyyaml = "^6.0.12.20240311"
|
|
|
261
256
|
types-pytz = "^2024.1.0.20240203"
|
|
262
257
|
types-redis = "^4.6.0.20241004"
|
|
263
258
|
types-dateparser = "^1.2.0.20250208"
|
|
259
|
+
types-requests = "^2.32.4.20250913"
|
|
264
260
|
|
|
265
261
|
[build-system]
|
|
266
262
|
requires = ["poetry-core"]
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import sys
|
|
3
|
-
|
|
4
|
-
import geventhttpclient
|
|
5
|
-
|
|
6
|
-
from clue.common.logging import get_logger
|
|
7
|
-
from clue.config import config
|
|
8
|
-
|
|
9
|
-
logger = get_logger(__file__)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def get_apps_list() -> list[dict[str, str]]:
|
|
13
|
-
"""Get a list of apps from the discovery service
|
|
14
|
-
|
|
15
|
-
Returns:
|
|
16
|
-
list[dict[str, str]]: A list of other apps
|
|
17
|
-
"""
|
|
18
|
-
apps = []
|
|
19
|
-
|
|
20
|
-
if "pytest" in sys.modules or bool(os.getenv("SKIP_DISCOVERY", "")):
|
|
21
|
-
logger.info("Skipping discovery, running in a test environment")
|
|
22
|
-
|
|
23
|
-
if config.api.discover_url:
|
|
24
|
-
try:
|
|
25
|
-
resp = geventhttpclient.get(
|
|
26
|
-
config.api.discover_url,
|
|
27
|
-
headers={"accept": "application/json"},
|
|
28
|
-
)
|
|
29
|
-
if resp.ok:
|
|
30
|
-
data = resp.json()
|
|
31
|
-
for app in data["applications"]["application"]:
|
|
32
|
-
try:
|
|
33
|
-
url = app["instance"][0]["hostName"]
|
|
34
|
-
|
|
35
|
-
if "clue" not in url:
|
|
36
|
-
apps.append(
|
|
37
|
-
{
|
|
38
|
-
"alt": app["instance"][0]["metadata"]["alternateText"],
|
|
39
|
-
"name": app["name"],
|
|
40
|
-
"img_d": app["instance"][0]["metadata"]["imageDark"],
|
|
41
|
-
"img_l": app["instance"][0]["metadata"]["imageLight"],
|
|
42
|
-
"route": url,
|
|
43
|
-
"classification": app["instance"][0]["metadata"]["classification"],
|
|
44
|
-
}
|
|
45
|
-
)
|
|
46
|
-
except Exception:
|
|
47
|
-
logger.exception(f"Failed to parse get app: {str(app)}")
|
|
48
|
-
else:
|
|
49
|
-
logger.warning(f"Invalid response from server for apps discovery: {config.api.discover_url}")
|
|
50
|
-
except Exception:
|
|
51
|
-
logger.exception(f"Failed to get apps from discover URL: {config.api.discover_url}")
|
|
52
|
-
|
|
53
|
-
return sorted(apps, key=lambda k: k["name"])
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|