howler-api 2.10.0.dev255__py3-none-any.whl → 2.13.0.dev344__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of howler-api might be problematic. See the package's registry page for more details.

howler/api/__init__.py CHANGED
@@ -32,7 +32,7 @@ def _make_api_response(
32
32
  ) -> Response:
33
33
  quota_user = flsk_session.pop("quota_user", None)
34
34
  quota_set = flsk_session.pop("quota_set", False)
35
- if quota_user and quota_set and not request.path.startswith("/api/v1/borealis"):
35
+ if quota_user and quota_set and not request.path.startswith("/api/v1/clue"):
36
36
  QUOTA_TRACKER.end(quota_user)
37
37
 
38
38
  if type(err) is Exception: # pragma: no cover
howler/api/v1/auth.py CHANGED
@@ -131,7 +131,7 @@ def add_apikey(**kwargs): # noqa: C901
131
131
  key_name = apikey_data["name"] if "I" not in privs else f"impersonate_{apikey_data['name']}"
132
132
 
133
133
  new_key = {
134
- "password": bcrypt.encrypt(random_pass),
134
+ "password": bcrypt.hash(random_pass),
135
135
  "agents": apikey_data.get("agents", []),
136
136
  "acl": privs,
137
137
  }
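[file header missing from this extract; the hunks below belong to the enrichment proxy module, presumably howler/api/v1/borealis.py renamed to howler/api/v1/clue.py, not to howler/api/v1/auth.py]
@@ -14,9 +14,9 @@ from howler.config import cache, config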
14
14
  from howler.plugins import get_plugins
15
15
  from howler.security import api_login
16
16
 
17
- SUB_API = "borealis"
18
- borealis_api = make_subapi_blueprint(SUB_API, api_version=1)
19
- borealis_api._doc = "Proxy enrichment requests to borealis"
17
+ SUB_API = "clue"
18
+ clue_api = make_subapi_blueprint(SUB_API, api_version=1)
19
+ clue_api._doc = "Proxy enrichment requests to clue"
20
20
 
21
21
  logger = get_logger(__file__)
22
22
 
@@ -28,27 +28,27 @@ def skip_cache(*args):
28
28
 
29
29
  @cache.memoize(15 * 60, unless=skip_cache)
30
30
  def get_token(access_token: str) -> str:
31
- """Get a borealis token based on the current howler token"""
32
- get_borealis_token: Optional[Callable[[str], str]] = None
31
+ """Get a clue token based on the current howler token"""
32
+ get_clue_token: Optional[Callable[[str], str]] = None
33
33
 
34
34
  for plugin in get_plugins():
35
- if get_borealis_token := plugin.modules.token_functions.get("borealis", None):
35
+ if get_clue_token := plugin.modules.token_functions.get("clue", None):
36
36
  break
37
37
 
38
- if get_borealis_token:
39
- borealis_access_token = get_borealis_token(access_token)
38
+ if get_clue_token:
39
+ clue_access_token = get_clue_token(access_token)
40
40
  else:
41
- logger.info("No custom borealis token logic provided, continuing with howler credentials")
42
- borealis_access_token = access_token
41
+ logger.info("No custom clue token logic provided, continuing with howler credentials")
42
+ clue_access_token = access_token
43
43
 
44
- return borealis_access_token
44
+ return clue_access_token
45
45
 
46
46
 
47
47
  @generate_swagger_docs()
48
- @borealis_api.route("/<path:path>", methods=["GET", "POST"])
48
+ @clue_api.route("/<path:path>", methods=["GET", "POST"])
49
49
  @api_login(required_priv=["R"], required_method=["oauth"])
50
- def proxy_to_borealis(path, **kwargs):
51
- """Proxy enrichment requests to Borealis
50
+ def proxy_to_clue(path, **kwargs):
51
+ """Proxy enrichment requests to Clue
52
52
 
53
53
  Variables:
54
54
  None
@@ -60,11 +60,9 @@ def proxy_to_borealis(path, **kwargs):
60
60
  Any
61
61
 
62
62
  Result Example:
63
- Borealis Responses
63
+ Clue Responses
64
64
  """
65
- logger.info(
66
- "Proxying borealis request to path %s/%s?%s", config.core.borealis.url, path, request.query_string.decode()
67
- )
65
+ logger.info("Proxying clue request to path %s/%s?%s", config.core.clue.url, path, request.query_string.decode())
68
66
 
69
67
  auth_data: Optional[str] = request.headers.get("Authorization", None, type=str)
70
68
 
@@ -73,29 +71,29 @@ def proxy_to_borealis(path, **kwargs):
73
71
 
74
72
  auth_token = auth_data.split(" ")[1]
75
73
 
76
- borealis_token = get_token(auth_token)
74
+ clue_token = get_token(auth_token)
77
75
 
78
76
  start = time.perf_counter()
79
- with elasticapm.capture_span("borealis", span_type="http"):
77
+ with elasticapm.capture_span("clue", span_type="http"):
80
78
  if request.method.lower() == "get":
81
79
  response = requests.get(
82
- f"{config.core.borealis.url}/{path}",
83
- headers={"Authorization": f"Bearer {borealis_token}", "Accept": "application/json"},
80
+ f"{config.core.clue.url}/{path}",
81
+ headers={"Authorization": f"Bearer {clue_token}", "Accept": "application/json"},
84
82
  params=request.args.to_dict(),
85
83
  timeout=5 * 60,
86
84
  )
87
85
  else:
88
86
  response = requests.post(
89
- f"{config.core.borealis.url}/{path}",
87
+ f"{config.core.clue.url}/{path}",
90
88
  json=request.json,
91
- headers={"Authorization": f"Bearer {borealis_token}", "Accept": "application/json"},
89
+ headers={"Authorization": f"Bearer {clue_token}", "Accept": "application/json"},
92
90
  params=request.args.to_dict(),
93
91
  timeout=5 * 60,
94
92
  )
95
93
 
96
- logger.debug(f"Request to borealis completed in {round(time.perf_counter() - start)}ms")
94
+ logger.debug(f"Request to clue completed in {round(time.perf_counter() - start)}ms")
97
95
 
98
96
  if not response.ok:
99
- return bad_gateway(response.json(), err="Something went wrong when connecting to borealis")
97
+ return bad_gateway(response.json(), err="Something went wrong when connecting to clue")
100
98
 
101
99
  return ok(response.json()["api_response"])
howler/api/v1/dossier.py CHANGED
@@ -1,15 +1,6 @@
1
1
  from flask import request
2
2
 
3
- from howler.api import (
4
- bad_request,
5
- created,
6
- forbidden,
7
- internal_error,
8
- make_subapi_blueprint,
9
- no_content,
10
- not_found,
11
- ok,
12
- )
3
+ from howler.api import bad_request, created, forbidden, internal_error, make_subapi_blueprint, no_content, not_found, ok
13
4
  from howler.common.exceptions import ForbiddenException, HowlerException, InvalidDataException, NotFoundException
14
5
  from howler.common.loader import datastore
15
6
  from howler.common.logging import get_logger
@@ -17,7 +8,7 @@ from howler.common.swagger import generate_swagger_docs
17
8
  from howler.odm.models.dossier import Dossier
18
9
  from howler.odm.models.user import User
19
10
  from howler.security import api_login
20
- from howler.services import dossier_service, lucene_service
11
+ from howler.services import dossier_service
21
12
 
22
13
  SUB_API = "dossier"
23
14
  dossier_api = make_subapi_blueprint(SUB_API, api_version=1)
@@ -141,29 +132,14 @@ def get_dossier_for_hit(id: str, user: User, **kwargs):
141
132
  """
142
133
  storage = datastore()
143
134
  try:
144
- response = storage.hit.search(f"howler.id:{id}", rows=1)
135
+ response = storage.hit.search(f"howler.id:{id}", rows=1, as_obj=False)
145
136
 
146
137
  if response["total"] < 1:
147
138
  return not_found(err="Hit does not exist.")
148
139
 
149
140
  hit = response["items"][0]
150
141
 
151
- results: list[Dossier] = storage.dossier.search(
152
- "dossier_id:*",
153
- as_obj=True,
154
- rows=1000,
155
- )["items"]
156
-
157
- matching_dossiers: list[Dossier] = []
158
- for dossier in results:
159
- if dossier.query is None:
160
- matching_dossiers.append(dossier)
161
- continue
162
-
163
- if lucene_service.match(dossier.query, hit.as_primitives()):
164
- matching_dossiers.append(dossier)
165
-
166
- return ok(matching_dossiers)
142
+ return ok(dossier_service.get_matching_dossiers(hit))
167
143
  except ValueError as e:
168
144
  return bad_request(err=str(e))
169
145
 
howler/api/v1/hit.py CHANGED
@@ -17,11 +17,7 @@ from howler.api import (
17
17
  ok,
18
18
  )
19
19
  from howler.api.v1.utils.etag import add_etag
20
- from howler.common.exceptions import (
21
- HowlerException,
22
- HowlerValueError,
23
- InvalidDataException,
24
- )
20
+ from howler.common.exceptions import HowlerException, HowlerValueError, InvalidDataException
25
21
  from howler.common.loader import datastore
26
22
  from howler.common.logging import get_logger
27
23
  from howler.common.swagger import generate_swagger_docs
@@ -252,7 +248,7 @@ def validate_hits(**kwargs):
252
248
  @generate_swagger_docs()
253
249
  @hit_api.route("/<id>", methods=["GET"])
254
250
  @api_login(audit=False, required_priv=["R"])
255
- @add_etag(getter=hit_service.get_hit, check_if_match=False)
251
+ @add_etag(getter=hit_service.get_hit)
256
252
  def get_hit(id: str, server_version: str, **kwargs):
257
253
  """Get a hit.
258
254
 
@@ -265,11 +261,19 @@ def get_hit(id: str, server_version: str, **kwargs):
265
261
  Result Example:
266
262
  https://github.com/CybercentreCanada/howler-api/blob/main/howler/odm/models/hit.py
267
263
  """
268
- hit = cast(Optional[Hit], kwargs.get("cached_hit"))
264
+ hit = cast(Optional[Any], kwargs.get("cached_hit"))
269
265
 
270
266
  if not hit:
271
267
  return not_found(err="Hit %s does not exist" % id)
272
268
 
269
+ if "metadata" in request.args:
270
+ metadata = (request.args.get("metadata", type=str) or "").split(",")
271
+
272
+ hit = hit.as_primitives()
273
+
274
+ if len(metadata) > 0:
275
+ hit_service.augment_metadata(hit, metadata, kwargs["user"])
276
+
273
277
  return ok(hit), server_version
274
278
 
275
279
 
howler/api/v1/search.py CHANGED
@@ -1,6 +1,9 @@
1
- from typing import Union
1
+ import re
2
+ from copy import deepcopy
3
+ from typing import Any, Union
2
4
 
3
5
  from elasticsearch import BadRequestError
6
+ from elasticsearch._sync.client.indices import IndicesClient
4
7
  from flask import request
5
8
  from sigma.backends.elasticsearch import LuceneBackend
6
9
  from sigma.rule import SigmaRule
@@ -12,13 +15,9 @@ from howler.common.loader import datastore
12
15
  from howler.common.logging import get_logger
13
16
  from howler.common.swagger import generate_swagger_docs
14
17
  from howler.datastore.exceptions import SearchException
15
- from howler.helper.search import (
16
- get_collection,
17
- get_default_sort,
18
- has_access_control,
19
- list_all_fields,
20
- )
18
+ from howler.helper.search import get_collection, get_default_sort, has_access_control, list_all_fields
21
19
  from howler.security import api_login
20
+ from howler.services import hit_service, lucene_service
22
21
 
23
22
  SUB_API = "search"
24
23
  search_api = make_subapi_blueprint(SUB_API, api_version=1)
@@ -78,16 +77,18 @@ def search(index, **kwargs):
78
77
  timeout => Maximum execution time (ms)
79
78
  use_archive => Allow access to the datastore achive (Default: False)
80
79
  track_total_hits => Track the total number of query matches, instead of stopping at 10000 (Default: False)
80
+ metadata => A list of additional features to be added to the result alongside the raw results
81
81
 
82
82
  Data Block:
83
83
  # Note that the data block is for POST requests only!
84
- {"query": "query", # Query to search for
85
- "offset": 0, # Offset in the results
86
- "rows": 100, # Max number of results
87
- "sort": "field asc", # How to sort the results
88
- "fl": "id,score", # List of fields to return
89
- "timeout": 1000, # Maximum execution time (ms)
90
- "filters": ['fq']} # List of additional filter queries limit the data
84
+ {"query": "query", # Query to search for
85
+ "offset": 0, # Offset in the results
86
+ "rows": 100, # Max number of results
87
+ "sort": "field asc", # How to sort the results
88
+ "fl": "id,score", # List of fields to return
89
+ "timeout": 1000, # Maximum execution time (ms)
90
+ "filters": ['fq'], # List of additional filter queries limit the data
91
+ "metadata": ["dossiers"]} # List of additional features to add to the search
91
92
 
92
93
 
93
94
  Result Example:
@@ -113,7 +114,7 @@ def search(index, **kwargs):
113
114
  "deep_paging_id",
114
115
  "track_total_hits",
115
116
  ]
116
- multi_fields = ["filters"]
117
+ multi_fields = ["filters", "metadata"]
117
118
  boolean_fields = ["use_archive"]
118
119
 
119
120
  params, req_data = generate_params(request, fields, multi_fields)
@@ -137,11 +138,87 @@ def search(index, **kwargs):
137
138
  return bad_request(err="There was no search query.")
138
139
 
139
140
  try:
140
- return ok(collection().search(query, **params))
141
+ metadata = params.pop("metadata", [])
142
+ result = collection().search(query, **params)
143
+
144
+ if index == "hit" and len(metadata) > 0:
145
+ hit_service.augment_metadata(result["items"], metadata, user)
146
+
147
+ return ok(result)
141
148
  except (SearchException, BadRequestError) as e:
142
149
  return bad_request(err=f"SearchException: {e}")
143
150
 
144
151
 
152
+ @generate_swagger_docs()
153
+ @search_api.route("/<index>/explain", methods=["GET", "POST"])
154
+ @api_login(required_priv=["R"])
155
+ def explain_query(index, **kwargs):
156
+ """Search through specified index for a given Lucene query. Uses Lucene search syntax for query.
157
+
158
+ Variables:
159
+ index => Index to explain against (hit, user,...)
160
+
161
+ Arguments:
162
+ query => Lucene Query to explain
163
+
164
+ Data Block:
165
+ # Note that the data block is for POST requests only!
166
+ {
167
+ "query": "id:*", # Lucene Query to explain
168
+ }
169
+
170
+
171
+ Result Example:
172
+ {
173
+ 'valid': True,
174
+ 'explanations': [
175
+ {
176
+ 'valid': True,
177
+ 'explanation': 'ConstantScore(FieldExistsQuery [field=id])'
178
+ }
179
+ ]
180
+ }
181
+ """
182
+ user = kwargs["user"]
183
+ collection = get_collection(index, user)
184
+
185
+ if collection is None:
186
+ return bad_request(err=f"Not a valid index to explain: {index}")
187
+
188
+ fields = ["query"]
189
+ multi_fields: list[str] = []
190
+
191
+ params, req_data = generate_params(request, fields, multi_fields)
192
+
193
+ params["as_obj"] = False
194
+
195
+ query = req_data.get("query", None)
196
+ if not query:
197
+ return bad_request(err="There was no query.")
198
+
199
+ # This regex checks for lucene phrases (i.e. the "Example Analytic" part of howler.analytic:"Example Analytic")
200
+ # And then escapes them.
201
+ # https://regex101.com/r/8u5F6a/1
202
+ escaped_lucene = re.sub(r'((:\()?(".+?")(\)?))', lucene_service.replace_lucene_phrase, query)
203
+
204
+ try:
205
+ indices_client = IndicesClient(datastore().hit.datastore.client)
206
+
207
+ result = deepcopy(
208
+ indices_client.validate_query(q=escaped_lucene, explain=True, index=collection().index_name).body
209
+ )
210
+
211
+ del result["_shards"]
212
+
213
+ for explanation in result["explanations"]:
214
+ del explanation["index"]
215
+
216
+ return ok(result)
217
+ except Exception as e:
218
+ logger.exception("Exception on query explanation")
219
+ return bad_request(err=f"Exception: {e}")
220
+
221
+
145
222
  @generate_swagger_docs()
146
223
  @search_api.route("/<index>/eql", methods=["GET", "POST"])
147
224
  @api_login(required_priv=["R"])
@@ -458,10 +535,76 @@ def count(index, **kwargs):
458
535
  return bad_request(err=f"SearchException: {e}")
459
536
 
460
537
 
538
+ @generate_swagger_docs()
539
+ @search_api.route("/facet/<index>", methods=["GET", "POST"])
540
+ @api_login(required_priv=["R"])
541
+ def facet(index, **kwargs):
542
+ """Perform field analysis on the selected fields. (Also known as facetting in lucene).
543
+
544
+ This essentially counts the number of instances a field is seen with each specific
545
+ values where the documents matches the specified queries.
546
+
547
+ Variables:
548
+ index => Index to search in (hit, user,...)
549
+
550
+ Optional Arguments:
551
+ query => Query to search for
552
+ mincount => Minimum item count for the fieldvalue to be returned
553
+ rows => The max number of fieldvalues to return
554
+ filters => Additional query to limit to output
555
+ fields => Field to analyse
556
+
557
+ Data Block:
558
+ # Note that the data block is for POST requests only!
559
+ {"fields": ["howler.id", ...]
560
+ "query": "id:*",
561
+ "mincount": "10",
562
+ "rows": "10",
563
+ "filters": ['fq']}
564
+
565
+ Result Example:
566
+ {
567
+ "howler.id": { # Facetting results
568
+ "value_0": 2,
569
+ ...
570
+ "value_N": 19,
571
+ },
572
+ ...
573
+ }
574
+ """
575
+ user = kwargs["user"]
576
+ collection = get_collection(index, user)
577
+ if collection is None:
578
+ return bad_request(err=f"Not a valid index to search in: {index}")
579
+
580
+ fields = ["query", "mincount", "rows"]
581
+ multi_fields = ["filters", "fields"]
582
+
583
+ params = generate_params(request, fields, multi_fields)[0]
584
+
585
+ if has_access_control(index):
586
+ params.update({"access_control": user["access_control"]})
587
+
588
+ try:
589
+ fields = params.pop("fields")
590
+ facet_result: dict[str, dict[str, Any]] = {}
591
+ for field in fields:
592
+ if field not in collection().fields():
593
+ logger.warning("Invalid field %s requested for faceting, skipping", field)
594
+ continue
595
+
596
+ facet_result[field] = collection().facet(field, **params)
597
+
598
+ return ok(facet_result)
599
+ except (SearchException, BadRequestError) as e:
600
+ logger.error("SearchException: %s", str(e), exc_info=True)
601
+ return bad_request(err=f"SearchException: {e}")
602
+
603
+
461
604
  @generate_swagger_docs()
462
605
  @search_api.route("/facet/<index>/<field>", methods=["GET", "POST"])
463
606
  @api_login(required_priv=["R"])
464
- def facet(index, field, **kwargs):
607
+ def facet_field(index, field, **kwargs):
465
608
  """Perform field analysis on the selected field. (Also known as facetting in lucene).
466
609
 
467
610
  This essentially counts the number of instances a field is seen with each specific
howler/api/v1/user.py CHANGED
@@ -145,7 +145,7 @@ def add_user_account(username, **_):
145
145
  @generate_swagger_docs()
146
146
  @user_api.route("/<username>", methods=["GET"])
147
147
  @api_login(audit=False, required_priv=["R"])
148
- @add_etag(getter=user_service.get_user, check_if_match=False)
148
+ @add_etag(getter=user_service.get_user, check_if_match=True)
149
149
  def get_user_account(username: str, server_version: Optional[str] = None, **kwargs):
150
150
  """Load the user account information.
151
151
 
@@ -327,7 +327,7 @@ def get_user_avatar(username, **_):
327
327
  resp.headers["ETag"] = sha256(avatar.encode("utf-8")).hexdigest()
328
328
  return resp
329
329
  else:
330
- return not_found(err="No avatar for specified user")
330
+ return no_content()
331
331
 
332
332
 
333
333
  @generate_swagger_docs()
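[file header missing from this extract; the hunks below define add_etag, which is imported from howler.api.v1.utils.etag, i.e. presumably howler/api/v1/utils/etag.py, not howler/api/v1/user.py]
@@ -1,3 +1,10 @@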
1
+ """ETag utility module for handling HTTP ETags in Flask responses.
2
+
3
+ ETags (Entity Tags) are HTTP headers used for web cache validation and conditional requests.
4
+ They help optimize performance by allowing clients to cache responses and only fetch
5
+ new data when the resource has actually changed.
6
+ """
7
+
1
8
  import functools
2
9
  import re
3
10
 
@@ -6,38 +13,69 @@ from flask import Response, request
6
13
  from howler.api import not_modified
7
14
 
8
15
 
9
- def add_etag(getter, check_if_match=False):
10
- """Decorator to add etag handling to a flask response"""
16
+ def add_etag(getter, check_if_match=True):
17
+ """Decorator to add ETag handling to a Flask response.
18
+
19
+ This decorator implements HTTP ETag functionality for API endpoints, enabling:
20
+ - Conditional requests using If-Match headers
21
+ - Cache validation to prevent unnecessary data transfers
22
+ - Version tracking for resources
23
+
24
+ Args:
25
+ getter: Function that retrieves the object and its version
26
+ check_if_match (bool): Whether to check If-Match headers for conditional requests
27
+
28
+ Returns:
29
+ Decorated function with ETag support
30
+ """
11
31
 
12
32
  def wrapper(f):
33
+ """Inner wrapper function that applies ETag functionality to the decorated function."""
34
+
13
35
  @functools.wraps(f)
14
36
  def generate_etag(*args, **kwargs):
37
+ """Generate and handle ETags for the HTTP response."""
38
+ # Retrieve the object and its version using the provided getter function
39
+ # The getter should return (object, version) tuple
15
40
  obj, version = getter(
16
41
  kwargs.get("id", kwargs.get("username", None)),
17
42
  as_odm=True,
18
43
  version=True,
19
44
  )
45
+
46
+ # Handle conditional requests with If-Match header
47
+ # If the client's version matches the current version and it's a GET request
48
+ # without metadata parameter, return 304 Not Modified to save bandwidth
20
49
  if (
21
- not check_if_match
50
+ check_if_match
22
51
  and "If-Match" in request.headers
23
52
  and request.headers["If-Match"] == version
24
53
  and request.method == "GET"
54
+ and "metadata" not in request.args
25
55
  ):
26
56
  return not_modified()
27
57
 
58
+ # Extract the resource type from the API path and create a cache key
59
+ # e.g., "/api/v1/users/123" becomes "cached_users"
28
60
  key = re.sub(r"^\/api\/v\d+\/(\w+)\/.+$", r"cached_\1", request.path)
29
61
  kwargs[key] = obj
30
62
 
63
+ # Call the original function with the cached object and version
31
64
  values = f(*args, server_version=version, **kwargs)
32
65
 
33
- # If there is only one return, Its just the response
66
+ # Handle different return value formats from the decorated function
67
+ # If there is only one return, it's just the response
34
68
  if isinstance(values, Response):
69
+ # Only add ETag header for successful responses (not 409 Conflict or 400 Bad Request)
35
70
  if values.status_code != 409 and values.status_code != 400:
36
71
  values.headers["ETag"] = version
37
72
  return values
38
- # If there is two returns, its the response and the new version
73
+
74
+ # If there are two returns, it's the response and the new version
75
+ # This happens when the function modifies the resource and returns an updated version
39
76
  else:
40
77
  if values[0].status_code != 409 and values[0].status_code != 400:
78
+ # Add the new ETag version to successful responses
41
79
  values[0].headers["ETag"] = values[1]
42
80
  return values[0]
43
81