PyPI - howler-api - Versions diffs - 2.10.0.dev255__py3-none-any.whl → 2.13.0.dev344__py3-none-any.whl - Mend

howler-api 2.10.0.dev255__py3-none-any.whl → 2.13.0.dev344__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of howler-api might be problematic. Click here for more details.

Files changed (35)

howler/api/__init__.py +1 -1
howler/api/v1/auth.py +1 -1
howler/api/v1/{borealis.py → clue.py} +24 -26
howler/api/v1/dossier.py +4 -28
howler/api/v1/hit.py +11 -7
howler/api/v1/search.py +160 -17
howler/api/v1/user.py +2 -2
howler/api/v1/utils/etag.py +43 -5
howler/api/v1/view.py +26 -34
howler/app.py +4 -4
howler/cronjobs/view_cleanup.py +88 -0
howler/datastore/README.md +0 -2
howler/datastore/collection.py +109 -132
howler/datastore/howler_store.py +0 -45
howler/datastore/store.py +25 -6
howler/odm/base.py +1 -1
howler/odm/helper.py +9 -6
howler/odm/models/config.py +168 -8
howler/odm/models/howler_data.py +2 -1
howler/odm/models/lead.py +1 -10
howler/odm/models/pivot.py +2 -11
howler/odm/random_data.py +1 -1
howler/security/__init__.py +2 -2
howler/services/analytic_service.py +31 -0
howler/services/config_service.py +2 -2
howler/services/dossier_service.py +140 -7
howler/services/hit_service.py +317 -72
howler/services/lucene_service.py +14 -7
howler/services/overview_service.py +44 -0
howler/services/template_service.py +45 -0
howler/utils/lucene.py +22 -2
{howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/METADATA +5 -5
{howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/RECORD +35 -32
{howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/WHEEL +1 -1
{howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/entry_points.txt +0 -0

howler/api/__init__.py CHANGED Viewed

@@ -32,7 +32,7 @@ def _make_api_response(
 ) -> Response:
     quota_user = flsk_session.pop("quota_user", None)
     quota_set = flsk_session.pop("quota_set", False)
-    if quota_user and quota_set and not request.path.startswith("/api/v1/borealis"):
+    if quota_user and quota_set and not request.path.startswith("/api/v1/clue"):
         QUOTA_TRACKER.end(quota_user)
     if type(err) is Exception:  # pragma: no cover

howler/api/v1/auth.py CHANGED Viewed

@@ -131,7 +131,7 @@ def add_apikey(**kwargs):  # noqa: C901
         key_name = apikey_data["name"] if "I" not in privs else f"impersonate_{apikey_data['name']}"
         new_key = {
-            "password": bcrypt.encrypt(random_pass),
+            "password": bcrypt.hash(random_pass),
             "agents": apikey_data.get("agents", []),
             "acl": privs,
         }

howler/api/v1/{borealis.py → clue.py} RENAMED Viewed

@@ -14,9 +14,9 @@ from howler.config import cache, config
 from howler.plugins import get_plugins
 from howler.security import api_login
-SUB_API = "borealis"
-borealis_api = make_subapi_blueprint(SUB_API, api_version=1)
-borealis_api._doc = "Proxy enrichment requests to borealis"
+SUB_API = "clue"
+clue_api = make_subapi_blueprint(SUB_API, api_version=1)
+clue_api._doc = "Proxy enrichment requests to clue"
 logger = get_logger(__file__)
@@ -28,27 +28,27 @@ def skip_cache(*args):
 @cache.memoize(15 * 60, unless=skip_cache)
 def get_token(access_token: str) -> str:
-    """Get a borealis token based on the current howler token"""
-    get_borealis_token: Optional[Callable[[str], str]] = None
+    """Get a clue token based on the current howler token"""
+    get_clue_token: Optional[Callable[[str], str]] = None
     for plugin in get_plugins():
-        if get_borealis_token := plugin.modules.token_functions.get("borealis", None):
+        if get_clue_token := plugin.modules.token_functions.get("clue", None):
             break
-    if get_borealis_token:
-        borealis_access_token = get_borealis_token(access_token)
+    if get_clue_token:
+        clue_access_token = get_clue_token(access_token)
     else:
-        logger.info("No custom borealis token logic provided, continuing with howler credentials")
-        borealis_access_token = access_token
+        logger.info("No custom clue token logic provided, continuing with howler credentials")
+        clue_access_token = access_token
-    return borealis_access_token
+    return clue_access_token
 @generate_swagger_docs()
-@borealis_api.route("/<path:path>", methods=["GET", "POST"])
+@clue_api.route("/<path:path>", methods=["GET", "POST"])
 @api_login(required_priv=["R"], required_method=["oauth"])
-def proxy_to_borealis(path, **kwargs):
-    """Proxy enrichment requests to Borealis
+def proxy_to_clue(path, **kwargs):
+    """Proxy enrichment requests to Clue
     Variables:
     None
@@ -60,11 +60,9 @@ def proxy_to_borealis(path, **kwargs):
     Any
     Result Example:
-    Borealis Responses
+    Clue Responses
     """
-    logger.info(
-        "Proxying borealis request to path %s/%s?%s", config.core.borealis.url, path, request.query_string.decode()
-    )
+    logger.info("Proxying clue request to path %s/%s?%s", config.core.clue.url, path, request.query_string.decode())
     auth_data: Optional[str] = request.headers.get("Authorization", None, type=str)
@@ -73,29 +71,29 @@ def proxy_to_borealis(path, **kwargs):
     auth_token = auth_data.split(" ")[1]
-    borealis_token = get_token(auth_token)
+    clue_token = get_token(auth_token)
     start = time.perf_counter()
-    with elasticapm.capture_span("borealis", span_type="http"):
+    with elasticapm.capture_span("clue", span_type="http"):
         if request.method.lower() == "get":
             response = requests.get(
-                f"{config.core.borealis.url}/{path}",
-                headers={"Authorization": f"Bearer {borealis_token}", "Accept": "application/json"},
+                f"{config.core.clue.url}/{path}",
+                headers={"Authorization": f"Bearer {clue_token}", "Accept": "application/json"},
                 params=request.args.to_dict(),
                 timeout=5 * 60,
             )
         else:
             response = requests.post(
-                f"{config.core.borealis.url}/{path}",
+                f"{config.core.clue.url}/{path}",
                 json=request.json,
-                headers={"Authorization": f"Bearer {borealis_token}", "Accept": "application/json"},
+                headers={"Authorization": f"Bearer {clue_token}", "Accept": "application/json"},
                 params=request.args.to_dict(),
                 timeout=5 * 60,
             )
-    logger.debug(f"Request to borealis completed in {round(time.perf_counter() - start)}ms")
+    logger.debug(f"Request to clue completed in {round(time.perf_counter() - start)}ms")
     if not response.ok:
-        return bad_gateway(response.json(), err="Something went wrong when connecting to borealis")
+        return bad_gateway(response.json(), err="Something went wrong when connecting to clue")
     return ok(response.json()["api_response"])

howler/api/v1/dossier.py CHANGED Viewed

@@ -1,15 +1,6 @@
 from flask import request
-from howler.api import (
-    bad_request,
-    created,
-    forbidden,
-    internal_error,
-    make_subapi_blueprint,
-    no_content,
-    not_found,
-    ok,
-)
+from howler.api import bad_request, created, forbidden, internal_error, make_subapi_blueprint, no_content, not_found, ok
 from howler.common.exceptions import ForbiddenException, HowlerException, InvalidDataException, NotFoundException
 from howler.common.loader import datastore
 from howler.common.logging import get_logger
@@ -17,7 +8,7 @@ from howler.common.swagger import generate_swagger_docs
 from howler.odm.models.dossier import Dossier
 from howler.odm.models.user import User
 from howler.security import api_login
-from howler.services import dossier_service, lucene_service
+from howler.services import dossier_service
 SUB_API = "dossier"
 dossier_api = make_subapi_blueprint(SUB_API, api_version=1)
@@ -141,29 +132,14 @@ def get_dossier_for_hit(id: str, user: User, **kwargs):
     """
     storage = datastore()
     try:
-        response = storage.hit.search(f"howler.id:{id}", rows=1)
+        response = storage.hit.search(f"howler.id:{id}", rows=1, as_obj=False)
         if response["total"] < 1:
             return not_found(err="Hit does not exist.")
         hit = response["items"][0]
-        results: list[Dossier] = storage.dossier.search(
-            "dossier_id:*",
-            as_obj=True,
-            rows=1000,
-        )["items"]
-        matching_dossiers: list[Dossier] = []
-        for dossier in results:
-            if dossier.query is None:
-                matching_dossiers.append(dossier)
-                continue
-            if lucene_service.match(dossier.query, hit.as_primitives()):
-                matching_dossiers.append(dossier)
-        return ok(matching_dossiers)
+        return ok(dossier_service.get_matching_dossiers(hit))
     except ValueError as e:
         return bad_request(err=str(e))

howler/api/v1/hit.py CHANGED Viewed

@@ -17,11 +17,7 @@ from howler.api import (
     ok,
 )
 from howler.api.v1.utils.etag import add_etag
-from howler.common.exceptions import (
-    HowlerException,
-    HowlerValueError,
-    InvalidDataException,
-)
+from howler.common.exceptions import HowlerException, HowlerValueError, InvalidDataException
 from howler.common.loader import datastore
 from howler.common.logging import get_logger
 from howler.common.swagger import generate_swagger_docs
@@ -252,7 +248,7 @@ def validate_hits(**kwargs):
 @generate_swagger_docs()
 @hit_api.route("/<id>", methods=["GET"])
 @api_login(audit=False, required_priv=["R"])
-@add_etag(getter=hit_service.get_hit, check_if_match=False)
+@add_etag(getter=hit_service.get_hit)
 def get_hit(id: str, server_version: str, **kwargs):
     """Get a hit.
@@ -265,11 +261,19 @@ def get_hit(id: str, server_version: str, **kwargs):
     Result Example:
     https://github.com/CybercentreCanada/howler-api/blob/main/howler/odm/models/hit.py
     """
-    hit = cast(Optional[Hit], kwargs.get("cached_hit"))
+    hit = cast(Optional[Any], kwargs.get("cached_hit"))
     if not hit:
         return not_found(err="Hit %s does not exist" % id)
+    if "metadata" in request.args:
+        metadata = (request.args.get("metadata", type=str) or "").split(",")
+        hit = hit.as_primitives()
+        if len(metadata) > 0:
+            hit_service.augment_metadata(hit, metadata, kwargs["user"])
     return ok(hit), server_version

howler/api/v1/search.py CHANGED Viewed

@@ -1,6 +1,9 @@
-from typing import Union
+import re
+from copy import deepcopy
+from typing import Any, Union
 from elasticsearch import BadRequestError
+from elasticsearch._sync.client.indices import IndicesClient
 from flask import request
 from sigma.backends.elasticsearch import LuceneBackend
 from sigma.rule import SigmaRule
@@ -12,13 +15,9 @@ from howler.common.loader import datastore
 from howler.common.logging import get_logger
 from howler.common.swagger import generate_swagger_docs
 from howler.datastore.exceptions import SearchException
-from howler.helper.search import (
-    get_collection,
-    get_default_sort,
-    has_access_control,
-    list_all_fields,
-)
+from howler.helper.search import get_collection, get_default_sort, has_access_control, list_all_fields
 from howler.security import api_login
+from howler.services import hit_service, lucene_service
 SUB_API = "search"
 search_api = make_subapi_blueprint(SUB_API, api_version=1)
@@ -78,16 +77,18 @@ def search(index, **kwargs):
     timeout             =>   Maximum execution time (ms)
     use_archive         =>   Allow access to the datastore achive (Default: False)
     track_total_hits    =>   Track the total number of query matches, instead of stopping at 10000 (Default: False)
+    metadata            =>   A list of additional features to be added to the result alongside the raw results
     Data Block:
     # Note that the data block is for POST requests only!
-    {"query": "query",     # Query to search for
-     "offset": 0,          # Offset in the results
-     "rows": 100,          # Max number of results
-     "sort": "field asc",  # How to sort the results
-     "fl": "id,score",     # List of fields to return
-     "timeout": 1000,      # Maximum execution time (ms)
-     "filters": ['fq']}    # List of additional filter queries limit the data
+    {"query": "query",          # Query to search for
+     "offset": 0,               # Offset in the results
+     "rows": 100,               # Max number of results
+     "sort": "field asc",       # How to sort the results
+     "fl": "id,score",          # List of fields to return
+     "timeout": 1000,           # Maximum execution time (ms)
+     "filters": ['fq'],         # List of additional filter queries limit the data
+     "metadata": ["dossiers"]}  # List of additional features to add to the search
     Result Example:
@@ -113,7 +114,7 @@ def search(index, **kwargs):
         "deep_paging_id",
         "track_total_hits",
     ]
-    multi_fields = ["filters"]
+    multi_fields = ["filters", "metadata"]
     boolean_fields = ["use_archive"]
     params, req_data = generate_params(request, fields, multi_fields)
@@ -137,11 +138,87 @@ def search(index, **kwargs):
         return bad_request(err="There was no search query.")
     try:
-        return ok(collection().search(query, **params))
+        metadata = params.pop("metadata", [])
+        result = collection().search(query, **params)
+        if index == "hit" and len(metadata) > 0:
+            hit_service.augment_metadata(result["items"], metadata, user)
+        return ok(result)
     except (SearchException, BadRequestError) as e:
         return bad_request(err=f"SearchException: {e}")
+@generate_swagger_docs()
+@search_api.route("/<index>/explain", methods=["GET", "POST"])
+@api_login(required_priv=["R"])
+def explain_query(index, **kwargs):
+    """Search through specified index for a given Lucene query. Uses Lucene search syntax for query.
+    Variables:
+    index  =>   Index to explain against (hit, user,...)
+    Arguments:
+    query   =>   Lucene Query to explain
+    Data Block:
+    # Note that the data block is for POST requests only!
+    {
+        "query": "id:*", # Lucene Query to explain
+    }
+    Result Example:
+    {
+        'valid': True,
+        'explanations': [
+            {
+                'valid': True,
+                'explanation': 'ConstantScore(FieldExistsQuery [field=id])'
+            }
+        ]
+    }
+    """
+    user = kwargs["user"]
+    collection = get_collection(index, user)
+    if collection is None:
+        return bad_request(err=f"Not a valid index to explain: {index}")
+    fields = ["query"]
+    multi_fields: list[str] = []
+    params, req_data = generate_params(request, fields, multi_fields)
+    params["as_obj"] = False
+    query = req_data.get("query", None)
+    if not query:
+        return bad_request(err="There was no query.")
+    # This regex checks for lucene phrases (i.e. the "Example Analytic" part of howler.analytic:"Example Analytic")
+    # And then escapes them.
+    # https://regex101.com/r/8u5F6a/1
+    escaped_lucene = re.sub(r'((:\()?(".+?")(\)?))', lucene_service.replace_lucene_phrase, query)
+    try:
+        indices_client = IndicesClient(datastore().hit.datastore.client)
+        result = deepcopy(
+            indices_client.validate_query(q=escaped_lucene, explain=True, index=collection().index_name).body
+        )
+        del result["_shards"]
+        for explanation in result["explanations"]:
+            del explanation["index"]
+        return ok(result)
+    except Exception as e:
+        logger.exception("Exception on query explanation")
+        return bad_request(err=f"Exception: {e}")
 @generate_swagger_docs()
 @search_api.route("/<index>/eql", methods=["GET", "POST"])
 @api_login(required_priv=["R"])
@@ -458,10 +535,76 @@ def count(index, **kwargs):
         return bad_request(err=f"SearchException: {e}")
+@generate_swagger_docs()
+@search_api.route("/facet/<index>", methods=["GET", "POST"])
+@api_login(required_priv=["R"])
+def facet(index, **kwargs):
+    """Perform field analysis on the selected fields. (Also known as facetting in lucene).
+    This essentially counts the number of instances a field is seen with each specific
+    values where the documents matches the specified queries.
+    Variables:
+    index       =>   Index to search in (hit, user,...)
+    Optional Arguments:
+    query       =>   Query to search for
+    mincount    =>   Minimum item count for the fieldvalue to be returned
+    rows        => The max number of fieldvalues to return
+    filters     =>   Additional query to limit to output
+    fields        =>   Field to analyse
+    Data Block:
+    # Note that the data block is for POST requests only!
+    {"fields": ["howler.id", ...]
+     "query": "id:*",
+     "mincount": "10",
+     "rows": "10",
+     "filters": ['fq']}
+    Result Example:
+    {
+        "howler.id": {                 # Facetting results
+            "value_0": 2,
+            ...
+            "value_N": 19,
+        },
+        ...
+    }
+    """
+    user = kwargs["user"]
+    collection = get_collection(index, user)
+    if collection is None:
+        return bad_request(err=f"Not a valid index to search in: {index}")
+    fields = ["query", "mincount", "rows"]
+    multi_fields = ["filters", "fields"]
+    params = generate_params(request, fields, multi_fields)[0]
+    if has_access_control(index):
+        params.update({"access_control": user["access_control"]})
+    try:
+        fields = params.pop("fields")
+        facet_result: dict[str, dict[str, Any]] = {}
+        for field in fields:
+            if field not in collection().fields():
+                logger.warning("Invalid field %s requested for faceting, skipping", field)
+                continue
+            facet_result[field] = collection().facet(field, **params)
+        return ok(facet_result)
+    except (SearchException, BadRequestError) as e:
+        logger.error("SearchException: %s", str(e), exc_info=True)
+        return bad_request(err=f"SearchException: {e}")
 @generate_swagger_docs()
 @search_api.route("/facet/<index>/<field>", methods=["GET", "POST"])
 @api_login(required_priv=["R"])
-def facet(index, field, **kwargs):
+def facet_field(index, field, **kwargs):
     """Perform field analysis on the selected field. (Also known as facetting in lucene).
     This essentially counts the number of instances a field is seen with each specific

howler/api/v1/user.py CHANGED Viewed

@@ -145,7 +145,7 @@ def add_user_account(username, **_):
 @generate_swagger_docs()
 @user_api.route("/<username>", methods=["GET"])
 @api_login(audit=False, required_priv=["R"])
-@add_etag(getter=user_service.get_user, check_if_match=False)
+@add_etag(getter=user_service.get_user, check_if_match=True)
 def get_user_account(username: str, server_version: Optional[str] = None, **kwargs):
     """Load the user account information.
@@ -327,7 +327,7 @@ def get_user_avatar(username, **_):
         resp.headers["ETag"] = sha256(avatar.encode("utf-8")).hexdigest()
         return resp
     else:
-        return not_found(err="No avatar for specified user")
+        return no_content()
 @generate_swagger_docs()

howler/api/v1/utils/etag.py CHANGED Viewed

@@ -1,3 +1,10 @@
+"""ETag utility module for handling HTTP ETags in Flask responses.
+ETags (Entity Tags) are HTTP headers used for web cache validation and conditional requests.
+They help optimize performance by allowing clients to cache responses and only fetch
+new data when the resource has actually changed.
+"""
 import functools
 import re
@@ -6,38 +13,69 @@ from flask import Response, request
 from howler.api import not_modified
-def add_etag(getter, check_if_match=False):
-    """Decorator to add etag handling to a flask response"""
+def add_etag(getter, check_if_match=True):
+    """Decorator to add ETag handling to a Flask response.
+    This decorator implements HTTP ETag functionality for API endpoints, enabling:
+    - Conditional requests using If-Match headers
+    - Cache validation to prevent unnecessary data transfers
+    - Version tracking for resources
+    Args:
+        getter: Function that retrieves the object and its version
+        check_if_match (bool): Whether to check If-Match headers for conditional requests
+    Returns:
+        Decorated function with ETag support
+    """
     def wrapper(f):
+        """Inner wrapper function that applies ETag functionality to the decorated function."""
         @functools.wraps(f)
         def generate_etag(*args, **kwargs):
+            """Generate and handle ETags for the HTTP response."""
+            # Retrieve the object and its version using the provided getter function
+            # The getter should return (object, version) tuple
             obj, version = getter(
                 kwargs.get("id", kwargs.get("username", None)),
                 as_odm=True,
                 version=True,
             )
+            # Handle conditional requests with If-Match header
+            # If the client's version matches the current version and it's a GET request
+            # without metadata parameter, return 304 Not Modified to save bandwidth
             if (
-                not check_if_match
+                check_if_match
                 and "If-Match" in request.headers
                 and request.headers["If-Match"] == version
                 and request.method == "GET"
+                and "metadata" not in request.args
             ):
                 return not_modified()
+            # Extract the resource type from the API path and create a cache key
+            # e.g., "/api/v1/users/123" becomes "cached_users"
             key = re.sub(r"^\/api\/v\d+\/(\w+)\/.+$", r"cached_\1", request.path)
             kwargs[key] = obj
+            # Call the original function with the cached object and version
             values = f(*args, server_version=version, **kwargs)
-            # If there is only one return, Its just the response
+            # Handle different return value formats from the decorated function
+            # If there is only one return, it's just the response
             if isinstance(values, Response):
+                # Only add ETag header for successful responses (not 409 Conflict or 400 Bad Request)
                 if values.status_code != 409 and values.status_code != 400:
                     values.headers["ETag"] = version
                 return values
-            # If there is two returns, its the response and the new version
+            # If there are two returns, it's the response and the new version
+            # This happens when the function modifies the resource and returns an updated version
             else:
                 if values[0].status_code != 409 and values[0].status_code != 400:
+                    # Add the new ETag version to successful responses
                     values[0].headers["ETag"] = values[1]
                 return values[0]

howler-api 2.10.0.dev255__py3-none-any.whl → 2.13.0.dev344__py3-none-any.whl

Potentially problematic release.

howler-api 2.10.0.dev255__py3-none-any.whl → 2.13.0.dev344__py3-none-any.whl