PyPI - clue-api - Versions diffs - 1.0.0.dev7__py3-none-any.whl - Mend

clue-api 1.0.0.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

clue/.gitignore +21 -0
clue/__init__.py +0 -0
clue/api/__init__.py +211 -0
clue/api/base.py +99 -0
clue/api/v1/__init__.py +82 -0
clue/api/v1/actions.py +92 -0
clue/api/v1/auth.py +243 -0
clue/api/v1/configs.py +83 -0
clue/api/v1/fetchers.py +94 -0
clue/api/v1/lookup.py +221 -0
clue/api/v1/registration.py +109 -0
clue/api/v1/static.py +94 -0
clue/app.py +166 -0
clue/cache/__init__.py +129 -0
clue/common/__init__.py +0 -0
clue/common/classification.py +1006 -0
clue/common/classification.yml +130 -0
clue/common/dict_utils.py +130 -0
clue/common/exceptions.py +199 -0
clue/common/forge.py +152 -0
clue/common/json_utils.py +10 -0
clue/common/list_utils.py +11 -0
clue/common/logging/__init__.py +291 -0
clue/common/logging/audit.py +157 -0
clue/common/logging/format.py +42 -0
clue/common/regex.py +31 -0
clue/common/str_utils.py +213 -0
clue/common/swagger.py +139 -0
clue/common/uid.py +47 -0
clue/config.py +60 -0
clue/constants/__init__.py +0 -0
clue/constants/supported_types.py +38 -0
clue/cronjobs/__init__.py +30 -0
clue/cronjobs/plugins.py +32 -0
clue/error.py +129 -0
clue/gunicorn_config.py +29 -0
clue/healthz.py +74 -0
clue/helper/discover.py +53 -0
clue/helper/headers.py +30 -0
clue/helper/oauth.py +128 -0
clue/models/__init__.py +0 -0
clue/models/actions.py +243 -0
clue/models/config.py +456 -0
clue/models/fetchers.py +136 -0
clue/models/graph.py +162 -0
clue/models/model_list.py +52 -0
clue/models/network.py +430 -0
clue/models/results/__init__.py +34 -0
clue/models/results/base.py +10 -0
clue/models/results/graph.py +26 -0
clue/models/results/image.py +22 -0
clue/models/results/status.py +55 -0
clue/models/results/validation.py +57 -0
clue/models/selector.py +67 -0
clue/models/utils.py +52 -0
clue/models/validators.py +19 -0
clue/patched.py +8 -0
clue/plugin/__init__.py +1008 -0
clue/plugin/helpers/__init__.py +0 -0
clue/plugin/helpers/central_server.py +27 -0
clue/plugin/helpers/email_render.py +228 -0
clue/plugin/helpers/token.py +34 -0
clue/plugin/helpers/trino.py +103 -0
clue/plugin/interactive.py +270 -0
clue/plugin/models.py +19 -0
clue/plugin/utils.py +78 -0
clue/remote/__init__.py +0 -0
clue/remote/datatypes/__init__.py +130 -0
clue/remote/datatypes/cache.py +62 -0
clue/remote/datatypes/events.py +118 -0
clue/remote/datatypes/hash.py +193 -0
clue/remote/datatypes/queues/__init__.py +0 -0
clue/remote/datatypes/queues/comms.py +62 -0
clue/remote/datatypes/set.py +96 -0
clue/remote/datatypes/user_quota_tracker.py +54 -0
clue/security/__init__.py +211 -0
clue/security/obo.py +95 -0
clue/security/utils.py +34 -0
clue/services/action_service.py +186 -0
clue/services/auth_service.py +348 -0
clue/services/config_service.py +38 -0
clue/services/fetcher_service.py +203 -0
clue/services/jwt_service.py +233 -0
clue/services/lookup_service.py +786 -0
clue/services/type_service.py +165 -0
clue/services/user_service.py +152 -0
clue_api-1.0.0.dev7.dist-info/METADATA +111 -0
clue_api-1.0.0.dev7.dist-info/RECORD +91 -0
clue_api-1.0.0.dev7.dist-info/WHEEL +4 -0
clue_api-1.0.0.dev7.dist-info/entry_points.txt +8 -0
clue_api-1.0.0.dev7.dist-info/licenses/LICENSE +11 -0

clue/api/v1/auth.py ADDED Viewed

@@ -0,0 +1,243 @@
+import typing
+from typing import Any, Optional
+from urllib.parse import urlparse
+from authlib.integrations.base_client import OAuthError
+from flask import current_app, request
+import clue.services.auth_service as auth_service
+import clue.services.user_service as user_service
+from clue.api import (
+    bad_request,
+    forbidden,
+    internal_error,
+    make_subapi_blueprint,
+    ok,
+    unauthorized,
+)
+from clue.common.exceptions import (
+    AccessDeniedException,
+    AuthenticationException,
+    ClueException,
+    ClueValueError,
+    InvalidDataException,
+)
+from clue.common.logging import get_logger
+from clue.common.str_utils import default_string_value
+from clue.common.swagger import generate_swagger_docs
+from clue.config import config
+from clue.security.utils import generate_random_secret
+logger = get_logger(__file__)
+SUB_API = "auth"
+auth_api = make_subapi_blueprint(SUB_API, api_version=1)
+auth_api._doc = "Allow user to authenticate to the web server"
+logger = get_logger(__file__)
+# noinspection PyBroadException,PyPropertyAccess
+@generate_swagger_docs()
+@auth_api.route("/login", methods=["GET", "POST"])
+def login(**_) -> dict[str, Any]:  # noqa: C901
+    """Log the user into the system, in one of three ways.
+    1. Username/Password Authentication
+    2. Username/API Key Authentication
+    3. OAuth Login flow
+        (See here: https://auth0.com/docs/get-started/authentication-and-authorization-flow/authorization-code-flow)
+    Variables:
+    None
+    Arguments:
+    NOTE: The arguments are used only when completing the OAuth authorization flow.
+    provider    => The provider of the OAuth code.
+    state       => Random state used in the OAuth authentication flow.
+    code        => The code provided by the OAuth provider used to exchange for an access token.
+    Data Block:
+    {
+        "user": "user",                 # The username to authenticate as (optional)
+        "password": "password",         # The password used to authenticate (optional)
+        "apikey": "devkey:user",        # The apikey used ot authenticate (optional)
+        "oauth_provider": "keycloak"    # The oauth provider initiate an OAuth Authorization Flow with (optional)
+    }
+    Result Example:
+    {
+        # Profile picture for the user
+        "avatar": "data:image/png;base64, ...",
+        # Username of the authenticated user
+        "username": "user",
+        # Different privileges that the user will get for this session
+        "privileges": ["R", "W"],
+        # A token generated by us the user can use to authenticate with clue
+        "app_token": "asdfsd876opqwm465a89sdf4",
+        # A JSON Web Access Token generated by an OAuth provider to authenticate with them
+        "access_token": "<JWT>",
+    }
+    """
+    data: dict[str, Any]
+    if request.is_json and len(request.data) > 0:
+        data = request.json  # type: ignore
+    else:
+        data = request.values
+    # Get the ip the request came from - used in logging later
+    ip = request.headers.get("X-Forwarded-For", request.remote_addr)
+    # Get the data from the request
+    # TODO: Figure out how to fix this inconsistency
+    oauth_provider = data.get("provider", data.get("oauth_provider", None))
+    user = data.get("user", None)
+    data.get("password", None)
+    data.get("apikey", None)
+    # These variables are what will eventually be returned, if authentication is successful
+    logged_in_uname = None
+    access_token = None
+    refresh_token = data.get("refresh_token", None)
+    priv: Optional[list[str]] = []
+    try:
+        # First, we'll try oauth
+        if oauth_provider:
+            if not config.auth.oauth.enabled:
+                raise InvalidDataException("OAuth is disabled.")
+            oauth = current_app.extensions.get("authlib.integrations.flask_client")
+            if not oauth:
+                logger.critical("Authlib integration missing!")
+                raise ClueValueError()
+            provider = oauth.create_client(oauth_provider)
+            if not provider:
+                logger.critical("OAuth client failed to create!")
+                raise ClueValueError()
+            # This means that they want to start the oauth process, so we'll redirect them to their chosen provider
+            if "code" not in request.args and not refresh_token:
+                referer = request.headers.get("Referer", None)
+                uri = urlparse(referer if referer else request.host_url)
+                port_portion = ":" + str(uri.port) if uri.port else ""
+                redirect_uri = f"{uri.scheme}://{uri.hostname}{port_portion}/login?provider={oauth_provider}"
+                return provider.authorize_redirect(redirect_uri=redirect_uri)
+            # At this point we know the code exists, so we're good to use that to exchange for an JSON Web Token with
+            # user data in it. token_data contains the access token, expiry, refresh token, and id token,
+            # in JWT format: https://jwt.io/
+            oauth_provider_config = config.auth.oauth.providers[oauth_provider]
+            # We need to figure out what information the provider already has, and provide whatever it doesn't.
+            # Without this step, the provider will try and send the client_id and/or secret *twice*, leading to an
+            # error.
+            kwargs = {}
+            # Does the provider have the client id? If not provide it
+            if not provider.client_id:
+                kwargs["client_id"] = default_string_value(
+                    oauth_provider_config.client_id,
+                    env_name=f"{oauth_provider.upper()}_CLIENT_ID",
+                )
+                if not kwargs["client_id"]:
+                    logger.critical("client id not set! Cannot complete oauth")
+                    raise ClueValueError()
+            # Does the provider have the client secret? If not provide it
+            if not provider.client_secret:
+                kwargs["client_secret"] = default_string_value(
+                    oauth_provider_config.client_secret,
+                    env_name=f"{oauth_provider.upper()}_CLIENT_SECRET",
+                )
+                if not kwargs["client_secret"]:
+                    logger.critical("client secret not set! Cannot complete oauth")
+                    raise ClueValueError()
+            if refresh_token is not None:
+                token_data = provider.fetch_access_token(
+                    refresh_token=refresh_token,
+                    grant_type="refresh_token",
+                    **kwargs,
+                )
+            else:
+                # Finally, ask for the access token with whatever info the provider needs
+                token_data = provider.authorize_access_token(**kwargs)
+            access_token = token_data.get("access_token", None)
+            refresh_token = token_data.get("refresh_token", None)
+            # Get a useful dict of user data from the web token
+            cur_user = user_service.parse_user_data(token_data, oauth_provider)
+            logged_in_uname = cur_user["uname"]
+            priv = ["R", "W"]
+        # No oauth provider was specified, so we fall back to user/pass or user/apikey
+        # elif user and (password or apikey):
+        #     if password and apikey:
+        #         raise InvalidDataException("Cannot specify password and API key.")
+        #     user_data, priv = auth_service.basic_auth(
+        #         f"{user}:{password or apikey}",
+        #         is_base64=False,
+        #         # No need to validate for api keys if we know they provided a password, and vice versa
+        #         skip_apikey=bool(password),
+        #         skip_password=bool(apikey),
+        #     )
+        #     if not user_data:
+        #         raise AuthenticationException("User does not exist, or authentication was invalid")
+        #     logged_in_uname = user_data["uname"]
+        else:
+            raise AuthenticationException("Not enough information to proceed with authentication")
+    # For sanity's sake, we throw exceptions throughout the authentication code and simply catch the exceptions here to
+    # return the corresponding HTTP Code to the user
+    except (OAuthError, AuthenticationException) as err:
+        logger.warning(f"Authentication failure. (U:{user} - IP:{ip}) [{err}]")
+        return unauthorized(err=str(err))
+    except AccessDeniedException as err:
+        logger.warning(f"Authorization failure. (U:{user} - IP:{ip}) [{err}]")
+        return forbidden(err=err.message)
+    except InvalidDataException as err:
+        return bad_request(err=err.message or str(err))
+    except ClueException:
+        logger.exception(f"Internal Authentication Error. (U:{user} - IP:{ip})")
+        return internal_error(
+            err="Unhandled exception occured while Authenticating. Contact your administrator.",
+        )
+    logger.info(f"Login successful. (U:{logged_in_uname} - IP:{ip})")
+    xsrf_token = generate_random_secret()
+    # Generate the token this user can use to authenticate from now on
+    if access_token:
+        app_token = access_token
+    else:
+        app_token = f"{logged_in_uname}:{auth_service.create_token(logged_in_uname, typing.cast(list[str], priv))}"
+    return ok(
+        {
+            "app_token": app_token,
+            "provider": oauth_provider,
+            "refresh_token": refresh_token,
+            "privileges": priv,
+            "user": cur_user,
+        },
+        cookies={"XSRF-TOKEN": xsrf_token},
+    )

clue/api/v1/configs.py ADDED Viewed

@@ -0,0 +1,83 @@
+import clue.services.config_service as config_service
+from clue.api import make_subapi_blueprint, not_found, ok
+from clue.common.swagger import generate_swagger_docs
+from clue.models.network import QueryResult
+# Name of this sub-API; handed to the blueprint factory below.
+SUB_API = "configs"
+# Blueprint on which the configuration routes of this module are registered.
+configs_api = make_subapi_blueprint(SUB_API, api_version=1)
+# Short description attached to the blueprint.
+configs_api._doc = "Read configuration data about the system"
+@generate_swagger_docs()
+@configs_api.route("/", methods=["GET"])
+def configs(**kwargs):
+    """Return all of the configuration information about the deployment.
+    Variables:
+    None
+    Arguments:
+    None
+    Result Example:
+    {
+        "configuration": {                         # Configuration block
+            "auth": {                                # Authentication block
+                "oauth_providers": [                   # List of oAuth providers available
+                    "azure_ad",
+                    "keyclock",
+                    ...
+                ],
+            },
+            "system": {                              # System Configuration
+                "branch": "develop",                   # Branch the current deployment is connected to
+                "commit": "123456789abcdef",           # Last commit ID
+                "version": "1.0"                       # Clue version
+            },
+            "ui": {                                  # UI Configuration
+                "apps": [],                            # List of apps shown in the apps switcher
+            }
+        },
+        "c12nDef": {},                             # Classification definition block
+    }
+    """
+    return ok(config_service.get_configuration())
+@configs_api.route("/schema/<model>", methods=["GET"])
+def schemas(model: str, **kwargs):
+    """Return a JSON schema for a given model.
+    Variables:
+    model   =>  The model for which to return the schema. Valid options: plugin_response
+    Arguments:
+    None
+    Result Example:
+    {
+        "properties": {
+            "error": {
+                "anyOf": [
+                    {
+                        "type": "string"
+                    },
+                    {
+                        "type": "null"
+                    }
+                ],
+                "default": null,
+                "description": "Error message returned by data source",
+                "title": "Error"
+            },
+            ...
+        },
+        "title": "QueryResult",
+        "type": "object"
+    }
+    """
+    if model == "plugin_response":
+        return ok(QueryResult.model_json_schema())
+    return not_found(err="Not a valid model")

clue/api/v1/fetchers.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""Enrichment Fetchers
+List and execute fetchers that provide data to be rendered client-side
+* Provides endpoints to list valid fetchers exposed by plugins.
+* Provides endpoints to run these fetchers.
+"""
+from flask_cors import CORS
+from clue.api import bad_gateway, bad_request, make_subapi_blueprint, not_found, ok
+from clue.common.exceptions import ClueException, NotFoundException
+from clue.common.logging import get_logger
+from clue.common.swagger import generate_swagger_docs
+from clue.config import config
+from clue.models.fetchers import FetcherDefinition
+from clue.security import api_login
+from clue.services import fetcher_service
+# Module-level logger shared by the handlers in this file.
+logger = get_logger(__file__)
+# Name of this sub-API; handed to the blueprint factory below.
+SUB_API = "fetchers"
+# Blueprint on which the fetcher routes of this module are registered.
+fetchers_api = make_subapi_blueprint(SUB_API, api_version=1)
+fetchers_api._doc = "Run fetchers for a given ID through configured external data sources/systems."
+# Apply CORS to the blueprint using the origins configured for the UI, with credentials supported.
+CORS(fetchers_api, origins=config.ui.cors_origins, supports_credentials=True)
+@generate_swagger_docs(responses={200: "A list of types and their classification"})
+@fetchers_api.route("/", methods=["GET"])
+@api_login()
+def get_fetchers(**kwargs) -> dict[str, FetcherDefinition]:
+    """Return the supported fetchers of each external service.
+    Variables:
+    None
+    Arguments:
+    None
+    Result Example:
+    { # A dictionary of sources with their supported fetchers.
+        <source_id>.<fetcher_id>: {
+            "id": "<fetcher_id>",
+            "classification": "",
+            "description": "",
+            "format": ""
+            "supported_types": ["ip", ...]
+        },
+        ...,
+    }
+    """
+    return ok(fetcher_service.get_plugins_supported_fetchers(kwargs["user"]))
+@generate_swagger_docs(responses={200: "Successful lookup to selected plugins"})
+@fetchers_api.route("/<plugin_id>/<fetcher_id>", methods=["POST"])
+@api_login()
+def run_fetcher(plugin_id: str, fetcher_id: str, **kwargs):
+    """Search other services for additional information related to the provided data.
+    Variables:
+    plugin_id (str): the ID of the plugin who owns the action to execute
+    fetcher_id (str): the ID of the action to execute
+    Arguments:
+    None
+    Data Block:
+    {
+        type: "ip",
+        value: "127.0.0.1",
+        ...
+    }
+    Result Example:
+    {
+        "outcome": "success | failure", # was this execution a success or failure?
+        "format": "link", # What format is the output in?
+        "output": "http://example.com" # The output of the action. Can be any data structure.
+    }
+    """
+    try:
+        return ok(fetcher_service.run_fetcher(plugin_id, fetcher_id, kwargs["user"]))
+    except NotFoundException as err:
+        return not_found(err=err.message)
+    except ClueException as err:
+        if err.status_code == 400:
+            logger.warning("Bad request from fetcher %s.%s: %s", plugin_id, fetcher_id, err.message)
+            return bad_request(err=err.message)
+        logger.warning("Unknown error from fetcher %s.%s: %s", plugin_id, fetcher_id, err.message)
+        return bad_gateway(err=err.message)

clue/api/v1/lookup.py ADDED Viewed

@@ -0,0 +1,221 @@
+"""Enrichment Lookup
+Lookup related data from external systems.
+* Provide endpoints to list accepted types of data.
+* Provide endpoints to query other systems to enable enrichment of such types.
+"""
+import json
+import urllib.parse
+from flask import request
+from flask_cors import CORS
+from pydantic import ValidationError
+from clue.api import bad_request, make_subapi_blueprint, ok, unauthorized
+from clue.common.exceptions import AuthenticationException, InvalidDataException
+from clue.common.logging import get_logger
+from clue.common.swagger import generate_swagger_docs
+from clue.config import config
+from clue.models.network import QueryResult
+from clue.models.selector import Selector
+from clue.security import api_login
+from clue.services import lookup_service, type_service
+# Module-level logger for this file.
+logger = get_logger(__file__)
+# Name of this sub-API; handed to the blueprint factory below.
+SUB_API = "lookup"
+# Blueprint on which the lookup routes of this module are registered.
+lookup_api = make_subapi_blueprint(SUB_API, api_version=1)
+lookup_api._doc = "Lookup related data through configured external data sources/systems."
+# Apply CORS to the blueprint using the origins configured for the UI, with credentials supported.
+CORS(lookup_api, origins=config.ui.cors_origins, supports_credentials=True)
+@generate_swagger_docs(responses={200: "A list of types and their classification"})
+@lookup_api.route("/types/", methods=["GET"])
+@api_login()
+def get_types(**kwargs) -> dict[str, list[str]]:
+    """Return the supported types of each external service.
+    Variables:
+    None
+    Arguments:
+    None
+    Result Example:
+    { # A dictionary of sources with their supported types.
+        <source_name>: [
+            <type name>,
+            <type name>,
+            ...,
+        ],
+        ...,
+    }
+    """
+    return ok(type_service.get_plugins_supported_types(kwargs["user"]))
+@generate_swagger_docs(responses={200: "A list of types and their regex detectors"})
+@lookup_api.route("/types_detection/", methods=["GET"])
+@api_login()
+def get_types_detection(**kwargs) -> dict[str, str]:
+    """Return the regular expression to detect the different types
+    Variables:
+    None
+    Arguments:
+    None
+    Result Example:
+    { # A dictionary of types with their associated regular expressions
+        <type>: <regex>,
+        ...
+    }
+    """
+    return ok(type_service.get_types_regular_expressions(kwargs["user"]))
+@generate_swagger_docs(responses={200: "Successful bulk lookup to selected plugins for included values"})
+@lookup_api.route("/enrich", methods=["POST"])
+@api_login()
+def bulk_enrich(**kwargs) -> dict[str, dict[str, dict[str, QueryResult]]]:
+    """Search other services for additional information related to the provided data.
+    Variables:
+    None
+    Optional Arguments:
+    classification: string  => Classification of the type [Default: minimum configured classification]
+    sources: string         => | separated list of data sources. If empty, all configured sources are used.
+    max_timeout: number     => Maximum execution time for the call in seconds
+    limit: number           => limit the amount of returned results counted per source
+    no_annotation: boolean  => Do not return any anotations
+    no_cache: boolean       => Skip the cache and ask the plugins again
+    include_raw: boolean    => Return raw plugin data
+    exclude_unset: boolean  => Do not return any values that were not set by the plugin
+    Data Block:
+    [
+        {"type": "ip", "value": "127.0.0.1"},
+        ...
+    ]
+    Result Example:
+    {                           # Dictionary of data source queried
+        "ip": {
+            "127.0.0.1":{
+                "vt": {
+                    "error": null,          # Error message returned by data source
+                    "items": [              # list of results from the source
+                        {
+                            "link": "https://www.virustotal.com/gui/url/<id>",  # link to results
+                            "count": 1,                                         # number of hits from the search
+                            "classification": "TLP:C",                          # classification of the search result
+                            "annotations": [                                    # Semi structured details about data
+                                <Annotation data>
+                            ],
+                        },
+                        ...,
+                    ],
+                },
+                ...,
+            },
+            ...
+        },
+        ...
+    }
+    """
+    user = kwargs["user"]
+    post_data = request.json
+    if not isinstance(post_data, list):
+        return bad_request(err="Request data is not in the correct format")
+    try:
+        data = [Selector.model_validate(entry) for entry in post_data]
+    except ValidationError as err:
+        pydantic_errs: list[str] = []
+        for validation_err in err.errors():
+            loc = ".".join(
+                section if isinstance(section, str) else f"[{str(section)}]" for section in validation_err["loc"]
+            )
+            pydantic_errs.append(f'"{loc}": {validation_err["msg"]}')
+        return bad_request(err=f"Request data is not in the correct format: {', '.join(pydantic_errs)}")
+    try:
+        results = lookup_service.bulk_enrich(data, user)
+    except AuthenticationException as e:
+        return unauthorized(err=str(e))
+    except InvalidDataException as e:
+        return bad_request(err=str(e))
+    return ok(results)
+@generate_swagger_docs(responses={200: "Successful lookup to selected plugins"})
+@lookup_api.route("/enrich/<type_name>/<value>/", methods=["GET"])
+@api_login()
+def enrich(type_name: str, value: str, **kwargs) -> dict[str, QueryResult]:
+    """Search other services for additional information related to the provided data.
+    Variables:
+    type_name => Type of data to lookup in the external system.
+    value => Value of the data to lookup. *Must be double URL encoded.*
+    Optional Arguments:
+    classification: string  => Classification of the type [Default: minimum configured classification]
+    sources: string         => | separated list of data sources. If empty, all configured sources are used.
+    max_timeout: number     => Maximum execution time for the call in seconds
+    limit: number           => limit the amount of returned results counted per source
+    no_annotation: boolean  => Do not return any anotations
+    no_cache: boolean       => Skip the cache and ask the plugins again
+    include_raw: boolean    => Return raw plugin data
+    exclude_unset: boolean  => Do not return any values that were not set by the plugin
+    API Call Examples:
+    /api/v1/lookup/enrich/domain/malicious.domain/
+    /api/v1/lookup/enrich/ip/1.1.1.1/?sources=vt|malware_bazar
+    Result Example:
+    {                           # Dictionary of data source queried
+        "vt": {
+            "error": null,          # Error message returned by data source
+            "items": [              # list of results from the source
+                {
+                    "link": "https://www.virustotal.com/gui/url/<id>",   # link to results
+                    "count": 1,                                          # number of hits from the search
+                    "classification": "TLP:C",                           # classification of the search result
+                    "annotations": [                                      # Semi structured details about type of data
+                        <Annotation data>
+                    ],
+                },
+                ...,
+            ],
+        },
+        ...,
+    }
+    """
+    user = kwargs["user"]
+    # For backwards compatability, if eml is used it is replaced with email
+    type_name = type_name.replace("eml", "email")
+    if type_name == "telemetry":
+        try:
+            json.loads(urllib.parse.unquote(value))
+        except json.JSONDecodeError:
+            return bad_request(err="If type is telemetry, value must be a valid JSON object.")
+    # re-encode the type after being decoded going through flask/wsgi route
+    value = urllib.parse.quote(value, safe="")
+    results = lookup_service.enrich(type_name, value, user)
+    return ok(results)