howler-api 2.13.0.dev329__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- howler/__init__.py +0 -0
- howler/actions/__init__.py +167 -0
- howler/actions/add_label.py +111 -0
- howler/actions/add_to_bundle.py +159 -0
- howler/actions/change_field.py +76 -0
- howler/actions/demote.py +160 -0
- howler/actions/example_plugin.py +104 -0
- howler/actions/prioritization.py +93 -0
- howler/actions/promote.py +147 -0
- howler/actions/remove_from_bundle.py +133 -0
- howler/actions/remove_label.py +111 -0
- howler/actions/transition.py +200 -0
- howler/api/__init__.py +249 -0
- howler/api/base.py +88 -0
- howler/api/socket.py +114 -0
- howler/api/v1/__init__.py +97 -0
- howler/api/v1/action.py +372 -0
- howler/api/v1/analytic.py +748 -0
- howler/api/v1/auth.py +382 -0
- howler/api/v1/borealis.py +101 -0
- howler/api/v1/configs.py +55 -0
- howler/api/v1/dossier.py +222 -0
- howler/api/v1/help.py +28 -0
- howler/api/v1/hit.py +1181 -0
- howler/api/v1/notebook.py +82 -0
- howler/api/v1/overview.py +191 -0
- howler/api/v1/search.py +715 -0
- howler/api/v1/template.py +206 -0
- howler/api/v1/tool.py +183 -0
- howler/api/v1/user.py +414 -0
- howler/api/v1/utils/__init__.py +0 -0
- howler/api/v1/utils/etag.py +84 -0
- howler/api/v1/view.py +288 -0
- howler/app.py +235 -0
- howler/common/README.md +144 -0
- howler/common/__init__.py +0 -0
- howler/common/classification.py +979 -0
- howler/common/classification.yml +107 -0
- howler/common/exceptions.py +167 -0
- howler/common/hexdump.py +48 -0
- howler/common/iprange.py +171 -0
- howler/common/loader.py +154 -0
- howler/common/logging/__init__.py +241 -0
- howler/common/logging/audit.py +138 -0
- howler/common/logging/format.py +38 -0
- howler/common/net.py +79 -0
- howler/common/net_static.py +1494 -0
- howler/common/random_user.py +316 -0
- howler/common/swagger.py +117 -0
- howler/config.py +64 -0
- howler/cronjobs/__init__.py +29 -0
- howler/cronjobs/retention.py +61 -0
- howler/cronjobs/rules.py +274 -0
- howler/cronjobs/view_cleanup.py +88 -0
- howler/datastore/README.md +112 -0
- howler/datastore/__init__.py +0 -0
- howler/datastore/bulk.py +72 -0
- howler/datastore/collection.py +2327 -0
- howler/datastore/constants.py +117 -0
- howler/datastore/exceptions.py +41 -0
- howler/datastore/howler_store.py +105 -0
- howler/datastore/migrations/fix_process.py +41 -0
- howler/datastore/operations.py +130 -0
- howler/datastore/schemas.py +90 -0
- howler/datastore/store.py +231 -0
- howler/datastore/support/__init__.py +0 -0
- howler/datastore/support/build.py +214 -0
- howler/datastore/support/schemas.py +90 -0
- howler/datastore/types.py +22 -0
- howler/error.py +91 -0
- howler/external/__init__.py +0 -0
- howler/external/generate_mitre.py +96 -0
- howler/external/generate_sigma_rules.py +31 -0
- howler/external/generate_tlds.py +47 -0
- howler/external/reindex_data.py +46 -0
- howler/external/wipe_databases.py +58 -0
- howler/gunicorn_config.py +25 -0
- howler/healthz.py +47 -0
- howler/helper/__init__.py +0 -0
- howler/helper/azure.py +50 -0
- howler/helper/discover.py +59 -0
- howler/helper/hit.py +236 -0
- howler/helper/oauth.py +247 -0
- howler/helper/search.py +92 -0
- howler/helper/workflow.py +110 -0
- howler/helper/ws.py +378 -0
- howler/odm/README.md +102 -0
- howler/odm/__init__.py +1 -0
- howler/odm/base.py +1504 -0
- howler/odm/charter.txt +146 -0
- howler/odm/helper.py +416 -0
- howler/odm/howler_enum.py +25 -0
- howler/odm/models/__init__.py +0 -0
- howler/odm/models/action.py +33 -0
- howler/odm/models/analytic.py +90 -0
- howler/odm/models/assemblyline.py +48 -0
- howler/odm/models/aws.py +23 -0
- howler/odm/models/azure.py +16 -0
- howler/odm/models/cbs.py +44 -0
- howler/odm/models/config.py +558 -0
- howler/odm/models/dossier.py +33 -0
- howler/odm/models/ecs/__init__.py +0 -0
- howler/odm/models/ecs/agent.py +17 -0
- howler/odm/models/ecs/autonomous_system.py +16 -0
- howler/odm/models/ecs/client.py +149 -0
- howler/odm/models/ecs/cloud.py +141 -0
- howler/odm/models/ecs/code_signature.py +27 -0
- howler/odm/models/ecs/container.py +32 -0
- howler/odm/models/ecs/dns.py +62 -0
- howler/odm/models/ecs/egress.py +10 -0
- howler/odm/models/ecs/elf.py +74 -0
- howler/odm/models/ecs/email.py +122 -0
- howler/odm/models/ecs/error.py +14 -0
- howler/odm/models/ecs/event.py +140 -0
- howler/odm/models/ecs/faas.py +24 -0
- howler/odm/models/ecs/file.py +84 -0
- howler/odm/models/ecs/geo.py +30 -0
- howler/odm/models/ecs/group.py +18 -0
- howler/odm/models/ecs/hash.py +16 -0
- howler/odm/models/ecs/host.py +17 -0
- howler/odm/models/ecs/http.py +37 -0
- howler/odm/models/ecs/ingress.py +12 -0
- howler/odm/models/ecs/interface.py +21 -0
- howler/odm/models/ecs/network.py +30 -0
- howler/odm/models/ecs/observer.py +45 -0
- howler/odm/models/ecs/organization.py +12 -0
- howler/odm/models/ecs/os.py +21 -0
- howler/odm/models/ecs/pe.py +17 -0
- howler/odm/models/ecs/process.py +216 -0
- howler/odm/models/ecs/registry.py +26 -0
- howler/odm/models/ecs/related.py +45 -0
- howler/odm/models/ecs/rule.py +51 -0
- howler/odm/models/ecs/server.py +24 -0
- howler/odm/models/ecs/threat.py +247 -0
- howler/odm/models/ecs/tls.py +58 -0
- howler/odm/models/ecs/url.py +51 -0
- howler/odm/models/ecs/user.py +57 -0
- howler/odm/models/ecs/user_agent.py +20 -0
- howler/odm/models/ecs/vulnerability.py +41 -0
- howler/odm/models/gcp.py +16 -0
- howler/odm/models/hit.py +356 -0
- howler/odm/models/howler_data.py +328 -0
- howler/odm/models/lead.py +33 -0
- howler/odm/models/localized_label.py +13 -0
- howler/odm/models/overview.py +16 -0
- howler/odm/models/pivot.py +40 -0
- howler/odm/models/template.py +24 -0
- howler/odm/models/user.py +83 -0
- howler/odm/models/view.py +34 -0
- howler/odm/random_data.py +888 -0
- howler/odm/randomizer.py +606 -0
- howler/patched.py +5 -0
- howler/plugins/__init__.py +25 -0
- howler/plugins/config.py +123 -0
- howler/remote/__init__.py +0 -0
- howler/remote/datatypes/README.md +355 -0
- howler/remote/datatypes/__init__.py +98 -0
- howler/remote/datatypes/counters.py +63 -0
- howler/remote/datatypes/events.py +66 -0
- howler/remote/datatypes/hash.py +206 -0
- howler/remote/datatypes/lock.py +42 -0
- howler/remote/datatypes/queues/__init__.py +0 -0
- howler/remote/datatypes/queues/comms.py +59 -0
- howler/remote/datatypes/queues/multi.py +32 -0
- howler/remote/datatypes/queues/named.py +93 -0
- howler/remote/datatypes/queues/priority.py +215 -0
- howler/remote/datatypes/set.py +118 -0
- howler/remote/datatypes/user_quota_tracker.py +54 -0
- howler/security/__init__.py +253 -0
- howler/security/socket.py +108 -0
- howler/security/utils.py +185 -0
- howler/services/__init__.py +0 -0
- howler/services/action_service.py +111 -0
- howler/services/analytic_service.py +128 -0
- howler/services/auth_service.py +323 -0
- howler/services/config_service.py +128 -0
- howler/services/dossier_service.py +252 -0
- howler/services/event_service.py +93 -0
- howler/services/hit_service.py +893 -0
- howler/services/jwt_service.py +158 -0
- howler/services/lucene_service.py +286 -0
- howler/services/notebook_service.py +119 -0
- howler/services/overview_service.py +44 -0
- howler/services/template_service.py +45 -0
- howler/services/user_service.py +330 -0
- howler/utils/__init__.py +0 -0
- howler/utils/annotations.py +28 -0
- howler/utils/chunk.py +38 -0
- howler/utils/dict_utils.py +200 -0
- howler/utils/isotime.py +17 -0
- howler/utils/list_utils.py +11 -0
- howler/utils/lucene.py +77 -0
- howler/utils/path.py +27 -0
- howler/utils/socket_utils.py +61 -0
- howler/utils/str_utils.py +256 -0
- howler/utils/uid.py +47 -0
- howler_api-2.13.0.dev329.dist-info/METADATA +71 -0
- howler_api-2.13.0.dev329.dist-info/RECORD +200 -0
- howler_api-2.13.0.dev329.dist-info/WHEEL +4 -0
- howler_api-2.13.0.dev329.dist-info/entry_points.txt +8 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# implementation based on this stackoverflow post:
|
|
2
|
+
# https://stackoverflow.com/a/67943659
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
import jwt
|
|
8
|
+
import requests
|
|
9
|
+
from jwt.api_jwk import PyJWK
|
|
10
|
+
|
|
11
|
+
from howler.common.exceptions import ForbiddenException, HowlerKeyError, HowlerValueError
|
|
12
|
+
from howler.common.logging import get_logger
|
|
13
|
+
from howler.config import cache, config
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__file__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_jwk(access_token: str) -> PyJWK:
    """Get the JSON Web Key associated with the given JWT.

    The key id ("kid") is read from the token's *unverified* header and looked up in the merged key set
    returned by get_jwks(). If the key id is unknown, the cached key set is dropped and fetched again
    once before giving up.

    Args:
        access_token (str): The encoded JWT whose signing key should be resolved

    Raises:
        HowlerKeyError: The key id is not present in the key set, even after refreshing it

    Returns:
        PyJWK: The JSON Web Key matching the token's key id
    """
    # "kid" is the JSON Web Key's identifier. It tells us which key was used to sign the token.
    kid = jwt.get_unverified_header(access_token).get("kid")
    jwks, _ = get_jwks()

    try:
        # Check to see if we have it cached
        key_data = jwks[kid]
    except KeyError:
        # We don't, so we need to refresh the key set
        cache.delete(key="get_jwks")
        try:
            jwks, _ = get_jwks()
            key_data = jwks[kid]
        except KeyError as err:
            raise HowlerKeyError("Specified Key Set does not exist.") from err

    # Always wrap the raw JWK dict, so that both the cached and the refreshed path return a PyJWK
    # (callers such as decode() rely on the .key attribute).
    return PyJWK(key_data)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_provider(access_token: str) -> str:
    """Determine which configured OAuth provider issued the given access token.

    The lookup is done through the key id ("kid") found in the token's unverified header, using the
    key-id-to-provider mapping returned by get_jwks(). An unknown key id triggers one refresh of the
    cached key set before the lookup is considered failed.

    Args:
        access_token (str): The access token to determine the provider of

    Raises:
        HowlerValueError: The provider of this access token does not match any supported providers

    Returns:
        str: The provider of the token
    """
    # The key id identifies the JSON Web Key referenced by the token header
    key_id = jwt.get_unverified_header(access_token).get("kid")
    _, key_providers = get_jwks()

    # Fast path: the key id is already part of the cached mapping
    if key_id in key_providers:
        return key_providers[key_id]

    # Unknown key id: invalidate the cached key set and try exactly once more
    cache.delete(key="get_jwks")
    try:
        _, key_providers = get_jwks()
        return key_providers[key_id]
    except KeyError:
        raise HowlerValueError("The provider of this access token does not match any supported providers")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@cache.cached(timeout=60 * 60 * 12, key_prefix="get_jwks")  # Cached for 12hrs
def get_jwks() -> tuple[dict[str, dict[str, Any]], dict[str, str]]:
    """Build the merged JSON Web Key Set (JWKS) of every configured OAuth provider.

    Each provider with a jwks_uri is queried over HTTP (10 second timeout) and the entries of the
    "keys" list in its JSON response are merged into a single mapping.

    Returns:
        tuple[dict[str, dict[str, Any]], dict[str, str]]: A mapping of key id to raw JWK data, and a
            mapping of key id to the name of the provider the key was fetched from
    """
    merged_keys: dict[str, dict[str, Any]] = {}
    # Mapping of key ids to the provider they belong to (i.e. azure, keycloak)
    key_owners: dict[str, str] = {}

    for provider_name, provider_data in config.auth.oauth.providers.items():
        # Providers without a JWKS endpoint contribute nothing to the key set
        if not provider_data.jwks_uri:
            continue

        response = requests.get(provider_data.jwks_uri, timeout=10)
        provider_jwks: list[dict[str, Any]] = response.json()["keys"]

        for jwk in provider_jwks:
            merged_keys[jwk["kid"]] = jwk
            key_owners[jwk["kid"]] = provider_name

    return (merged_keys, key_owners)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def get_audience(oauth_provider: str) -> str:
    """Resolve the audience value to validate tokens of the given OAuth provider against.

    The provider's configured audience takes precedence, followed by its client id; if neither is
    set the audience is "howler".

    Args:
        oauth_provider (str): The OAuth provider to retrieve the audience of

    Raises:
        HowlerValueError: The provider is azure, and its scope lacks the "<audience>/.default" entry

    Returns:
        str: The audience of the provider
    """
    provider_data = config.auth.oauth.providers[oauth_provider]

    # First truthy value wins: explicit audience, then client id, then the default
    audience: str = provider_data.audience or provider_data.client_id or "howler"

    if oauth_provider != "azure":
        return audience

    # Azure is only accepted when the resolved audience appears in the scope as "<audience>/.default"
    if f"{audience}/.default" not in provider_data.scope:
        raise HowlerValueError("Azure scope must contain the <client_id>/.default claim!")

    return audience
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def decode(
    access_token: str,
    key: Optional[str] = None,
    algorithms: Optional[list[str]] = None,
    audience: Optional[str] = None,
    validate_audience: bool = False,
    **kwargs,
) -> dict[str, Any]:
    """Decode and verify an access token, returning its claims.

    Missing arguments are derived from the token itself: the key is resolved through get_jwk(), the
    algorithm is read from the token header (falling back to "HS256"), and - only when
    validate_audience is set - the audience is resolved from the token's provider.

    Args:
        access_token (str): The access token to decode
        key (Optional[str], optional): The key used to sign the token. Defaults to None.
        algorithms (Optional[list[str]], optional): The algorithms to accept when decoding. Defaults to None.
        audience (Optional[str], optional): The audience to check against, if validating the audience.
            Defaults to None.
        validate_audience (bool, optional): Should the audience be resolved when none is given? Defaults to False.
        **kwargs: Forwarded unchanged to jwt.decode

    Raises:
        ForbiddenException: The token has expired
        HowlerValueError: The token could not be decoded for any other reason

    Returns:
        dict[str, Any]: The decoded JWT, in dict format
    """
    signing_key = key if key else get_jwk(access_token).key

    # NOTE(review): the fallback algorithm comes from the *unverified* token header - confirm callers
    # that rely on it are comfortable letting the token pick its own algorithm.
    accepted_algorithms = (
        algorithms if algorithms else [jwt.get_unverified_header(access_token).get("alg", "HS256")]
    )

    expected_audience = audience
    if validate_audience and not expected_audience:
        expected_audience = get_audience(get_provider(access_token))

    try:
        logger.debug("Validating token against audience %s", expected_audience)
        return jwt.decode(  # type: ignore
            jwt=access_token,
            key=signing_key,
            algorithms=accepted_algorithms,
            audience=expected_audience,
            **kwargs,
        )
    except jwt.ExpiredSignatureError as err:
        # Expiry is an expected condition, so it is reported at info level only
        logger.info("JWT has expired.")
        raise ForbiddenException("Your JWT has expired.", cause=err)
    except jwt.InvalidTokenError as err:
        logger.exception("Error occurred when decoding JWT.")
        raise HowlerValueError("There was an error when decoding your JWT.", cause=err)
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import fnmatch
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from hashlib import sha256
|
|
7
|
+
from typing import Any, Literal, Union, cast
|
|
8
|
+
|
|
9
|
+
from elasticsearch._sync.client.indices import IndicesClient
|
|
10
|
+
from luqum.parser import parser
|
|
11
|
+
from luqum.tree import AndOperation, BoolOperation, Phrase, Plus, Prohibit, Range, SearchField, Word
|
|
12
|
+
from luqum.utils import UnknownOperationResolver
|
|
13
|
+
from luqum.visitor import TreeVisitor
|
|
14
|
+
|
|
15
|
+
from howler.api import get_logger
|
|
16
|
+
from howler.common.exceptions import InvalidDataException
|
|
17
|
+
from howler.common.loader import datastore
|
|
18
|
+
from howler.config import redis
|
|
19
|
+
from howler.remote.datatypes.hash import Hash
|
|
20
|
+
from howler.utils.dict_utils import flatten_deep
|
|
21
|
+
from howler.utils.lucene import coerce, normalize_phrase, try_parse_date, try_parse_ip, try_parse_number
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__file__)
|
|
24
|
+
|
|
25
|
+
TRANSPORT_TIMEOUT = int(os.environ.get("HWL_DATASTORE_TRANSPORT_TIMEOUT", "10"))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class LuceneProcessor(TreeVisitor):
    """Tree visitor that evaluates a parsed lucene query against a single object.

    The object under test is expected in the visiting context under the "hit" key, as a flat
    mapping of field name to value. Every visit_* method is a generator yielding booleans.
    """

    def visit(self, tree: Any, context: dict[str, Any]) -> bool:
        """Visit each node in a tree and return the result of the root node"""
        return super().visit(tree, context)[0]

    def visit_search_field(self, node: SearchField, context: dict[str, Any]):
        """Handle search fields"""
        # The actual validation happens in the word/phrases directly, not the search field.
        # We pass the field name down for use later
        yield from self.generic_visit(node, {**context, "field": node.name})

    def visit_and_operation(self, node: AndOperation, context: dict[str, Any]):
        """Handle AND results in query"""
        # All children are evaluated before combining them (no short-circuit)
        yield all(list(self.generic_visit(node, context)))

    # NOTE(review): the annotation says AndOperation, but going by the method name this handler
    # is presumably dispatched for OR nodes - confirm and import the matching type.
    def visit_or_operation(self, node: AndOperation, context: dict[str, Any]):
        """Handle OR results in query"""
        yield any(list(self.generic_visit(node, context)))

    def visit_bool_operation(self, node: BoolOperation, context: dict[str, Any]):
        """Handle the insanity that is boolean operations.

        For information about how boolean operations work, see the following extremely helpful article:

        https://lucidworks.com/resources/solr-boolean-operators/

        However, we are operating in a boolean environment instead of rankings, so the behaviour is slightly modified:

        - a MUST (plus) clause that does not match fails the whole operation
        - a MUST NOT (prohibit) clause whose term matches fails the whole operation
        - otherwise the operation passes if any MUST/optional clause matched, or if it consists solely
          of (satisfied) MUST NOT clauses
        """
        results: list[bool] = []
        for child in node.children:
            child_context = self.child_context(node, child, context)
            is_prohibit = isinstance(child, Prohibit)
            is_required = is_prohibit or isinstance(child, Plus)

            for result in self.visit_iter(child, context=child_context):
                # visit_prohibit has already inverted the result of the prohibited term, so for both MUST and
                # MUST NOT (plus, prohibit) clauses a falsy result means the constraint was violated and we
                # immediately shortcircuit and return false.
                if is_required and not result:
                    yield False
                    return

                # A satisfied MUST NOT only means the clause didn't rule the object out; it must not count as
                # a positive match of its own.
                if not is_prohibit:
                    results.append(result)

        # Otherwise, we use a basic OR operation to return a result. With nothing but satisfied MUST NOT
        # clauses there is nothing left that could fail.
        yield any(results) if results else True

    @staticmethod
    def __parse_range(low: str, value: Union[list[str], str], high: str) -> Any:
        """Generate the low, value and high components of a range check, ensuring correct types.

        The value is tried as a date, a number, an integer, an IP and a float (in that order); the
        first interpretation that also fits both bounds wins.

        Raises:
            InvalidDataException: No common type could be found for the bounds and the value
        """
        # NOTE(review): coerce/try_parse_* live in howler.utils.lucene; they are presumed to return a
        # falsy value when parsing fails - confirm, since a parsed value of 0 would also be skipped here.
        if datetime_result := coerce(value, try_parse_date):
            # Bounds of date ranges are read as epoch milliseconds, in the timezone of the parsed value
            low_datetime_result = cast(Any, datetime.fromtimestamp(int(low) / 1000, tz=datetime_result.tzinfo))

            high_datetime_result = datetime.fromtimestamp(int(high) / 1000, tz=datetime_result.tzinfo)
            # Widen the upper bound by one millisecond
            high_datetime_result += timedelta(milliseconds=1)

            return low_datetime_result, datetime_result, high_datetime_result

        if number_result := coerce(value, try_parse_number):
            low_number_result = coerce(low, try_parse_number)
            high_number_result = coerce(high, try_parse_number)

            if low_number_result is not None and high_number_result is not None:
                return low_number_result, number_result, high_number_result

        try:
            # Check if the value is a simple integer
            return int(low), coerce(value, int), int(high)
        except ValueError:
            pass

        if ip_result := coerce(value, try_parse_ip):
            low_ip_result = coerce(low, try_parse_ip)
            high_ip_result = coerce(high, try_parse_ip)

            if low_ip_result is not None and high_ip_result is not None:
                return low_ip_result, ip_result, high_ip_result

        try:
            # Check if the value is a float
            return float(low), coerce(value, float), float(high)
        except ValueError:
            pass

        raise InvalidDataException(f"Unknown range type for values {low} - {value} - {high}")

    def visit_range(self, node: Range, context: dict[str, Any]):
        """Handle range queries.

        Yields True if at least one value of the current field lies within the range, honouring the
        inclusive/exclusive flags of both bounds.
        """
        low, value, high = self.__parse_range(node.low.value, context["hit"].get(context["field"]), node.high.value)

        values = value if isinstance(value, list) else [value]

        result = False
        for _value in values:
            if low <= _value and _value <= high:
                # Values sitting exactly on an exclusive bound don't count
                if not node.include_high and _value == high:
                    continue
                elif not node.include_low and _value == low:
                    continue

                result = True
                break

        yield result

    @staticmethod
    def __sanitize_value(value: str) -> str:
        """Sanitize the value we are validating against"""
        # True/False are shorthanded by elastic - convert back to True/False
        sanitized_value = re.sub(r"^F$", r"False", value)
        sanitized_value = re.sub(r"^T$", r"True", sanitized_value)

        # For phrases, remove the encapsulating quotations
        sanitized_value = re.sub(r'"(.+)"', r"\1", sanitized_value)

        # Unescape escaped colons in value
        sanitized_value = sanitized_value.replace("\\:", ":")

        return sanitized_value

    @staticmethod
    def __build_candidates(value: Union[list[str], str], type: Union[Literal["phrase"], Literal["word"]]) -> list[str]:
        """Build the list of normalized strings a word/phrase is matched against.

        List values contribute the normalized candidates of each of their entries.
        """
        candidates: list[str] = []
        if isinstance(value, list):
            for entry in value:
                candidates += normalize_phrase(str(entry), type)
        else:
            candidates = normalize_phrase(str(value), type)

        return candidates

    def __handle_word_or_phrase(self, node: Union[Phrase, Word], context: dict[str, Any]):
        """Evaluate a single term, depending on the field (if any) it is attached to"""
        sanitized_value = self.__sanitize_value(node.value)

        if "field" not in context:
            # No field given: the term has to equal the value of any field
            yield any(value == sanitized_value for value in context["hit"].values())
        elif context["field"] == "_exists_":
            # For exists queries, the term is the name of the field to check
            yield context["hit"].get(node.value) is not None
        else:
            candidates = self.__build_candidates(context["hit"].get(context["field"]), context["term_type"])

            # fnmatch gives us glob-style wildcard matching of the term against the candidates
            yield len(fnmatch.filter(candidates, sanitized_value)) > 0

    def visit_word(self, node: Word, context: dict[str, Any]):
        """Handle words"""
        yield from self.__handle_word_or_phrase(node, {**context, "term_type": "word"})

    def visit_phrase(self, node: Phrase, context: dict[str, Any]):
        """Handle phrases"""
        yield from self.__handle_word_or_phrase(node, {**context, "term_type": "phrase"})

    def visit_prohibit(self, node: Prohibit, context: dict[str, Any]):
        """Handle NOT operation by inverting the result of the prohibited term"""
        yield from (not entry for entry in self.generic_visit(node, context))
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# Hash named "normalized_queries", constructed with the redis handle from howler.config. match() stores
# the normalized form of each lucene query in it, keyed by the sha256 hex digest of the raw query.
NORMALIZED_QUERY_CACHE: Hash[str] = Hash("normalized_queries", redis)
|
|
190
|
+
|
|
191
|
+
# In-process mapping of sha256 hex digest -> the regex match a quoted phrase was taken from. Filled by
# replace_lucene_phrase and read back by try_reinsert_lucene_phrase; nothing in this module evicts entries.
SEARCH_PHRASE_CACHE: dict[str, re.Match[str]] = {}
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def replace_lucene_phrase(match: re.Match[str]) -> str:
    """Replace a quoted lucene phrase with a stand-in that survives normalization by ES.

    Dates are kept verbatim, IPs get their colons swapped for "@colon", and every other phrase is
    replaced by its sha256 hex digest (the match is remembered so the phrase can be restored later).
    Whatever was captured before (group 2) and after (group 4) the phrase is preserved.
    """
    leading = match.group(2) or ""
    phrase = match.group(3)
    unquoted_phrase = phrase.replace('"', "")

    if try_parse_date(unquoted_phrase):
        replacement = phrase
    elif try_parse_ip(unquoted_phrase):
        replacement = phrase.replace(":", "@colon")
    else:
        replacement = sha256(phrase.encode()).hexdigest()

        # Remember where the digest came from, so it can be swapped back after normalization
        SEARCH_PHRASE_CACHE[replacement] = match

    trailing = match.group(4) or ""

    return leading + replacement + trailing
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def try_reinsert_lucene_phrase(match: re.Match[str]) -> str:
    """Swap a potential sha256 digest back for the lucene phrase it replaced.

    Digests that were never recorded in SEARCH_PHRASE_CACHE are returned unchanged.
    """
    digest = match.group(1)
    original_match = SEARCH_PHRASE_CACHE.get(digest)

    # Group 3 of the recorded match is the quoted phrase itself
    return digest if original_match is None else original_match.group(3)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def match(lucene: str, obj: dict[str, Any]):
    """Check if a given lucene query matches the given object.

    The query is first normalized through elastic's validate_query API (the result is cached per
    query), then parsed with luqum and evaluated against the flattened object by LuceneProcessor.
    Returns False if the query is invalid or anything goes wrong while parsing/evaluating it.
    """
    query_digest = sha256(lucene.encode()).hexdigest()
    normalized_query = NORMALIZED_QUERY_CACHE.get(query_digest)

    # We cache the results back from ES, since we will frequently run the same validation queries over and
    # over again. The cached value is ignored whenever pytest is loaded.
    if normalized_query is None or "pytest" in sys.modules:
        # This regex checks for lucene phrases (i.e. the "Example Analytic" part of
        # howler.analytic:"Example Analytic") and then escapes them.
        # https://regex101.com/r/8u5F6a/1
        escaped_lucene = re.sub(r'((:\()?(".+?")(\)?))', replace_lucene_phrase, lucene)

        # This may seem unintuitive, but elastic parses lucene queries in somewhat nonstandard ways (or at
        # least, in ways luqum doesn't agree with). To circumvent this, we use validate_query, which returns a
        # "normalized" query that works much better with luqum. It's also much faster than actually searching
        # for the hit in question.
        indices_client = IndicesClient(datastore().hit.datastore.client)
        validation = indices_client.validate_query(
            q=escaped_lucene, explain=True, index=datastore().hit.index_name
        )

        if not validation["valid"]:
            logger.error("Invalid lucene query:\n%s", validation["explanations"][0]["error"])
            return False

        # As an example, the query:
        # server.address:("supports" OR "their") AND howler.votes.benign:("edge" OR "also")
        # becomes:
        # +(server.address:supports server.address:their) +(howler.votes.benign:edge howler.votes.benign:also)
        # which means the two are equivalent in elastic, but the second one is a lot less ambiguous to parse.
        normalized_query = cast(str, validation["explanations"][0]["explanation"])

        cleanup_steps = (
            # Elastic's explanation mangles exists queries. Since we will handle them the normal way, reset
            # their changes
            (r"FieldExistsQuery *\[.*?field=(.+?)]", r"_exists_:\1"),
            (r"ConstantScore", ""),
            # Try and reinsert any phrases we have replaced with sha256 hashes
            (r"([0-9a-f]{64})", try_reinsert_lucene_phrase),
        )
        for pattern, replacement in cleanup_steps:
            normalized_query = re.sub(pattern, replacement, normalized_query)

        # Properly convert escaped colons back
        normalized_query = normalized_query.replace("@colon", ":")

        # Cache the normalized query
        NORMALIZED_QUERY_CACHE.set(query_digest, normalized_query)

    try:
        # luqum's default tree will return UnknownOperations in cases where explicit operators aren't used.
        # Due to the normalization step undertaken by elastic, we know that all unknown operations are actually
        # Boolean operations.
        #
        # NOTE: Boolean operations have a special meaning in lucene, and are not analogous to and/or operations.
        # For more information, see: https://lucidworks.com/resources/solr-boolean-operators/
        resolver = UnknownOperationResolver(resolve_to=BoolOperation)
        tree = resolver(parser.parse(normalized_query))

        # Actually run the validation
        return LuceneProcessor(track_parents=True).visit(tree, {"hit": flatten_deep(obj)})
    except Exception:
        logger.exception("Exception on processing lucene:")
        return False
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
if __name__ == "__main__":
    # Manual check: fetch a single hit from the datastore and test the lucene query given as the first
    # command line argument against it.
    search_result = datastore().hit.search("howler.id:*", rows=1, as_obj=False)
    hit = search_result["items"][0]

    print(match(sys.argv[1], hit))  # noqa: T201
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
from typing import Any, Callable, Optional
|
|
2
|
+
|
|
3
|
+
import chevron
|
|
4
|
+
import requests
|
|
5
|
+
from flask import request
|
|
6
|
+
|
|
7
|
+
from howler.common.exceptions import AuthenticationException, HowlerRuntimeError, HowlerValueError
|
|
8
|
+
from howler.common.logging import get_logger
|
|
9
|
+
from howler.config import cache, config
|
|
10
|
+
from howler.odm.models.analytic import Analytic
|
|
11
|
+
from howler.plugins import get_plugins
|
|
12
|
+
|
|
13
|
+
logger = get_logger(__file__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@cache.memoize(15 * 60)
def get_token(access_token: str) -> str:
    """Get a notebook token based on the current howler token.

    The first plugin exposing a "notebook" entry in its token functions is used to derive the token;
    without such a plugin the howler access token is returned unchanged. Results are memoized for
    15 minutes.

    Args:
        access_token (str): The howler access token of the current user

    Returns:
        str: The token to authenticate against the notebook service with
    """
    get_notebook_token: Optional[Callable[[str], str]] = None

    for plugin in get_plugins():
        if get_notebook_token := plugin.modules.token_functions.get("notebook", None):
            break
        else:
            # The plugin type isn't visible from here, so fall back to the plugin object itself if it
            # doesn't expose a name.
            logger.info(
                "Plugin %s does not modify the notebook access token.",
                getattr(plugin, "name", plugin),
            )

    if get_notebook_token:
        notebook_access_token = get_notebook_token(access_token)
    else:
        logger.info("No custom notebook token logic provided, continuing with howler credentials")
        notebook_access_token = access_token

    return notebook_access_token
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_nbgallery_nb(link: str):
    """Get a notebook from a given nbgallery link.

    The notebook is downloaded on behalf of the current request's user, using the token from the
    request's Authorization header.

    Args:
        link (str): Link to the notebook, ending in "<id>-<slug>" (i.e. /notebooks/1-example-nb)

    Raises:
        AuthenticationException: The Authorization header is missing or contains no token

    Returns:
        tuple: The notebook JSON and its gallery title, or (None, None) if the download failed
    """
    # /notebooks/1-example-nb
    # get the id (1)
    nb_id = link.rsplit("/", 1)[-1].rsplit("-")[0]
    auth_data: Optional[str] = request.headers.get("Authorization", None, type=str)

    if not auth_data:
        raise AuthenticationException("No Authorization header present")

    # Expected shape is "<scheme> <token>"; reject headers without a token instead of failing with an
    # IndexError further down.
    header_parts = auth_data.split(" ")
    if len(header_parts) < 2 or not header_parts[1]:
        raise AuthenticationException("Malformed Authorization header")

    access_token = get_token(header_parts[1])

    # use obo token to retrieve notebook value
    notebook_req = requests.get(
        f"{config.core.notebook.url}/notebooks/{nb_id}/download.json",
        headers={
            "accept": "application/json",
            "Authorization": f"Bearer {access_token}",
        },
        timeout=5,
    )

    if not notebook_req.ok:
        return None, None

    notebook: dict[str, Any] = notebook_req.json()
    name = notebook["metadata"]["gallery"]["title"]

    return (notebook, name)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_user_envs():
    """Get a user's environments from nbgallery.

    The request is made on behalf of the current request's user, using the token from the request's
    Authorization header.

    Raises:
        AuthenticationException: The Authorization header is missing or contains no token
        HowlerRuntimeError: NBGallery answered with an error status

    Returns:
        The decoded JSON body of nbgallery's environments endpoint
    """
    auth_data: Optional[str] = request.headers.get("Authorization", None, type=str)

    if not auth_data:
        raise AuthenticationException("No Authorization header present")

    # Expected shape is "<scheme> <token>"; reject headers without a token instead of failing with an
    # IndexError further down.
    header_parts = auth_data.split(" ")
    if len(header_parts) < 2 or not header_parts[1]:
        raise AuthenticationException("Malformed Authorization header")

    access_token = get_token(header_parts[1])

    # get environment info from jupyterhub
    # how to get environment without nbgallery?
    # https://nbgallery.dev.analysis.cyber.gc.ca/environments.json
    response = requests.get(
        f"{config.core.notebook.url}/environments.json",
        headers={
            "accept": "application/json",
            "Authorization": f"Bearer {access_token}",
        },
        timeout=5,
    )

    if not response.ok:
        raise HowlerRuntimeError(f"NBGallery returned {response.status_code}")

    return response.json()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def get_nb_information(nb_link: str, analytic: Analytic, hit: dict[str, Any]):
    """Get information about a notebook from nbgallery.

    The notebook is fetched through ``get_nbgallery_nb`` and the source of its
    first code cell is rendered with chevron, exposing ``hit`` and ``analytic``
    to the template.

    Args:
        nb_link: Link to the notebook; it must contain the substring "nbgallery".
        analytic: Analytic made available to the template as ``analytic``.
        hit: Hit data made available to the template as ``hit``.

    Returns:
        A ``(json_content, name)`` tuple: the notebook content with its first code
        cell rendered, and the notebook name.

    Raises:
        HowlerValueError: If the link is not an nbgallery link, or the notebook
            has no code cell.
        HowlerRuntimeError: If the notebook could not be retrieved, or any other
            error occurs while templating it.
    """
    # nbgallery is the only supported notebook source for now
    if "nbgallery" not in nb_link:
        raise HowlerValueError("Invalid notebook source")

    json_content, name = get_nbgallery_nb(nb_link)

    if not (json_content and name):
        raise HowlerRuntimeError("An error occurred when retrieving the notebook")

    # goal: support any field from a hit/analytic object
    template_context = {"hit": hit, "analytic": analytic}

    try:
        # Only the first cell holding code gets patched with hit/analytic info
        first_code_cell = next(cell for cell in json_content["cells"] if cell["cell_type"] == "code")
        first_code_cell["source"] = chevron.render(first_code_cell["source"], template_context)
    except StopIteration as e:
        raise HowlerValueError("Notebook doesn't contain a cell with code.", e)
    except Exception as e:
        raise HowlerRuntimeError("Unexpected error while processing notebook.", e)

    return (json_content, name)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import Any, Union
|
|
2
|
+
|
|
3
|
+
from howler.common.loader import datastore
|
|
4
|
+
from howler.common.logging import get_logger
|
|
5
|
+
from howler.datastore.exceptions import SearchException
|
|
6
|
+
from howler.odm.models.hit import Hit
|
|
7
|
+
from howler.odm.models.overview import Overview
|
|
8
|
+
from howler.utils.str_utils import sanitize_lucene_query
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__file__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_matching_overviews(
    hits: Union[list[Hit], list[dict[str, Any]]], as_odm: bool = False
) -> Union[list[dict[str, Any]], list[Overview]]:
    """Find the overviews whose analytic matches the analytic of any provided hit.

    Args:
        hits (list[Hit] | list[dict[str, Any]]): Hits to collect analytic names from; each one is read
            through ``hit["howler"]["analytic"]``.
        as_odm (bool, optional): Forwarded to the datastore search as ``as_obj``. If True, Overview objects
            are returned; otherwise dictionaries. Defaults to False.

    Returns:
        list[dict[str, Any]] | list[Overview]: The ``items`` of the search result, or an empty list when no
            hits were given or the search raised a SearchException.
    """
    if len(hits) == 0:
        return []

    # Each distinct analytic name is sanitized and wrapped in double quotes for the query
    quoted_analytics: set[str] = {f'"{sanitize_lucene_query(entry["howler"]["analytic"])}"' for entry in hits}

    if len(quoted_analytics) == 0:
        return []

    analytic_clause = " OR ".join(quoted_analytics)

    try:
        search_result = datastore().overview.search(
            f"analytic:({analytic_clause})",
            as_obj=as_odm,
        )

        return search_result["items"]
    except SearchException:
        # A failed search is logged and reported as "no matches" rather than propagated
        logger.exception("Exception on analytic matching")
        return []
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from typing import Any, Optional, Union
|
|
2
|
+
|
|
3
|
+
from howler.common.loader import datastore
|
|
4
|
+
from howler.common.logging import get_logger
|
|
5
|
+
from howler.datastore.exceptions import SearchException
|
|
6
|
+
from howler.odm.models.analytic import Analytic
|
|
7
|
+
from howler.odm.models.hit import Hit
|
|
8
|
+
from howler.utils.str_utils import sanitize_lucene_query
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__file__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_matching_templates(
    hits: Union[list[Hit], list[dict[str, Any]]], uname: Optional[str] = None, as_odm: bool = False
) -> Union[list[dict[str, Any]], list[Analytic]]:
    """Find the templates matching the analytics of the provided hits, optionally scoped to a user.

    A template is selected when its analytic is one of the hits' analytics and it is either of type
    ``global`` or owned by ``uname``.

    Args:
        hits (list[Hit] | list[dict[str, Any]]): Hits to collect analytic names from; each one is read
            through ``hit["howler"]["analytic"]``.
        uname (Optional[str], optional): Username used in the owner clause of the query. When empty or
            None, the wildcard ``*`` is used instead. Defaults to None.
        as_odm (bool, optional): Forwarded to the datastore search as ``as_obj``. If True, results are
            returned as ODM objects; otherwise as dicts. Defaults to False.

    Returns:
        list[dict[str, Any]] | list[Analytic]: The ``items`` of the search result, or an empty list when no
            hits were given or the search raised a SearchException.
    """
    if len(hits) == 0:
        return []

    # Each distinct analytic name is sanitized and wrapped in double quotes for the query
    quoted_analytics: set[str] = {f'"{sanitize_lucene_query(entry["howler"]["analytic"])}"' for entry in hits}

    if len(quoted_analytics) == 0:
        return []

    analytic_clause = " OR ".join(quoted_analytics)
    owner_clause = uname or "*"

    try:
        search_result = datastore().template.search(
            f"analytic:({analytic_clause}) AND (type:global OR owner:{owner_clause})",
            as_obj=as_odm,
        )

        return search_result["items"]
    except SearchException:
        # A failed search is logged and reported as "no matches" rather than propagated
        logger.exception("Exception on analytic matching")
        return []
|