howler-api 2.13.0.dev329__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of howler-api might be problematic.
- howler/__init__.py +0 -0
- howler/actions/__init__.py +167 -0
- howler/actions/add_label.py +111 -0
- howler/actions/add_to_bundle.py +159 -0
- howler/actions/change_field.py +76 -0
- howler/actions/demote.py +160 -0
- howler/actions/example_plugin.py +104 -0
- howler/actions/prioritization.py +93 -0
- howler/actions/promote.py +147 -0
- howler/actions/remove_from_bundle.py +133 -0
- howler/actions/remove_label.py +111 -0
- howler/actions/transition.py +200 -0
- howler/api/__init__.py +249 -0
- howler/api/base.py +88 -0
- howler/api/socket.py +114 -0
- howler/api/v1/__init__.py +97 -0
- howler/api/v1/action.py +372 -0
- howler/api/v1/analytic.py +748 -0
- howler/api/v1/auth.py +382 -0
- howler/api/v1/borealis.py +101 -0
- howler/api/v1/configs.py +55 -0
- howler/api/v1/dossier.py +222 -0
- howler/api/v1/help.py +28 -0
- howler/api/v1/hit.py +1181 -0
- howler/api/v1/notebook.py +82 -0
- howler/api/v1/overview.py +191 -0
- howler/api/v1/search.py +715 -0
- howler/api/v1/template.py +206 -0
- howler/api/v1/tool.py +183 -0
- howler/api/v1/user.py +414 -0
- howler/api/v1/utils/__init__.py +0 -0
- howler/api/v1/utils/etag.py +84 -0
- howler/api/v1/view.py +288 -0
- howler/app.py +235 -0
- howler/common/README.md +144 -0
- howler/common/__init__.py +0 -0
- howler/common/classification.py +979 -0
- howler/common/classification.yml +107 -0
- howler/common/exceptions.py +167 -0
- howler/common/hexdump.py +48 -0
- howler/common/iprange.py +171 -0
- howler/common/loader.py +154 -0
- howler/common/logging/__init__.py +241 -0
- howler/common/logging/audit.py +138 -0
- howler/common/logging/format.py +38 -0
- howler/common/net.py +79 -0
- howler/common/net_static.py +1494 -0
- howler/common/random_user.py +316 -0
- howler/common/swagger.py +117 -0
- howler/config.py +64 -0
- howler/cronjobs/__init__.py +29 -0
- howler/cronjobs/retention.py +61 -0
- howler/cronjobs/rules.py +274 -0
- howler/cronjobs/view_cleanup.py +88 -0
- howler/datastore/README.md +112 -0
- howler/datastore/__init__.py +0 -0
- howler/datastore/bulk.py +72 -0
- howler/datastore/collection.py +2327 -0
- howler/datastore/constants.py +117 -0
- howler/datastore/exceptions.py +41 -0
- howler/datastore/howler_store.py +105 -0
- howler/datastore/migrations/fix_process.py +41 -0
- howler/datastore/operations.py +130 -0
- howler/datastore/schemas.py +90 -0
- howler/datastore/store.py +231 -0
- howler/datastore/support/__init__.py +0 -0
- howler/datastore/support/build.py +214 -0
- howler/datastore/support/schemas.py +90 -0
- howler/datastore/types.py +22 -0
- howler/error.py +91 -0
- howler/external/__init__.py +0 -0
- howler/external/generate_mitre.py +96 -0
- howler/external/generate_sigma_rules.py +31 -0
- howler/external/generate_tlds.py +47 -0
- howler/external/reindex_data.py +46 -0
- howler/external/wipe_databases.py +58 -0
- howler/gunicorn_config.py +25 -0
- howler/healthz.py +47 -0
- howler/helper/__init__.py +0 -0
- howler/helper/azure.py +50 -0
- howler/helper/discover.py +59 -0
- howler/helper/hit.py +236 -0
- howler/helper/oauth.py +247 -0
- howler/helper/search.py +92 -0
- howler/helper/workflow.py +110 -0
- howler/helper/ws.py +378 -0
- howler/odm/README.md +102 -0
- howler/odm/__init__.py +1 -0
- howler/odm/base.py +1504 -0
- howler/odm/charter.txt +146 -0
- howler/odm/helper.py +416 -0
- howler/odm/howler_enum.py +25 -0
- howler/odm/models/__init__.py +0 -0
- howler/odm/models/action.py +33 -0
- howler/odm/models/analytic.py +90 -0
- howler/odm/models/assemblyline.py +48 -0
- howler/odm/models/aws.py +23 -0
- howler/odm/models/azure.py +16 -0
- howler/odm/models/cbs.py +44 -0
- howler/odm/models/config.py +558 -0
- howler/odm/models/dossier.py +33 -0
- howler/odm/models/ecs/__init__.py +0 -0
- howler/odm/models/ecs/agent.py +17 -0
- howler/odm/models/ecs/autonomous_system.py +16 -0
- howler/odm/models/ecs/client.py +149 -0
- howler/odm/models/ecs/cloud.py +141 -0
- howler/odm/models/ecs/code_signature.py +27 -0
- howler/odm/models/ecs/container.py +32 -0
- howler/odm/models/ecs/dns.py +62 -0
- howler/odm/models/ecs/egress.py +10 -0
- howler/odm/models/ecs/elf.py +74 -0
- howler/odm/models/ecs/email.py +122 -0
- howler/odm/models/ecs/error.py +14 -0
- howler/odm/models/ecs/event.py +140 -0
- howler/odm/models/ecs/faas.py +24 -0
- howler/odm/models/ecs/file.py +84 -0
- howler/odm/models/ecs/geo.py +30 -0
- howler/odm/models/ecs/group.py +18 -0
- howler/odm/models/ecs/hash.py +16 -0
- howler/odm/models/ecs/host.py +17 -0
- howler/odm/models/ecs/http.py +37 -0
- howler/odm/models/ecs/ingress.py +12 -0
- howler/odm/models/ecs/interface.py +21 -0
- howler/odm/models/ecs/network.py +30 -0
- howler/odm/models/ecs/observer.py +45 -0
- howler/odm/models/ecs/organization.py +12 -0
- howler/odm/models/ecs/os.py +21 -0
- howler/odm/models/ecs/pe.py +17 -0
- howler/odm/models/ecs/process.py +216 -0
- howler/odm/models/ecs/registry.py +26 -0
- howler/odm/models/ecs/related.py +45 -0
- howler/odm/models/ecs/rule.py +51 -0
- howler/odm/models/ecs/server.py +24 -0
- howler/odm/models/ecs/threat.py +247 -0
- howler/odm/models/ecs/tls.py +58 -0
- howler/odm/models/ecs/url.py +51 -0
- howler/odm/models/ecs/user.py +57 -0
- howler/odm/models/ecs/user_agent.py +20 -0
- howler/odm/models/ecs/vulnerability.py +41 -0
- howler/odm/models/gcp.py +16 -0
- howler/odm/models/hit.py +356 -0
- howler/odm/models/howler_data.py +328 -0
- howler/odm/models/lead.py +33 -0
- howler/odm/models/localized_label.py +13 -0
- howler/odm/models/overview.py +16 -0
- howler/odm/models/pivot.py +40 -0
- howler/odm/models/template.py +24 -0
- howler/odm/models/user.py +83 -0
- howler/odm/models/view.py +34 -0
- howler/odm/random_data.py +888 -0
- howler/odm/randomizer.py +606 -0
- howler/patched.py +5 -0
- howler/plugins/__init__.py +25 -0
- howler/plugins/config.py +123 -0
- howler/remote/__init__.py +0 -0
- howler/remote/datatypes/README.md +355 -0
- howler/remote/datatypes/__init__.py +98 -0
- howler/remote/datatypes/counters.py +63 -0
- howler/remote/datatypes/events.py +66 -0
- howler/remote/datatypes/hash.py +206 -0
- howler/remote/datatypes/lock.py +42 -0
- howler/remote/datatypes/queues/__init__.py +0 -0
- howler/remote/datatypes/queues/comms.py +59 -0
- howler/remote/datatypes/queues/multi.py +32 -0
- howler/remote/datatypes/queues/named.py +93 -0
- howler/remote/datatypes/queues/priority.py +215 -0
- howler/remote/datatypes/set.py +118 -0
- howler/remote/datatypes/user_quota_tracker.py +54 -0
- howler/security/__init__.py +253 -0
- howler/security/socket.py +108 -0
- howler/security/utils.py +185 -0
- howler/services/__init__.py +0 -0
- howler/services/action_service.py +111 -0
- howler/services/analytic_service.py +128 -0
- howler/services/auth_service.py +323 -0
- howler/services/config_service.py +128 -0
- howler/services/dossier_service.py +252 -0
- howler/services/event_service.py +93 -0
- howler/services/hit_service.py +893 -0
- howler/services/jwt_service.py +158 -0
- howler/services/lucene_service.py +286 -0
- howler/services/notebook_service.py +119 -0
- howler/services/overview_service.py +44 -0
- howler/services/template_service.py +45 -0
- howler/services/user_service.py +330 -0
- howler/utils/__init__.py +0 -0
- howler/utils/annotations.py +28 -0
- howler/utils/chunk.py +38 -0
- howler/utils/dict_utils.py +200 -0
- howler/utils/isotime.py +17 -0
- howler/utils/list_utils.py +11 -0
- howler/utils/lucene.py +77 -0
- howler/utils/path.py +27 -0
- howler/utils/socket_utils.py +61 -0
- howler/utils/str_utils.py +256 -0
- howler/utils/uid.py +47 -0
- howler_api-2.13.0.dev329.dist-info/METADATA +71 -0
- howler_api-2.13.0.dev329.dist-info/RECORD +200 -0
- howler_api-2.13.0.dev329.dist-info/WHEEL +4 -0
- howler_api-2.13.0.dev329.dist-info/entry_points.txt +8 -0
howler/cronjobs/rules.py
ADDED
@@ -0,0 +1,274 @@

```python
import hashlib
import json
import os
import random
import re
import sys
from datetime import datetime
from typing import Any, Optional

from apscheduler.schedulers.base import BaseScheduler
from apscheduler.triggers.cron import CronTrigger
from pytz import timezone
from sigma.backends.elasticsearch import LuceneBackend
from sigma.rule import SigmaRule
from yaml.scanner import ScannerError

from howler.common.exceptions import HowlerValueError
from howler.common.loader import datastore
from howler.common.logging import get_logger
from howler.config import DEBUG, HWL_ENABLE_RULES
from howler.datastore.collection import ESCollection
from howler.datastore.operations import OdmHelper, OdmUpdateOperation
from howler.odm.models.analytic import Analytic
from howler.odm.models.hit import Hit
from howler.odm.models.howler_data import HitOperationType

logger = get_logger(__file__)
hit_helper = OdmHelper(Hit)

__scheduler_instance: Optional[BaseScheduler] = None


def create_correlated_bundle(rule: Analytic, query: str, correlated_hits: list[Hit]):
    "Create a bundle based on the results of an analytic"
    # We'll create a hash using the hashes of the children, and the analytic ID/current time
    bundle_hash = hashlib.sha256()
    bundle_hash.update(rule.analytic_id.encode())
    bundle_hash.update(query.replace("now", datetime.now().isoformat()).encode())
    for match in correlated_hits:
        bundle_hash.update(match.howler.hash.encode())

    hashed = bundle_hash.hexdigest()

    # If a matching bundle already exists, just reuse it (likely only ever lucene-specific)
    existing_result = datastore().hit.search(f"howler.hash:{hashed}", rows=1)
    if existing_result["total"] > 0:
        logger.debug(f"Rule hash {hashed} exists - skipping create")
        return existing_result["items"][0]

    child_ids = [match.howler.id for match in correlated_hits]

    correlated_bundle = Hit(
        {
            "howler.analytic": rule.name,
            "howler.detection": "Rule",
            "howler.score": 0.0,
            "howler.hash": hashed,
            "howler.is_bundle": True,
            "howler.hits": child_ids,
            "howler.data": [
                json.dumps(
                    {
                        "raw": rule.rule,
                        "sanitized": query,
                    }
                )
            ],
            "event.created": "NOW",
            "event.kind": "alert",
            "event.module": rule.rule_type,
            "event.provider": "howler",
            "event.reason": f"Children match {query}",
            "event.type": ["info"],
        }
    )
    correlated_bundle.event.id = correlated_bundle.howler.id

    datastore().hit.save(correlated_bundle.howler.id, correlated_bundle)

    if len(child_ids) > 0:
        datastore().hit.update_by_query(
            f"howler.id:({' OR '.join(child_ids)})",
            [
                hit_helper.list_add(
                    "howler.bundles",
                    correlated_bundle.howler.id,
                    if_missing=True,
                ),
                OdmUpdateOperation(
                    ESCollection.UPDATE_APPEND,
                    "howler.log",
                    {
                        "timestamp": "NOW",
                        "key": "howler.bundles",
                        "explanation": f"This hit was correlated by the analytic '{rule.name}'.",
                        "new_value": correlated_bundle.howler.id,
                        "previous_value": "None",
                        "type": HitOperationType.APPENDED,
                        "user": "Howler",
                    },
                ),
            ],
        )

    return correlated_bundle


def create_executor(rule: Analytic):  # noqa: C901
    "Create a cronjob for a given analytic"

    def execute():  # noqa: C901
        "Execute the rule"
        try:
            if not rule.rule or not rule.rule_type:
                logger.error("Invalid rule %s! Skipping", rule.analytic_id)
                return

            logger.info(
                "Executing rule %s (%s)",
                rule.name,
                rule.analytic_id,
            )

            correlated_hits: Optional[list[Hit]] = None

            if rule.rule_type in ["lucene", "sigma"]:
                if rule.rule_type == "lucene":
                    query = re.sub(r"\n+", " ", re.sub(r"#.+", "", rule.rule)).strip()
                else:
                    try:
                        sigma_rule = SigmaRule.from_yaml(rule.rule)
                    except ScannerError as e:
                        raise HowlerValueError(
                            f"Error when parsing yaml: {e.problem} {e.problem_mark}",
                            cause=e,
                        )

                    es_collection = datastore().hit
                    lucene_queries = LuceneBackend(index_names=[es_collection.index_name]).convert_rule(sigma_rule)

                    query = " AND ".join([f"({q})" for q in lucene_queries])

                num_hits = datastore().hit.search(query, rows=1)["total"]
                if num_hits > 0:
                    bundle = create_correlated_bundle(rule, query, [])
                    datastore().hit.update_by_query(
                        f"({query}) AND -howler.bundles:{bundle.howler.id}",
                        [
                            hit_helper.list_add(
                                "howler.bundles",
                                bundle.howler.id,
                                if_missing=True,
                            ),
                            OdmUpdateOperation(
                                ESCollection.UPDATE_APPEND,
                                "howler.log",
                                {
                                    "timestamp": "NOW",
                                    "key": "howler.bundles",
                                    "explanation": f"This hit was correlated by the analytic '{rule.name}'.",
                                    "new_value": bundle.howler.id,
                                    "previous_value": "None",
                                    "type": HitOperationType.APPENDED,
                                    "user": "Howler",
                                },
                            ),
                        ],
                    )

                    datastore().hit.commit()

                    child_hits: list[Hit] = datastore().hit.search(
                        f"howler.bundles:{bundle.howler.id}", rows=1000, fl="howler.id"
                    )["items"]
                    datastore().hit.update_by_query(
                        f"howler.id:{bundle.howler.id}",
                        [hit_helper.list_add("howler.hits", hit.howler.id, if_missing=True) for hit in child_hits],
                    )

            elif rule.rule_type == "eql":
                query = rule.rule

                result = datastore().hit.raw_eql_search(query, rows=25, fl=",".join(Hit.flat_fields().keys()))

                if len(result["sequences"]) > 0:
                    for sequence in result["sequences"]:
                        if len(sequence) > 0:
                            create_correlated_bundle(rule, query, sequence)

                correlated_hits = result["items"]

            else:  # pragma: no cover
                raise HowlerValueError(f"Unknown rule type: {rule.rule_type}")  # noqa: TRY301

            if correlated_hits and len(correlated_hits) > 0:
                create_correlated_bundle(rule, query, correlated_hits)
        except Exception as e:
            logger.debug(e, exc_info=True)
            if __scheduler_instance:
                __scheduler_instance.remove_job(f"rule_{rule.analytic_id}")
                # TODO: Allow restarting of rules
                logger.critical(
                    f"Rule {rule.name} ({rule.analytic_id}) has been stopped, due to an exception: {type(e)}",
                    exc_info=True,
                )

    return execute


def register_rules(new_rule: Optional[Analytic] = None, test_override: bool = False):
    "Register all of the created analytic rules as cronjobs"
    global __scheduler_instance
    if not __scheduler_instance:  # pragma: no cover
        logger.error("Scheduler instance does not exist!")
        return

    if "pytest" in sys.modules and not test_override:
        logger.info("Skipping registration, running in a test environment")
        return

    if new_rule:
        if __scheduler_instance.get_job(f"rule_{new_rule.analytic_id}"):
            logger.info(f"Updating existing rule: {new_rule.analytic_id} on interval {new_rule.rule_crontab}")

            # remove the existing job
            __scheduler_instance.remove_job(f"rule_{new_rule.analytic_id}")
        else:
            logger.info(f"Registering new rule: {new_rule.analytic_id} on interval {new_rule.rule_crontab}")
        rules = [new_rule]
    else:
        logger.debug("Registering rules")
        rules: list[Analytic] = datastore().analytic.search("_exists_:rule")["items"]

    total_initialized = 0
    for rule in rules:
        job_id = f"rule_{rule.analytic_id}"
        interval = rule.rule_crontab or f"{random.randint(0, 59)} * * * *"  # noqa: S311

        if __scheduler_instance.get_job(job_id):
            logger.debug(f"Rule {job_id} already running!")
            return

        logger.debug(f"Initializing rule cronjob with:\tJob ID: {job_id}\tRule Name: {rule.name}\tCrontab: {interval}")

        if DEBUG or new_rule:
            _kwargs: dict[str, Any] = {"next_run_time": datetime.now()}
        else:
            _kwargs = {}

        total_initialized += 1
        __scheduler_instance.add_job(
            id=job_id,
            func=create_executor(rule),
            trigger=CronTrigger.from_crontab(interval, timezone=timezone(os.getenv("SCHEDULER_TZ", "America/Toronto"))),
            **_kwargs,
        )

    logger.info(f"Initialized {total_initialized} rules")


def setup_job(sched: BaseScheduler):
    "Initialize the rules cronjobs"
    if not DEBUG and not HWL_ENABLE_RULES:  # pragma: no cover
        logger.debug("Rule integration disabled")
        return

    logger.debug("Rule integration enabled")

    global __scheduler_instance
    __scheduler_instance = sched

    register_rules()

    logger.debug("Initialization complete")
```
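For orientation, here is a minimal sketch of how these rule cronjobs would be wired into a running scheduler. `setup_job` and `register_rules` come from the diff above; the `BackgroundScheduler` choice and the surrounding bootstrap are illustrative assumptions, not this package's actual startup code.

```python
# Illustrative wiring only: Howler's own app startup does this internally.
from apscheduler.schedulers.background import BackgroundScheduler

from howler.cronjobs import rules

scheduler = BackgroundScheduler()
scheduler.start()

# Stores the scheduler in the module-level __scheduler_instance and registers
# every analytic matching _exists_:rule as a cron job.
rules.setup_job(scheduler)
```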
howler/cronjobs/view_cleanup.py
ADDED
@@ -0,0 +1,88 @@

```python
import os
from datetime import datetime
from typing import Any, List

from apscheduler.schedulers.base import BaseScheduler
from apscheduler.triggers.cron import CronTrigger
from pytz import timezone

from howler.common.logging import get_logger
from howler.config import DEBUG, config

logger = get_logger(__file__)


def execute():
    """Delete any pinned views that no longer exist"""
    from howler.common.loader import datastore

    # Initialize datastore
    ds = datastore()
    # Fetch the first page of users (needed to initialize the total)
    result = ds.user.search("*:*", rows=250, fl="*")
    total_user_count = result["total"]
    user_list: List[Any] = result["items"]
    # Do the same thing for the views
    result = ds.view.search("*:*", rows=250)
    total_view_count = result["total"]
    view_list: List[Any] = result["items"]
    view_ids: List[str] = []

    # Collect all views
    while len(view_list) < total_view_count:
        view_list.extend(ds.view.search("*:*", rows=250, offset=len(view_list))["items"])

    # Collect all users
    while len(user_list) < total_user_count:
        user_list.extend(ds.user.search("*:*", rows=250, offset=len(user_list))["items"])

    for view in view_list:
        view_ids.append(view["view_id"])

    # Iterate over each user to see if the dashboard contains invalid entries (deleted views)
    for user in user_list:
        valid_entries = []
        # No views/analytics saved to the dashboard? Skip it
        if user["dashboard"] == []:
            continue
        for dashboard_entry in user["dashboard"]:
            if dashboard_entry["type"] != "view" or (
                dashboard_entry["type"] == "view" and dashboard_entry["entry_id"] in view_ids
            ):
                valid_entries.append(dashboard_entry)
        # If there are fewer valid entries than the current dashboard, one or more pins are invalid
        if len(valid_entries) < len(user["dashboard"]):
            # Set the user dashboard to the valid entries and update the user
            user["dashboard"] = valid_entries
            ds.user.save(user["uname"], user)


def setup_job(sched: BaseScheduler):
    """Initialize the view cleanup job"""
    if not config.system.view_cleanup.enabled:
        if not DEBUG or config.system.type == "production":
            logger.warning("view cleanup cronjob disabled! This is not recommended for production settings.")

        return

    logger.debug(f"Initializing view cleanup cronjob with cron {config.system.view_cleanup.crontab}")

    if DEBUG:
        _kwargs: dict[str, Any] = {"next_run_time": datetime.now()}
    else:
        _kwargs = {}

    if sched.get_job("view_cleanup"):
        logger.debug("view cleanup job already running!")
        return

    sched.add_job(
        id="view_cleanup",
        func=execute,
        trigger=CronTrigger.from_crontab(
            config.system.view_cleanup.crontab, timezone=timezone(os.getenv("SCHEDULER_TZ", "America/Toronto"))
        ),
        **_kwargs,
    )
    logger.debug("Initialization complete")
```
howler/datastore/README.md
ADDED
@@ -0,0 +1,112 @@

# Elasticsearch datastore support

This component aims to simplify the connection between your app and Elasticsearch by providing a single interface to use with all your different indices.

Advantages:

- Connection keep-alive and retries
- No need to worry if your Elastic cluster goes down: your app will resume where it left off once the cluster is back online.
- Keeps index management simple:
  - If you register a new index to the datastore, the associated index in Elastic will be created.
  - If you add or remove a field in an index, the associated index in Elastic will be updated.
  - You can easily re-index, re-shard, or change an index's replication.
- Supports bulk operations and archiving
- Supports all basic operations: get, put, update, search, facet, stats, histogram...

Disadvantages:

- Search uses Lucene only (this covers 99% of use cases but may be extended if needed)

## Naming convention

Take note of the naming conventions:

- An Elastic index is referred to as a `Collection`, because it may have multiple indexes as its backend
- The object that holds multiple collections is referred to as a `Datastore`

## Usage

### Instantiating a datastore

When instantiating a datastore object, no collections are associated with it yet. You need to register each collection on the object so it is kept in sync and accessible. After a collection is registered, you can access it as a property of the datastore object.

Example:

```python
from howler.common import loader
from myapp.models.mymodel import MyModel

ds = loader.get_esstore()
ds.register('mymodel', MyModel)

my_document = ds.mymodel.get(document_id)
```

### Creating your own datastore

This gets very complicated when you have multiple collections, which is why we recommend creating your own datastore helper class with all collections pre-loaded.

Example:

```python
from howler.common import loader
from howler.datastore.collection import ESCollection
from howler.datastore.store import ESStore

from myapp.models.mycollection import MyCollection
# ... + all other collections


class MyDatastore(object):
    def __init__(self, esstore_object: ESStore = None):
        self.ds = esstore_object or loader.get_esstore()
        self.ds.register('mycollection', MyCollection)
        # ... + all other collections

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.ds.close()

    @property
    def mycollection(self) -> ESCollection[MyCollection]:
        return self.ds.mycollection

    # ... + all other properties tied to the different collections
```

### Collection functions

Once you've set up your own datastore object, you can start using the different functions each collection offers. Here's a breakdown (a short usage sketch follows these lists):

- `archive(query)`: Send all matching documents to the collection's archive
- `multiget(id_list)`: Get multiple documents for the given id_list
- `exists(id)`: Check if a document matching this id exists
- `get(id)`: Get a document matching the id (retry twice if missing)
- `get_if_exists(id)`: Get a document matching the id (do not retry)
- `require(id)`: Try to get a document matching the id, retrying forever until it exists
- `save(id, doc)`: Save a document to this id, overwriting it if it exists
- `delete(id)`: Delete the document matching this id
- `delete_by_query(query)`: Delete all documents matching this query
- `update(id, operations)`: Perform the given update operations on this id
- `update_by_query(query, operations)`: Perform the given update operations on all documents matching this query
- `search(query)`: Find documents matching the query and return one page
- `stream_search(query)`: Return all documents matching the query
- `histogram(field, start, end, gap)`: Count how many documents fall into each gap between start and end (works on date and int fields)
- `facet(field)`: Return the top 10 values of a field
- `stats(field)`: Generate the min, max, avg, and count of an int field
- `grouped_search(group_field, query)`: Find all documents matching a query and group the results by this field
- `fields()`: List all fields of a collection

Management-related functions (*these should not really be used in normal code; they are tailored to fixing issues and testing the system*):

- `commit()`: Save the indexes to disk now and make all documents available for search
- `keys()`: Return the ids of all documents in the index
- `fix_ilm()`: Fix the Index Lifecycle Management configuration for the associated indices
- `fix_replicas()`: Fix the number of replicas of the associated indices
- `fix_shards()`: Fix the number of shards of the associated indices
- `reindex()`: Reindex all documents
- `wipe()`: Delete this collection and create an empty version of it
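A short usage sketch tying a few of the calls above together, assuming the `MyDatastore` helper from the earlier example; the document ids, field names, and queries are made up for illustration:

```python
# Illustrative only: ids, fields, and queries below are invented values.
with MyDatastore() as store:
    # Save (or overwrite) a document, then read it back
    store.mycollection.save("doc_1", {"name": "example", "count": 3})
    doc = store.mycollection.get("doc_1")

    # One page of matching documents, then the top values of a field
    page = store.mycollection.search("name:example")
    top_names = store.mycollection.facet("name")

    # Remove everything matching a Lucene query
    store.mycollection.delete_by_query("count:[10 TO *]")
```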
howler/datastore/__init__.py
File without changes
howler/datastore/bulk.py
ADDED
@@ -0,0 +1,72 @@

```python
import json
from copy import deepcopy


class ElasticBulkPlan(object):
    def __init__(self, indexes, model=None):
        self.indexes = indexes
        self.model = model
        self.operations = []

    @property
    def empty(self):
        return len(self.operations) == 0

    def add_delete_operation(self, doc_id, index=None):
        if index:
            self.operations.append(json.dumps({"delete": {"_index": index, "_id": doc_id}}))
        else:
            for cur_index in self.indexes:
                self.operations.append(json.dumps({"delete": {"_index": cur_index, "_id": doc_id}}))

    def add_insert_operation(self, doc_id, doc, index=None):
        if self.model and isinstance(doc, self.model):
            saved_doc = doc.as_primitives(hidden_fields=True)
        elif self.model:
            saved_doc = self.model(doc).as_primitives(hidden_fields=True)
        elif not isinstance(doc, dict):
            saved_doc = {"__non_doc_raw__": doc}
        else:
            saved_doc = deepcopy(doc)
        saved_doc["id"] = doc_id

        self.operations.append(json.dumps({"create": {"_index": index or self.indexes[0], "_id": doc_id}}))
        self.operations.append(json.dumps(saved_doc))

    def add_upsert_operation(self, doc_id, doc, index=None):
        if self.model and isinstance(doc, self.model):
            saved_doc = doc.as_primitives(hidden_fields=True)
        elif self.model:
            saved_doc = self.model(doc).as_primitives(hidden_fields=True)
        elif not isinstance(doc, dict):
            saved_doc = {"__non_doc_raw__": doc}
        else:
            saved_doc = deepcopy(doc)
        saved_doc["id"] = doc_id

        self.operations.append(json.dumps({"update": {"_index": index or self.indexes[0], "_id": doc_id}}))
        self.operations.append(json.dumps({"doc": saved_doc, "doc_as_upsert": True}))

    def add_update_operation(self, doc_id, doc, index=None):
        if self.model and isinstance(doc, self.model):
            saved_doc = doc.as_primitives(hidden_fields=True)
        elif self.model:
            saved_doc = self.model(doc, mask=list(doc.keys())).as_primitives(hidden_fields=True)
        elif not isinstance(doc, dict):
            saved_doc = {"__non_doc_raw__": doc}
        else:
            saved_doc = deepcopy(doc)

        if index:
            self.operations.append(json.dumps({"update": {"_index": index, "_id": doc_id}}))
            self.operations.append(json.dumps({"doc": saved_doc}))
        else:
            for cur_index in self.indexes:
                self.operations.append(json.dumps({"update": {"_index": cur_index, "_id": doc_id}}))
                self.operations.append(json.dumps({"doc": saved_doc}))

    def get_plan_data(self):
        return "\n".join(self.operations)
```
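As a usage illustration, here is a hedged sketch of building a plan with the class above; the index name and documents are hypothetical, and the resulting payload would normally be handed to the collection's bulk call rather than printed:

```python
# Illustrative only: "hit" and the documents below are made-up values.
plan = ElasticBulkPlan(indexes=["hit"])

plan.add_insert_operation("doc_1", {"field": "value"})   # create
plan.add_upsert_operation("doc_2", {"field": "other"})   # update-or-create
plan.add_delete_operation("doc_3")                       # delete from all indexes

if not plan.empty:
    # Newline-delimited action/source pairs, in Elasticsearch _bulk format
    print(plan.get_plan_data())
```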