howler_api-2.13.0.dev329-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of howler-api might be problematic.

Files changed (200)
  1. howler/__init__.py +0 -0
  2. howler/actions/__init__.py +167 -0
  3. howler/actions/add_label.py +111 -0
  4. howler/actions/add_to_bundle.py +159 -0
  5. howler/actions/change_field.py +76 -0
  6. howler/actions/demote.py +160 -0
  7. howler/actions/example_plugin.py +104 -0
  8. howler/actions/prioritization.py +93 -0
  9. howler/actions/promote.py +147 -0
  10. howler/actions/remove_from_bundle.py +133 -0
  11. howler/actions/remove_label.py +111 -0
  12. howler/actions/transition.py +200 -0
  13. howler/api/__init__.py +249 -0
  14. howler/api/base.py +88 -0
  15. howler/api/socket.py +114 -0
  16. howler/api/v1/__init__.py +97 -0
  17. howler/api/v1/action.py +372 -0
  18. howler/api/v1/analytic.py +748 -0
  19. howler/api/v1/auth.py +382 -0
  20. howler/api/v1/borealis.py +101 -0
  21. howler/api/v1/configs.py +55 -0
  22. howler/api/v1/dossier.py +222 -0
  23. howler/api/v1/help.py +28 -0
  24. howler/api/v1/hit.py +1181 -0
  25. howler/api/v1/notebook.py +82 -0
  26. howler/api/v1/overview.py +191 -0
  27. howler/api/v1/search.py +715 -0
  28. howler/api/v1/template.py +206 -0
  29. howler/api/v1/tool.py +183 -0
  30. howler/api/v1/user.py +414 -0
  31. howler/api/v1/utils/__init__.py +0 -0
  32. howler/api/v1/utils/etag.py +84 -0
  33. howler/api/v1/view.py +288 -0
  34. howler/app.py +235 -0
  35. howler/common/README.md +144 -0
  36. howler/common/__init__.py +0 -0
  37. howler/common/classification.py +979 -0
  38. howler/common/classification.yml +107 -0
  39. howler/common/exceptions.py +167 -0
  40. howler/common/hexdump.py +48 -0
  41. howler/common/iprange.py +171 -0
  42. howler/common/loader.py +154 -0
  43. howler/common/logging/__init__.py +241 -0
  44. howler/common/logging/audit.py +138 -0
  45. howler/common/logging/format.py +38 -0
  46. howler/common/net.py +79 -0
  47. howler/common/net_static.py +1494 -0
  48. howler/common/random_user.py +316 -0
  49. howler/common/swagger.py +117 -0
  50. howler/config.py +64 -0
  51. howler/cronjobs/__init__.py +29 -0
  52. howler/cronjobs/retention.py +61 -0
  53. howler/cronjobs/rules.py +274 -0
  54. howler/cronjobs/view_cleanup.py +88 -0
  55. howler/datastore/README.md +112 -0
  56. howler/datastore/__init__.py +0 -0
  57. howler/datastore/bulk.py +72 -0
  58. howler/datastore/collection.py +2327 -0
  59. howler/datastore/constants.py +117 -0
  60. howler/datastore/exceptions.py +41 -0
  61. howler/datastore/howler_store.py +105 -0
  62. howler/datastore/migrations/fix_process.py +41 -0
  63. howler/datastore/operations.py +130 -0
  64. howler/datastore/schemas.py +90 -0
  65. howler/datastore/store.py +231 -0
  66. howler/datastore/support/__init__.py +0 -0
  67. howler/datastore/support/build.py +214 -0
  68. howler/datastore/support/schemas.py +90 -0
  69. howler/datastore/types.py +22 -0
  70. howler/error.py +91 -0
  71. howler/external/__init__.py +0 -0
  72. howler/external/generate_mitre.py +96 -0
  73. howler/external/generate_sigma_rules.py +31 -0
  74. howler/external/generate_tlds.py +47 -0
  75. howler/external/reindex_data.py +46 -0
  76. howler/external/wipe_databases.py +58 -0
  77. howler/gunicorn_config.py +25 -0
  78. howler/healthz.py +47 -0
  79. howler/helper/__init__.py +0 -0
  80. howler/helper/azure.py +50 -0
  81. howler/helper/discover.py +59 -0
  82. howler/helper/hit.py +236 -0
  83. howler/helper/oauth.py +247 -0
  84. howler/helper/search.py +92 -0
  85. howler/helper/workflow.py +110 -0
  86. howler/helper/ws.py +378 -0
  87. howler/odm/README.md +102 -0
  88. howler/odm/__init__.py +1 -0
  89. howler/odm/base.py +1504 -0
  90. howler/odm/charter.txt +146 -0
  91. howler/odm/helper.py +416 -0
  92. howler/odm/howler_enum.py +25 -0
  93. howler/odm/models/__init__.py +0 -0
  94. howler/odm/models/action.py +33 -0
  95. howler/odm/models/analytic.py +90 -0
  96. howler/odm/models/assemblyline.py +48 -0
  97. howler/odm/models/aws.py +23 -0
  98. howler/odm/models/azure.py +16 -0
  99. howler/odm/models/cbs.py +44 -0
  100. howler/odm/models/config.py +558 -0
  101. howler/odm/models/dossier.py +33 -0
  102. howler/odm/models/ecs/__init__.py +0 -0
  103. howler/odm/models/ecs/agent.py +17 -0
  104. howler/odm/models/ecs/autonomous_system.py +16 -0
  105. howler/odm/models/ecs/client.py +149 -0
  106. howler/odm/models/ecs/cloud.py +141 -0
  107. howler/odm/models/ecs/code_signature.py +27 -0
  108. howler/odm/models/ecs/container.py +32 -0
  109. howler/odm/models/ecs/dns.py +62 -0
  110. howler/odm/models/ecs/egress.py +10 -0
  111. howler/odm/models/ecs/elf.py +74 -0
  112. howler/odm/models/ecs/email.py +122 -0
  113. howler/odm/models/ecs/error.py +14 -0
  114. howler/odm/models/ecs/event.py +140 -0
  115. howler/odm/models/ecs/faas.py +24 -0
  116. howler/odm/models/ecs/file.py +84 -0
  117. howler/odm/models/ecs/geo.py +30 -0
  118. howler/odm/models/ecs/group.py +18 -0
  119. howler/odm/models/ecs/hash.py +16 -0
  120. howler/odm/models/ecs/host.py +17 -0
  121. howler/odm/models/ecs/http.py +37 -0
  122. howler/odm/models/ecs/ingress.py +12 -0
  123. howler/odm/models/ecs/interface.py +21 -0
  124. howler/odm/models/ecs/network.py +30 -0
  125. howler/odm/models/ecs/observer.py +45 -0
  126. howler/odm/models/ecs/organization.py +12 -0
  127. howler/odm/models/ecs/os.py +21 -0
  128. howler/odm/models/ecs/pe.py +17 -0
  129. howler/odm/models/ecs/process.py +216 -0
  130. howler/odm/models/ecs/registry.py +26 -0
  131. howler/odm/models/ecs/related.py +45 -0
  132. howler/odm/models/ecs/rule.py +51 -0
  133. howler/odm/models/ecs/server.py +24 -0
  134. howler/odm/models/ecs/threat.py +247 -0
  135. howler/odm/models/ecs/tls.py +58 -0
  136. howler/odm/models/ecs/url.py +51 -0
  137. howler/odm/models/ecs/user.py +57 -0
  138. howler/odm/models/ecs/user_agent.py +20 -0
  139. howler/odm/models/ecs/vulnerability.py +41 -0
  140. howler/odm/models/gcp.py +16 -0
  141. howler/odm/models/hit.py +356 -0
  142. howler/odm/models/howler_data.py +328 -0
  143. howler/odm/models/lead.py +33 -0
  144. howler/odm/models/localized_label.py +13 -0
  145. howler/odm/models/overview.py +16 -0
  146. howler/odm/models/pivot.py +40 -0
  147. howler/odm/models/template.py +24 -0
  148. howler/odm/models/user.py +83 -0
  149. howler/odm/models/view.py +34 -0
  150. howler/odm/random_data.py +888 -0
  151. howler/odm/randomizer.py +606 -0
  152. howler/patched.py +5 -0
  153. howler/plugins/__init__.py +25 -0
  154. howler/plugins/config.py +123 -0
  155. howler/remote/__init__.py +0 -0
  156. howler/remote/datatypes/README.md +355 -0
  157. howler/remote/datatypes/__init__.py +98 -0
  158. howler/remote/datatypes/counters.py +63 -0
  159. howler/remote/datatypes/events.py +66 -0
  160. howler/remote/datatypes/hash.py +206 -0
  161. howler/remote/datatypes/lock.py +42 -0
  162. howler/remote/datatypes/queues/__init__.py +0 -0
  163. howler/remote/datatypes/queues/comms.py +59 -0
  164. howler/remote/datatypes/queues/multi.py +32 -0
  165. howler/remote/datatypes/queues/named.py +93 -0
  166. howler/remote/datatypes/queues/priority.py +215 -0
  167. howler/remote/datatypes/set.py +118 -0
  168. howler/remote/datatypes/user_quota_tracker.py +54 -0
  169. howler/security/__init__.py +253 -0
  170. howler/security/socket.py +108 -0
  171. howler/security/utils.py +185 -0
  172. howler/services/__init__.py +0 -0
  173. howler/services/action_service.py +111 -0
  174. howler/services/analytic_service.py +128 -0
  175. howler/services/auth_service.py +323 -0
  176. howler/services/config_service.py +128 -0
  177. howler/services/dossier_service.py +252 -0
  178. howler/services/event_service.py +93 -0
  179. howler/services/hit_service.py +893 -0
  180. howler/services/jwt_service.py +158 -0
  181. howler/services/lucene_service.py +286 -0
  182. howler/services/notebook_service.py +119 -0
  183. howler/services/overview_service.py +44 -0
  184. howler/services/template_service.py +45 -0
  185. howler/services/user_service.py +330 -0
  186. howler/utils/__init__.py +0 -0
  187. howler/utils/annotations.py +28 -0
  188. howler/utils/chunk.py +38 -0
  189. howler/utils/dict_utils.py +200 -0
  190. howler/utils/isotime.py +17 -0
  191. howler/utils/list_utils.py +11 -0
  192. howler/utils/lucene.py +77 -0
  193. howler/utils/path.py +27 -0
  194. howler/utils/socket_utils.py +61 -0
  195. howler/utils/str_utils.py +256 -0
  196. howler/utils/uid.py +47 -0
  197. howler_api-2.13.0.dev329.dist-info/METADATA +71 -0
  198. howler_api-2.13.0.dev329.dist-info/RECORD +200 -0
  199. howler_api-2.13.0.dev329.dist-info/WHEEL +4 -0
  200. howler_api-2.13.0.dev329.dist-info/entry_points.txt +8 -0
howler/cronjobs/rules.py
@@ -0,0 +1,274 @@
+ import hashlib
+ import json
+ import os
+ import random
+ import re
+ import sys
+ from datetime import datetime
+ from typing import Any, Optional
+
+ from apscheduler.schedulers.base import BaseScheduler
+ from apscheduler.triggers.cron import CronTrigger
+ from pytz import timezone
+ from sigma.backends.elasticsearch import LuceneBackend
+ from sigma.rule import SigmaRule
+ from yaml.scanner import ScannerError
+
+ from howler.common.exceptions import HowlerValueError
+ from howler.common.loader import datastore
+ from howler.common.logging import get_logger
+ from howler.config import DEBUG, HWL_ENABLE_RULES
+ from howler.datastore.collection import ESCollection
+ from howler.datastore.operations import OdmHelper, OdmUpdateOperation
+ from howler.odm.models.analytic import Analytic
+ from howler.odm.models.hit import Hit
+ from howler.odm.models.howler_data import HitOperationType
+
+ logger = get_logger(__file__)
+ hit_helper = OdmHelper(Hit)
+
+ __scheduler_instance: Optional[BaseScheduler] = None
+
+
+ def create_correlated_bundle(rule: Analytic, query: str, correlated_hits: list[Hit]):
+     "Create a bundle based on the results of an analytic"
+     # We'll create a hash using the hashes of the children, and the analytic ID/current time
+     bundle_hash = hashlib.sha256()
+     bundle_hash.update(rule.analytic_id.encode())
+     bundle_hash.update(query.replace("now", datetime.now().isoformat()).encode())
+     for match in correlated_hits:
+         bundle_hash.update(match.howler.hash.encode())
+
+     hashed = bundle_hash.hexdigest()
+
+     # If a matching bundle already exists, just reuse it (likely only ever lucene specific)
+     existing_result = datastore().hit.search(f"howler.hash:{hashed}", rows=1)
+     if existing_result["total"] > 0:
+         logger.debug(f"Rule hash {hashed} exists - skipping create")
+         return existing_result["items"][0]
+
+     child_ids = [match.howler.id for match in correlated_hits]
+
+     correlated_bundle = Hit(
+         {
+             "howler.analytic": rule.name,
+             "howler.detection": "Rule",
+             "howler.score": 0.0,
+             "howler.hash": hashed,
+             "howler.is_bundle": True,
+             "howler.hits": child_ids,
+             "howler.data": [
+                 json.dumps(
+                     {
+                         "raw": rule.rule,
+                         "sanitized": query,
+                     }
+                 )
+             ],
+             "event.created": "NOW",
+             "event.kind": "alert",
+             "event.module": rule.rule_type,
+             "event.provider": "howler",
+             "event.reason": f"Children match {query}",
+             "event.type": ["info"],
+         }
+     )
+     correlated_bundle.event.id = correlated_bundle.howler.id
+
+     datastore().hit.save(correlated_bundle.howler.id, correlated_bundle)
+
+     if len(child_ids) > 0:
+         datastore().hit.update_by_query(
+             f"howler.id:({' OR '.join(child_ids)})",
+             [
+                 hit_helper.list_add(
+                     "howler.bundles",
+                     correlated_bundle.howler.id,
+                     if_missing=True,
+                 ),
+                 OdmUpdateOperation(
+                     ESCollection.UPDATE_APPEND,
+                     "howler.log",
+                     {
+                         "timestamp": "NOW",
+                         "key": "howler.bundles",
+                         "explanation": f"This hit was correlated by the analytic '{rule.name}'.",
+                         "new_value": correlated_bundle.howler.id,
+                         "previous_value": "None",
+                         "type": HitOperationType.APPENDED,
+                         "user": "Howler",
+                     },
+                 ),
+             ],
+         )
+
+     return correlated_bundle
+
+
+ def create_executor(rule: Analytic):  # noqa: C901
+     "Create a cronjob for a given analytic"
+
+     def execute():  # noqa: C901
+         "Execute the rule"
+         try:
+             if not rule.rule or not rule.rule_type:
+                 logger.error("Invalid rule %s! Skipping", rule.analytic_id)
+                 return
+
+             logger.info(
+                 "Executing rule %s (%s)",
+                 rule.name,
+                 rule.analytic_id,
+             )
+
+             correlated_hits: Optional[list[Hit]] = None
+
+             if rule.rule_type in ["lucene", "sigma"]:
+                 if rule.rule_type == "lucene":
+                     query = re.sub(r"\n+", " ", re.sub(r"#.+", "", rule.rule)).strip()
+                 else:
+                     try:
+                         sigma_rule = SigmaRule.from_yaml(rule.rule)
+                     except ScannerError as e:
+                         raise HowlerValueError(
+                             f"Error when parsing yaml: {e.problem} {e.problem_mark}",
+                             cause=e,
+                         )
+
+                     es_collection = datastore().hit
+                     lucene_queries = LuceneBackend(index_names=[es_collection.index_name]).convert_rule(sigma_rule)
+
+                     query = " AND ".join([f"({q})" for q in lucene_queries])
+
+                 num_hits = datastore().hit.search(query, rows=1)["total"]
+                 if num_hits > 0:
+                     bundle = create_correlated_bundle(rule, query, [])
+                     datastore().hit.update_by_query(
+                         f"({query}) AND -howler.bundles:{bundle.howler.id}",
+                         [
+                             hit_helper.list_add(
+                                 "howler.bundles",
+                                 bundle.howler.id,
+                                 if_missing=True,
+                             ),
+                             OdmUpdateOperation(
+                                 ESCollection.UPDATE_APPEND,
+                                 "howler.log",
+                                 {
+                                     "timestamp": "NOW",
+                                     "key": "howler.bundles",
+                                     "explanation": f"This hit was correlated by the analytic '{rule.name}'.",
+                                     "new_value": bundle.howler.id,
+                                     "previous_value": "None",
+                                     "type": HitOperationType.APPENDED,
+                                     "user": "Howler",
+                                 },
+                             ),
+                         ],
+                     )
+
+                     datastore().hit.commit()
+
+                     child_hits: list[Hit] = datastore().hit.search(
+                         f"howler.bundles:{bundle.howler.id}", rows=1000, fl="howler.id"
+                     )["items"]
+                     datastore().hit.update_by_query(
+                         f"howler.id:{bundle.howler.id}",
+                         [hit_helper.list_add("howler.hits", hit.howler.id, if_missing=True) for hit in child_hits],
+                     )
+
+             elif rule.rule_type == "eql":
+                 query = rule.rule
+
+                 result = datastore().hit.raw_eql_search(query, rows=25, fl=",".join(Hit.flat_fields().keys()))
+
+                 if len(result["sequences"]) > 0:
+                     for sequence in result["sequences"]:
+                         if len(sequence) > 0:
+                             create_correlated_bundle(rule, query, sequence)
+
+                 correlated_hits = result["items"]
+
+             else:  # pragma: no cover
+                 raise HowlerValueError(f"Unknown rule type: {rule.rule_type}")  # noqa: TRY301
+
+             if correlated_hits and len(correlated_hits) > 0:
+                 create_correlated_bundle(rule, query, correlated_hits)
+         except Exception as e:
+             logger.debug(e, exc_info=True)
+             if __scheduler_instance:
+                 __scheduler_instance.remove_job(f"rule_{rule.analytic_id}")
+                 # TODO: Allow restarting of rules
+                 logger.critical(
+                     f"Rule {rule.name} ({rule.analytic_id}) has been stopped, due to an exception: {type(e)}",
+                     exc_info=True,
+                 )
+
+     return execute
+
+
+ def register_rules(new_rule: Optional[Analytic] = None, test_override: bool = False):
+     "Register all of the created analytic rules as cronjobs"
+     global __scheduler_instance
+     if not __scheduler_instance:  # pragma: no cover
+         logger.error("Scheduler instance does not exist!")
+         return
+
+     if "pytest" in sys.modules and not test_override:
+         logger.info("Skipping registration, running in a test environment")
+         return
+
+     if new_rule:
+         if __scheduler_instance.get_job(f"rule_{new_rule.analytic_id}"):
+             logger.info(f"Updating existing rule: {new_rule.analytic_id} on interval {new_rule.rule_crontab}")
+
+             # remove the existing job
+             __scheduler_instance.remove_job(f"rule_{new_rule.analytic_id}")
+         else:
+             logger.info(f"Registering new rule: {new_rule.analytic_id} on interval {new_rule.rule_crontab}")
+         rules = [new_rule]
+     else:
+         logger.debug("Registering rules")
+         rules: list[Analytic] = datastore().analytic.search("_exists_:rule")["items"]
+
+     total_initialized = 0
+     for rule in rules:
+         job_id = f"rule_{rule.analytic_id}"
+         interval = rule.rule_crontab or f"{random.randint(0, 59)} * * * *"  # noqa: S311
+
+         if __scheduler_instance.get_job(job_id):
+             logger.debug(f"Rule {job_id} already running!")
+             return
+
+         logger.debug(f"Initializing rule cronjob with:\tJob ID: {job_id}\tRule Name: {rule.name}\tCrontab: {interval}")
+
+         if DEBUG or new_rule:
+             _kwargs: dict[str, Any] = {"next_run_time": datetime.now()}
+         else:
+             _kwargs = {}
+
+         total_initialized += 1
+         __scheduler_instance.add_job(
+             id=job_id,
+             func=create_executor(rule),
+             trigger=CronTrigger.from_crontab(interval, timezone=timezone(os.getenv("SCHEDULER_TZ", "America/Toronto"))),
+             **_kwargs,
+         )
+
+     logger.info(f"Initialized {total_initialized} rules")
+
+
+ def setup_job(sched: BaseScheduler):
+     "Initialize the rules cronjobs"
+     if not DEBUG and not HWL_ENABLE_RULES:  # pragma: no cover
+         logger.debug("Rule integration disabled")
+         return
+
+     logger.debug("Rule integration enabled")
+
+     global __scheduler_instance
+     __scheduler_instance = sched
+
+     register_rules()
+
+     logger.debug("Initialization complete")
howler/cronjobs/view_cleanup.py
@@ -0,0 +1,88 @@
+ import os
+ from datetime import datetime
+ from typing import Any, List
+
+ from apscheduler.schedulers.base import BaseScheduler
+ from apscheduler.triggers.cron import CronTrigger
+ from pytz import timezone
+
+ from howler.common.logging import get_logger
+ from howler.config import DEBUG, config
+
+ logger = get_logger(__file__)
+
+
+ def execute():
+     """Delete any pinned views that no longer exist"""
+     from howler.common.loader import datastore
+
+     # Initialize datastore
+     ds = datastore()
+     # fetch the first result from user ds (needed to initialize total)
+     result = ds.user.search("*:*", rows=250, fl="*")
+     total_user_count = result["total"]
+     user_list: List[Any] = result["items"]
+     # Do the same thing for the views
+     result = ds.view.search("*:*", rows=250)
+     total_view_count = result["total"]
+     view_list: List[Any] = result["items"]
+     view_ids: List[str] = []
+
+     # Collect all views
+     while len(view_list) < total_view_count:
+         view_list.extend(ds.view.search("*:*", rows=250, offset=len(user_list)))
+
+     # Collect all users
+     while len(user_list) < total_user_count:
+         user_list.extend(ds.user.search("*:*", rows=250, offset=len(user_list)))
+
+     for view in view_list:
+         view_ids.append(view["view_id"])
+
+     # Iterate over each user to see if the dashboard contains invalid entries (deleted views)
+     for user in user_list:
+         valid_entries = []
+         # No views/analytics saved to the dashboard? Skip it
+         if user["dashboard"] == []:
+             continue
+         for dashboard_entry in user["dashboard"]:
+             if dashboard_entry["type"] != "view" or (
+                 dashboard_entry["type"] == "view" and dashboard_entry["entry_id"] in view_ids
+             ):
+                 valid_entries.append(dashboard_entry)
+         # If the length of valid entries is less than the current dashboard, one or more pins are invalid
+         if len(valid_entries) < len(user["dashboard"]):
+             # set the user dashboard to valid entries
+             user["dashboard"] = valid_entries
+             # update the user
+             ds.user.save(user["uname"], user)
+
+
+ def setup_job(sched: BaseScheduler):
+     """Initialize the view cleanup job"""
+     if not config.system.view_cleanup.enabled:
+         if not DEBUG or config.system.type == "production":
+             logger.warning("view cleanup cronjob disabled! This is not recommended for production settings.")
+
+         return
+
+     logger.debug(f"Initializing view cleanup cronjob with cron {config.system.view_cleanup.crontab}")
+
+     if DEBUG:
+         _kwargs: dict[str, Any] = {"next_run_time": datetime.now()}
+     else:
+         _kwargs = {}
+
+     if sched.get_job("view_cleanup"):
+         logger.debug("view cleanup job already running!")
+         return
+
+     sched.add_job(
+         id="view_cleanup",
+         func=execute,
+         trigger=CronTrigger.from_crontab(
+             config.system.view_cleanup.crontab, timezone=timezone(os.getenv("SCHEDULER_TZ", "America/Toronto"))
+         ),
+         **_kwargs,
+     )
+     logger.debug("Initialization complete")
howler/datastore/README.md
@@ -0,0 +1,112 @@
+ # Elasticsearch datastore support
+
+ This component aims to simplify the connection between your app and Elasticsearch by providing a single interface to use with all your different indices.
+
+ Advantages:
+
+ - Connection keep-alive and retries
+   - No need to worry if your Elastic cluster goes down; your app will resume where it was once the cluster is back online.
+ - Keeps index management simple:
+   - If you register a new index to the datastore, the associated index in Elastic will be created.
+   - If you add or remove a field in an index, the associated index in Elastic will be updated.
+   - You can easily re-index, re-shard or change an index's replication.
+ - Supports bulk operations and archiving
+ - Supports all basic operations: get, put, update, search, facet, stats, histogram...
+
+ Disadvantages:
+
+ - Search uses Lucene only (covers 99% of use-cases but may be extended if needed)
+
+ ## Naming convention
+
+ Take note of the different naming conventions:
+
+ - An Elastic index is referred to as a `Collection`, because it may have multiple indexes as its backend
+ - The object that holds multiple collections is referred to as a `Datastore`
+
+ ## Usage
+
+ ### Instantiating a datastore
+
+ When instantiating a datastore object, no collections are associated with it yet. You need to register each collection on the object so that it is kept in sync and accessible. After a collection is registered, it is available as a property of the datastore object.
+
+ Example:
+
+ ```python
+ from howler.common import loader
+ from myapp.models.mymodel import MyModel
+
+ ds = loader.get_esstore()
+ ds.register('mymodel', MyModel)
+
+ my_document = ds.mymodel.get(document_id)
+
+ ```
+
+ ### Creating your own datastore
+
+ This gets complicated quickly when you have multiple collections, which is why we recommend creating your own datastore helper class that has all collections pre-loaded.
+
+ Example:
+
+ ```python
+ from howler.common import loader
+ from howler.datastore.collection import ESCollection
+ from howler.datastore.store import ESStore
+
+ from myapp.models.mycollection import MyCollection
+ # ... + all other collections
+
+
+ class MyDatastore(object):
+     def __init__(self, esstore_object: ESStore = None):
+
+         self.ds = esstore_object or loader.get_esstore()
+         self.ds.register('mycollection', MyCollection)
+         # ... + all other collections
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self.ds.close()
+
+     @property
+     def mycollection(self) -> ESCollection[MyCollection]:
+         return self.ds.mycollection
+
+     # ... + all other properties tied to the different collections
+ ```
+
+ ### Collection functions
+
+ Once you've set up your own datastore object, you can start using the different functions each collection offers (a usage sketch follows this list). Here's a breakdown:
+
+ - `archive(query)`: Send all matching documents to the archive of the collection
+ - `multiget(id_list)`: Get multiple documents for the IDs in `id_list`
+ - `exists(id)`: Check if a document matching this ID exists
+ - `get(id)`: Get a document matching the ID (retry twice if missing)
+ - `get_if_exists(id)`: Get a document matching the ID (do not retry)
+ - `require(id)`: Try to get a document matching the ID and retry forever until it exists
+ - `save(id, doc)`: Save a document to this ID, overwriting it if it exists
+ - `delete(id)`: Delete the document matching this ID
+ - `delete_by_query(query)`: Delete all documents matching this query
+ - `update(id, operations)`: Perform the given update operations on this ID
+ - `update_by_query(query, operations)`: Perform the given update operations on all documents matching this query
+ - `search(query)`: Find documents matching the query and return one page
+ - `stream_search(query)`: Return all documents matching the query
+ - `histogram(field, start, end, gap)`: Count how many documents fall into each gap from start to end (works on date and int fields)
+ - `facet(field)`: Return the top 10 values of a field
+ - `stats(field)`: Generate the min, max, avg and count of an int field
+ - `grouped_search(group_field, query)`: Find all documents matching a query and group the results by this field
+ - `fields()`: List all fields of a collection
+
+ Management-related functions (*these should not really be used in normal code; they are tailored to fixing issues and testing the system*):
+
+ - `commit()`: Save the indexes to disk now and make all documents available for search
+ - `keys()`: Return the IDs of all documents in the index
+ - `fix_ilm()`: Fix the Index Lifecycle Management configuration of the associated indices
+ - `fix_replicas()`: Fix the number of copies of the associated indices
+ - `fix_shards()`: Fix the number of shards of the associated indices
+ - `reindex()`: Reindex all documents
+ - `wipe()`: Delete this collection and recreate an empty version of it
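As an editorial illustration of the collection API listed above (not part of the packaged README), here is a minimal sketch. It assumes the `MyDatastore` helper class from the previous section; the document IDs, field names and queries are placeholders.

```python
# Minimal usage sketch; MyDatastore is the helper class sketched above, and all
# IDs, fields and queries are illustrative placeholders.
with MyDatastore() as store:
    # Save a document under an explicit ID, overwriting any existing copy, then read it back.
    store.mycollection.save("doc-1", {"title": "example", "count": 3})
    doc = store.mycollection.get_if_exists("doc-1")

    # One page of search results, plus the top values of a field.
    page = store.mycollection.search("title:example")
    top_titles = store.mycollection.facet("title")

    # Remove everything that matched the illustrative query.
    store.mycollection.delete_by_query("title:example")
```

In the Howler code above, collections are already pre-registered and reached through `datastore()` (for example `datastore().hit` in the cronjobs), rather than through a hand-rolled helper like this.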
howler/datastore/__init__.py (file without changes)
howler/datastore/bulk.py
@@ -0,0 +1,72 @@
+ import json
+ from copy import deepcopy
+
+
+ class ElasticBulkPlan(object):
+     def __init__(self, indexes, model=None):
+         self.indexes = indexes
+         self.model = model
+         self.operations = []
+
+     @property
+     def empty(self):
+         return len(self.operations) == 0
+
+     def add_delete_operation(self, doc_id, index=None):
+         if index:
+             self.operations.append(json.dumps({"delete": {"_index": index, "_id": doc_id}}))
+         else:
+             for cur_index in self.indexes:
+                 self.operations.append(json.dumps({"delete": {"_index": cur_index, "_id": doc_id}}))
+
+     def add_insert_operation(self, doc_id, doc, index=None):
+         if isinstance(doc, self.model):
+             saved_doc = doc.as_primitives(hidden_fields=True)
+         elif self.model:
+             saved_doc = self.model(doc).as_primitives(hidden_fields=True)
+         else:
+             if not isinstance(doc, dict):
+                 saved_doc = {"__non_doc_raw__": doc}
+             else:
+                 saved_doc = deepcopy(doc)
+         saved_doc["id"] = doc_id
+
+         self.operations.append(json.dumps({"create": {"_index": index or self.indexes[0], "_id": doc_id}}))
+         self.operations.append(json.dumps(saved_doc))
+
+     def add_upsert_operation(self, doc_id, doc, index=None):
+         if isinstance(doc, self.model):
+             saved_doc = doc.as_primitives(hidden_fields=True)
+         elif self.model:
+             saved_doc = self.model(doc).as_primitives(hidden_fields=True)
+         else:
+             if not isinstance(doc, dict):
+                 saved_doc = {"__non_doc_raw__": doc}
+             else:
+                 saved_doc = deepcopy(doc)
+         saved_doc["id"] = doc_id
+
+         self.operations.append(json.dumps({"update": {"_index": index or self.indexes[0], "_id": doc_id}}))
+         self.operations.append(json.dumps({"doc": saved_doc, "doc_as_upsert": True}))
+
+     def add_update_operation(self, doc_id, doc, index=None):
+         if isinstance(doc, self.model):
+             saved_doc = doc.as_primitives(hidden_fields=True)
+         elif self.model:
+             saved_doc = self.model(doc, mask=list(doc.keys())).as_primitives(hidden_fields=True)
+         else:
+             if not isinstance(doc, dict):
+                 saved_doc = {"__non_doc_raw__": doc}
+             else:
+                 saved_doc = deepcopy(doc)
+
+         if index:
+             self.operations.append(json.dumps({"update": {"_index": index, "_id": doc_id}}))
+             self.operations.append(json.dumps({"doc": saved_doc}))
+         else:
+             for cur_index in self.indexes:
+                 self.operations.append(json.dumps({"update": {"_index": cur_index, "_id": doc_id}}))
+                 self.operations.append(json.dumps({"doc": saved_doc}))
+
+     def get_plan_data(self):
+         return "\n".join(self.operations)