howler-api 3.0.0.dev374__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of howler-api might be problematic. Click here for more details.

Files changed (198)
  1. howler/__init__.py +0 -0
  2. howler/actions/__init__.py +168 -0
  3. howler/actions/add_label.py +111 -0
  4. howler/actions/add_to_bundle.py +159 -0
  5. howler/actions/change_field.py +76 -0
  6. howler/actions/demote.py +160 -0
  7. howler/actions/example_plugin.py +104 -0
  8. howler/actions/prioritization.py +93 -0
  9. howler/actions/promote.py +147 -0
  10. howler/actions/remove_from_bundle.py +133 -0
  11. howler/actions/remove_label.py +111 -0
  12. howler/actions/transition.py +200 -0
  13. howler/api/__init__.py +249 -0
  14. howler/api/base.py +88 -0
  15. howler/api/socket.py +114 -0
  16. howler/api/v1/__init__.py +97 -0
  17. howler/api/v1/action.py +372 -0
  18. howler/api/v1/analytic.py +748 -0
  19. howler/api/v1/auth.py +382 -0
  20. howler/api/v1/clue.py +99 -0
  21. howler/api/v1/configs.py +58 -0
  22. howler/api/v1/dossier.py +222 -0
  23. howler/api/v1/help.py +28 -0
  24. howler/api/v1/hit.py +1181 -0
  25. howler/api/v1/notebook.py +82 -0
  26. howler/api/v1/overview.py +191 -0
  27. howler/api/v1/search.py +788 -0
  28. howler/api/v1/template.py +206 -0
  29. howler/api/v1/tool.py +183 -0
  30. howler/api/v1/user.py +416 -0
  31. howler/api/v1/utils/__init__.py +0 -0
  32. howler/api/v1/utils/etag.py +84 -0
  33. howler/api/v1/view.py +288 -0
  34. howler/app.py +235 -0
  35. howler/common/README.md +125 -0
  36. howler/common/__init__.py +0 -0
  37. howler/common/classification.py +979 -0
  38. howler/common/classification.yml +107 -0
  39. howler/common/exceptions.py +167 -0
  40. howler/common/loader.py +154 -0
  41. howler/common/logging/__init__.py +241 -0
  42. howler/common/logging/audit.py +138 -0
  43. howler/common/logging/format.py +38 -0
  44. howler/common/net.py +79 -0
  45. howler/common/net_static.py +1494 -0
  46. howler/common/random_user.py +316 -0
  47. howler/common/swagger.py +117 -0
  48. howler/config.py +64 -0
  49. howler/cronjobs/__init__.py +29 -0
  50. howler/cronjobs/retention.py +61 -0
  51. howler/cronjobs/rules.py +274 -0
  52. howler/cronjobs/view_cleanup.py +88 -0
  53. howler/datastore/README.md +112 -0
  54. howler/datastore/__init__.py +0 -0
  55. howler/datastore/bulk.py +72 -0
  56. howler/datastore/collection.py +2342 -0
  57. howler/datastore/constants.py +119 -0
  58. howler/datastore/exceptions.py +41 -0
  59. howler/datastore/howler_store.py +105 -0
  60. howler/datastore/migrations/fix_process.py +41 -0
  61. howler/datastore/operations.py +130 -0
  62. howler/datastore/schemas.py +90 -0
  63. howler/datastore/store.py +231 -0
  64. howler/datastore/support/__init__.py +0 -0
  65. howler/datastore/support/build.py +215 -0
  66. howler/datastore/support/schemas.py +90 -0
  67. howler/datastore/types.py +22 -0
  68. howler/error.py +91 -0
  69. howler/external/__init__.py +0 -0
  70. howler/external/generate_mitre.py +96 -0
  71. howler/external/generate_sigma_rules.py +31 -0
  72. howler/external/generate_tlds.py +47 -0
  73. howler/external/reindex_data.py +66 -0
  74. howler/external/wipe_databases.py +58 -0
  75. howler/gunicorn_config.py +25 -0
  76. howler/healthz.py +47 -0
  77. howler/helper/__init__.py +0 -0
  78. howler/helper/azure.py +50 -0
  79. howler/helper/discover.py +59 -0
  80. howler/helper/hit.py +236 -0
  81. howler/helper/oauth.py +247 -0
  82. howler/helper/search.py +92 -0
  83. howler/helper/workflow.py +110 -0
  84. howler/helper/ws.py +378 -0
  85. howler/odm/README.md +102 -0
  86. howler/odm/__init__.py +1 -0
  87. howler/odm/base.py +1543 -0
  88. howler/odm/charter.txt +146 -0
  89. howler/odm/helper.py +416 -0
  90. howler/odm/howler_enum.py +25 -0
  91. howler/odm/models/__init__.py +0 -0
  92. howler/odm/models/action.py +33 -0
  93. howler/odm/models/analytic.py +90 -0
  94. howler/odm/models/assemblyline.py +48 -0
  95. howler/odm/models/aws.py +23 -0
  96. howler/odm/models/azure.py +16 -0
  97. howler/odm/models/cbs.py +44 -0
  98. howler/odm/models/config.py +558 -0
  99. howler/odm/models/dossier.py +33 -0
  100. howler/odm/models/ecs/__init__.py +0 -0
  101. howler/odm/models/ecs/agent.py +17 -0
  102. howler/odm/models/ecs/autonomous_system.py +16 -0
  103. howler/odm/models/ecs/client.py +149 -0
  104. howler/odm/models/ecs/cloud.py +141 -0
  105. howler/odm/models/ecs/code_signature.py +27 -0
  106. howler/odm/models/ecs/container.py +32 -0
  107. howler/odm/models/ecs/dns.py +62 -0
  108. howler/odm/models/ecs/egress.py +10 -0
  109. howler/odm/models/ecs/elf.py +74 -0
  110. howler/odm/models/ecs/email.py +122 -0
  111. howler/odm/models/ecs/error.py +14 -0
  112. howler/odm/models/ecs/event.py +140 -0
  113. howler/odm/models/ecs/faas.py +24 -0
  114. howler/odm/models/ecs/file.py +84 -0
  115. howler/odm/models/ecs/geo.py +30 -0
  116. howler/odm/models/ecs/group.py +18 -0
  117. howler/odm/models/ecs/hash.py +16 -0
  118. howler/odm/models/ecs/host.py +17 -0
  119. howler/odm/models/ecs/http.py +37 -0
  120. howler/odm/models/ecs/ingress.py +12 -0
  121. howler/odm/models/ecs/interface.py +21 -0
  122. howler/odm/models/ecs/network.py +30 -0
  123. howler/odm/models/ecs/observer.py +45 -0
  124. howler/odm/models/ecs/organization.py +12 -0
  125. howler/odm/models/ecs/os.py +21 -0
  126. howler/odm/models/ecs/pe.py +17 -0
  127. howler/odm/models/ecs/process.py +216 -0
  128. howler/odm/models/ecs/registry.py +26 -0
  129. howler/odm/models/ecs/related.py +45 -0
  130. howler/odm/models/ecs/rule.py +51 -0
  131. howler/odm/models/ecs/server.py +24 -0
  132. howler/odm/models/ecs/threat.py +247 -0
  133. howler/odm/models/ecs/tls.py +58 -0
  134. howler/odm/models/ecs/url.py +51 -0
  135. howler/odm/models/ecs/user.py +57 -0
  136. howler/odm/models/ecs/user_agent.py +20 -0
  137. howler/odm/models/ecs/vulnerability.py +41 -0
  138. howler/odm/models/gcp.py +16 -0
  139. howler/odm/models/hit.py +356 -0
  140. howler/odm/models/howler_data.py +328 -0
  141. howler/odm/models/lead.py +24 -0
  142. howler/odm/models/localized_label.py +13 -0
  143. howler/odm/models/overview.py +16 -0
  144. howler/odm/models/pivot.py +40 -0
  145. howler/odm/models/template.py +24 -0
  146. howler/odm/models/user.py +83 -0
  147. howler/odm/models/view.py +34 -0
  148. howler/odm/random_data.py +888 -0
  149. howler/odm/randomizer.py +609 -0
  150. howler/patched.py +5 -0
  151. howler/plugins/__init__.py +25 -0
  152. howler/plugins/config.py +123 -0
  153. howler/remote/__init__.py +0 -0
  154. howler/remote/datatypes/README.md +355 -0
  155. howler/remote/datatypes/__init__.py +98 -0
  156. howler/remote/datatypes/counters.py +63 -0
  157. howler/remote/datatypes/events.py +66 -0
  158. howler/remote/datatypes/hash.py +206 -0
  159. howler/remote/datatypes/lock.py +42 -0
  160. howler/remote/datatypes/queues/__init__.py +0 -0
  161. howler/remote/datatypes/queues/comms.py +59 -0
  162. howler/remote/datatypes/queues/multi.py +32 -0
  163. howler/remote/datatypes/queues/named.py +93 -0
  164. howler/remote/datatypes/queues/priority.py +215 -0
  165. howler/remote/datatypes/set.py +118 -0
  166. howler/remote/datatypes/user_quota_tracker.py +54 -0
  167. howler/security/__init__.py +253 -0
  168. howler/security/socket.py +108 -0
  169. howler/security/utils.py +185 -0
  170. howler/services/__init__.py +0 -0
  171. howler/services/action_service.py +111 -0
  172. howler/services/analytic_service.py +128 -0
  173. howler/services/auth_service.py +323 -0
  174. howler/services/config_service.py +128 -0
  175. howler/services/dossier_service.py +252 -0
  176. howler/services/event_service.py +93 -0
  177. howler/services/hit_service.py +893 -0
  178. howler/services/jwt_service.py +158 -0
  179. howler/services/lucene_service.py +286 -0
  180. howler/services/notebook_service.py +119 -0
  181. howler/services/overview_service.py +44 -0
  182. howler/services/template_service.py +45 -0
  183. howler/services/user_service.py +331 -0
  184. howler/utils/__init__.py +0 -0
  185. howler/utils/annotations.py +28 -0
  186. howler/utils/chunk.py +38 -0
  187. howler/utils/dict_utils.py +200 -0
  188. howler/utils/isotime.py +17 -0
  189. howler/utils/list_utils.py +11 -0
  190. howler/utils/lucene.py +77 -0
  191. howler/utils/path.py +27 -0
  192. howler/utils/socket_utils.py +61 -0
  193. howler/utils/str_utils.py +256 -0
  194. howler/utils/uid.py +47 -0
  195. howler_api-3.0.0.dev374.dist-info/METADATA +71 -0
  196. howler_api-3.0.0.dev374.dist-info/RECORD +198 -0
  197. howler_api-3.0.0.dev374.dist-info/WHEEL +4 -0
  198. howler_api-3.0.0.dev374.dist-info/entry_points.txt +8 -0
@@ -0,0 +1,788 @@
1
+ import re
2
+ from copy import deepcopy
3
+ from typing import Any, Union
4
+
5
+ from elasticsearch import BadRequestError
6
+ from elasticsearch._sync.client.indices import IndicesClient
7
+ from flask import request
8
+ from sigma.backends.elasticsearch import LuceneBackend
9
+ from sigma.rule import SigmaRule
10
+ from werkzeug.exceptions import BadRequest
11
+ from yaml.scanner import ScannerError
12
+
13
+ from howler.api import bad_request, make_subapi_blueprint, ok
14
+ from howler.common.loader import datastore
15
+ from howler.common.logging import get_logger
16
+ from howler.common.swagger import generate_swagger_docs
17
+ from howler.datastore.exceptions import SearchException
18
+ from howler.helper.search import get_collection, get_default_sort, has_access_control, list_all_fields
19
+ from howler.security import api_login
20
+ from howler.services import hit_service, lucene_service
21
+
22
+ SUB_API = "search"  # Name of this sub-API; handed to make_subapi_blueprint on the next line
23
+ search_api = make_subapi_blueprint(SUB_API, api_version=1)  # Blueprint that every @search_api.route handler in this file registers on
24
+ search_api._doc = "Perform search queries"  # Short human-readable description stored on the blueprint
25
+
26
+ logger = get_logger(__file__)  # Module-level logger used by the handlers to report search failures
27
+
28
+
29
def generate_params(request, fields, multi_fields, params=None):
    """Generate a dict of parameters, combining the request data and the query arguments.

    Args:
        request: Request object. Only `method`, `json` (for POST) and `args` (for any other method) are read.
        fields: Names of single-valued parameters to copy from the request data when present.
        multi_fields: Names of list-valued parameters to copy from the request data when present.
        params: Optional base parameters (e.g. defaults). Never mutated; request values override them.

    Returns:
        A `(params, req_data)` tuple: the merged parameter dict and the request data it was built from.
    """
    # None is used as the default because a mutable default would be shared between calls
    base = {} if params is None else params

    if request.method == "POST":
        try:
            req_data = request.json
        except BadRequest:
            req_data = {"query": "*:*"}

        # A body can be valid JSON without being an object (null, a list, a bare string...).
        # Membership tests and the callers' `.get()` lookups would blow up on those, so treat
        # them exactly like an unparsable body.
        if not isinstance(req_data, dict):
            req_data = {"query": "*:*"}

        multi_values = {k: req_data[k] for k in multi_fields if k in req_data}
    else:
        req_data = request.args
        # Query strings may repeat a key; getlist collects every occurrence
        multi_values = {k: req_data.getlist(k) for k in multi_fields if k in req_data}

    single_values = {k: req_data[k] for k in fields if k in req_data}

    # Later entries win: base params < single-valued fields < multi-valued fields
    return {**base, **single_values, **multi_values}, req_data
56
+
57
+
58
@generate_swagger_docs()
@search_api.route("/<index>", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def search(index, **kwargs):
    """Search through specified index for a given query. Uses lucene search syntax for query.

    Variables:
    index => Index to search in (hit, user,...)

    Arguments:
    query => Query to search for

    Optional Arguments:
    deep_paging_id => ID of the next page or * to start deep paging
    filters => List of additional filter queries limit the data
    offset => Offset in the results
    rows => Number of results per page
    sort => How to sort the results (not available in deep paging)
    fl => List of fields to return
    timeout => Maximum execution time (ms)
    use_archive => Allow access to the datastore achive (Default: False)
    track_total_hits => Track the total number of query matches, instead of stopping at 10000 (Default: False)
    metadata => A list of additional features to be added to the result alongside the raw results

    Data Block:
    # Note that the data block is for POST requests only!
    {"query": "query", # Query to search for
     "offset": 0, # Offset in the results
     "rows": 100, # Max number of results
     "sort": "field asc", # How to sort the results
     "fl": "id,score", # List of fields to return
     "timeout": 1000, # Maximum execution time (ms)
     "filters": ['fq'], # List of additional filter queries limit the data
     "metadata": ["dossiers"]} # List of additional features to add to the search


    Result Example:
    {"total": 201, # Total results found
     "offset": 0, # Offset in the result list
     "rows": 100, # Number of results returned
     "next_deep_paging_id": "asX3f...342", # ID to pass back for the next page during deep paging
     "items": []} # List of results
    """
    user = kwargs["user"]
    collection = get_collection(index, user)
    default_sort = get_default_sort(index, user)

    if collection is None or default_sort is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    single_value_fields = ["offset", "rows", "sort", "fl", "timeout", "deep_paging_id", "track_total_hits"]
    list_fields = ["filters", "metadata"]

    params, req_data = generate_params(request, single_value_fields, list_fields)

    # A flag is on when sent as "true" (any case) or as an empty value; absent flags are not forwarded
    for flag in ["use_archive"]:
        raw_flag = req_data.get(flag, None)
        if raw_flag is not None:
            params[flag] = str(raw_flag).lower() in ["true", ""]

    if has_access_control(index):
        params["access_control"] = user["access_control"]

    params["as_obj"] = False

    # The sort expression arrives as one comma-separated string; the collection receives it as a list
    sort_spec = params.get("sort", None) or default_sort
    params["sort"] = sort_spec.split(",")

    query = req_data.get("query", None)
    if not query:
        return bad_request(err="There was no search query.")

    try:
        # "metadata" is handled here rather than forwarded to the collection's search
        requested_metadata = params.pop("metadata", [])
        result = collection().search(query, **params)

        if index == "hit" and len(requested_metadata) > 0:
            hit_service.augment_metadata(result["items"], requested_metadata, user)

        return ok(result)
    except (SearchException, BadRequestError) as e:
        return bad_request(err=f"SearchException: {e}")
150
+
151
+
152
@generate_swagger_docs()
@search_api.route("/<index>/explain", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def explain_query(index, **kwargs):
    """Search through specified index for a given Lucene query. Uses Lucene search syntax for query.

    Variables:
    index => Index to explain against (hit, user,...)

    Arguments:
    query => Lucene Query to explain

    Data Block:
    # Note that the data block is for POST requests only!
    {
        "query": "id:*", # Lucene Query to explain
    }


    Result Example:
    {
        'valid': True,
        'explanations': [
            {
                'valid': True,
                'explanation': 'ConstantScore(FieldExistsQuery [field=id])'
            }
        ]
    }
    """
    user = kwargs["user"]
    collection = get_collection(index, user)

    if collection is None:
        return bad_request(err=f"Not a valid index to explain: {index}")

    # Only the raw request data is needed here; the merged parameters are not used
    req_data = generate_params(request, ["query"], [])[1]

    query = req_data.get("query", None)
    if not query:
        return bad_request(err="There was no query.")

    # This regex checks for lucene phrases (i.e. the "Example Analytic" part of howler.analytic:"Example Analytic")
    # And then escapes them.
    # https://regex101.com/r/8u5F6a/1
    phrase_pattern = r'((:\()?(".+?")(\)?))'
    escaped_lucene = re.sub(phrase_pattern, lucene_service.replace_lucene_phrase, query)

    try:
        es_client = datastore().hit.datastore.client
        response = IndicesClient(es_client).validate_query(
            q=escaped_lucene, explain=True, index=collection().index_name
        )

        # Work on a copy so the client's response object is left untouched
        result = deepcopy(response.body)

        # Strip shard and index details from the response before returning it
        del result["_shards"]
        for entry in result["explanations"]:
            del entry["index"]

        return ok(result)
    except Exception as e:
        logger.exception("Exception on query explanation")
        return bad_request(err=f"Exception: {e}")
220
+
221
+
222
@generate_swagger_docs()
@search_api.route("/<index>/eql", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def eql_search(index, **kwargs):
    """Search through specified index for a given EQL query. Uses EQL search syntax for query.

    Variables:
    index => Index to search in (hit, user,...)

    Arguments:
    eql_query => EQL Query to search for

    Optional Arguments:
    filters => List of additional filter queries limit the data, written in lucene
    fl => Comma-separated list of fields to return
    rows => Number of results per page
    timeout => Maximum execution time (ms)

    Data Block:
    # Note that the data block is for POST requests only!
    {"eql_query": "query", # EQL Query to search for
     "rows": 100, # Max number of results
     "fl": "id,score", # List of fields to return
     "timeout": 1000, # Maximum execution time (ms)
     "filters": ['fq']} # List of additional filter queries limit the data


    Result Example:
    {"total": 201, # Total results found
     "offset": 0, # Offset in the result list
     "rows": 100, # Number of results returned
     "items": []} # List of results
    """
    user = kwargs["user"]
    collection = get_collection(index, user)

    if collection is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    # "eql_query" is listed as a regular field so it reaches raw_eql_search through **params
    params, req_data = generate_params(request, ["eql_query", "fl", "rows", "timeout"], ["filters"])

    if has_access_control(index):
        params["access_control"] = user["access_control"]

    params["as_obj"] = False

    if not req_data.get("eql_query", None):
        return bad_request(err="There was no EQL search query.")

    try:
        eql_result = collection().raw_eql_search(**params)
        return ok(eql_result)
    except (SearchException, BadRequestError) as e:
        logger.error("SearchException: %s", str(e), exc_info=True)
        return bad_request(err=f"SearchException: {e}")
285
+
286
+
287
@generate_swagger_docs()
@search_api.route("/<index>/sigma", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def sigma_search(index, **kwargs):
    """Search through specified index using a given sigma rule. Uses sigma rule syntax for query.

    Variables:
    index => Index to search in (hit, user,...)

    Arguments:
    sigma => Sigma rule to search on

    Optional Arguments:
    filters => List of additional filter queries limit the data, written in lucene
    fl => Comma-separated list of fields to return
    rows => Number of results per page
    timeout => Maximum execution time (ms)

    Data Block:
    # Note that the data block is for POST requests only!
    {"sigma": "sigma yaml", # Sigma Rule to search for
     "rows": 100, # Max number of results
     "fl": "id,score", # List of fields to return
     "timeout": 1000, # Maximum execution time (ms)
     "filters": ['fq']} # List of additional filter queries limit the data


    Result Example:
    {"total": 201, # Total results found
     "offset": 0, # Offset in the result list
     "rows": 100, # Number of results returned
     "items": []} # List of results
    """
    user = kwargs["user"]
    collection = get_collection(index, user)
    default_sort = get_default_sort(index, user)

    if collection is None or default_sort is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    fields = [
        "offset",
        "rows",
        "sort",
        "fl",
        "timeout",
        "deep_paging_id",
        "track_total_hits",
    ]
    multi_fields = ["filters"]
    boolean_fields = ["use_archive"]

    params, req_data = generate_params(request, fields, multi_fields)

    # A flag is on when sent as "true" (any case) or as an empty value; absent flags are not forwarded
    params.update(
        {
            k: str(req_data.get(k, "false")).lower() in ["true", ""]
            for k in boolean_fields
            if req_data.get(k, None) is not None
        }
    )

    if has_access_control(index):
        params.update({"access_control": user["access_control"]})

    params["as_obj"] = False
    params.update({"sort": (params.get("sort", None) or default_sort).split(",")})

    sigma = req_data.get("sigma", None)
    if not sigma:
        return bad_request(err="There was no sigma rule.")

    try:
        rule = SigmaRule.from_yaml(sigma)
    except ScannerError as e:
        return bad_request(err=f"Error when parsing yaml: {e.problem} {e.problem_mark}")

    es_collection = collection()

    lucene_queries = LuceneBackend(index_names=[es_collection.index_name]).convert_rule(rule)

    # The caller's filters must leave `params` before it is splatted below: passing `filters`
    # both through **params and as an explicit keyword raises
    # "TypeError: got multiple values for keyword argument 'filters'".
    user_filters = params.pop("filters", None) or []

    try:
        # The converted sigma rule is applied as extra filters on top of a match-all query
        return ok(es_collection.search("*:*", **params, filters=[*user_filters, *lucene_queries]))
    except (SearchException, BadRequestError) as e:
        logger.error("SearchException: %s", str(e), exc_info=True)
        return bad_request(err=f"SearchException: {e}")
373
+
374
+
375
@generate_swagger_docs()
@search_api.route("/grouped/<index>/<group_field>", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def group_search(index, group_field, **kwargs):
    """Search for a given query and groups the data based on a specific field. Uses lucene search syntax.

    Variables:
    index => Index to search in (hit, user,...)
    group_field => Field to group on

    Optional Arguments:
    group_sort => How to sort the results inside the group
    limit => Maximum number of results return for each groups
    query => Query to search for
    filters => List of additional filter queries limit the data
    offset => Offset in the results
    rows => Max number of results
    sort => How to sort the results
    fl => List of fields to return

    Data Block:
    # Note that the data block is for POST requests only!
    {"group_sort": "score desc",
     "limit": 10,
     "query": "query",
     "offset": 0,
     "rows": 100,
     "sort": "field asc",
     "fl": "id,score",
     "filters": ['fq']}


    Result Example:
    {
        "total": 201, # Total results found
        "offset": 0, # Offset in the result list
        "rows": 100, # Number of results returned
        "items": [], # List of results
        "sequences": [], # List of matching sequences
    }
    """
    user = kwargs["user"]
    collection = get_collection(index, user)
    default_sort = get_default_sort(index, user)
    if collection is None or default_sort is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    params, _ = generate_params(
        request,
        ["group_sort", "limit", "query", "offset", "rows", "sort", "fl"],
        ["filters"],
    )

    if has_access_control(index):
        params["access_control"] = user["access_control"]

    params["as_obj"] = False

    # Fall back to the index's default sort only when the caller did not send one
    if "sort" not in params:
        params["sort"] = default_sort

    if not group_field:
        return bad_request(err="The field to group on was not specified.")

    try:
        grouped = collection().grouped_search(group_field, **params)
        return ok(grouped)
    except (SearchException, BadRequestError) as e:
        logger.error("SearchException: %s", str(e), exc_info=True)
        return bad_request(err=f"SearchException: {e}")
441
+
442
+
443
# noinspection PyUnusedLocal
@generate_swagger_docs()
@search_api.route("/fields/<index>", methods=["GET"])
@api_login(required_priv=["R"])
def list_index_fields(index, **kwargs):
    """List all available fields for a given index

    Variables:
    index => Which specific index you want to know the fields for


    Arguments:
    None

    Result Example:
    {
        "<<FIELD_NAME>>": { # For a given field
            indexed: True, # Is the field indexed
            stored: False, # Is the field stored
            type: string # What type of data in the field
        },
        ...

    }
    """
    user = kwargs["user"]
    collection = get_collection(index, user)

    # A concrete, known index: report that collection's own fields
    if collection is not None:
        return ok(collection().fields())

    # "ALL" is a pseudo-index; the helper is told whether the caller has the admin type
    if index == "ALL":
        is_admin = "admin" in user["type"]
        return ok(list_all_fields(is_admin))

    return bad_request(err=f"Not a valid index to search in: {index}")
476
+
477
+
478
@generate_swagger_docs()
@search_api.route("/count/<index>", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def count(index, **kwargs):
    """Returns number of documents matching a query. Uses lucene search syntax for query.

    Variables:
    index => Index to search in (hit, user,...)

    Arguments:
    query => Query to search for

    Optional Arguments:
    filters => List of additional filter queries limit the data
    timeout => Maximum execution time (ms)
    use_archive => Allow access to the datastore achive (Default: False)

    Data Block:
    # Note that the data block is for POST requests only!
    {
        "query": "query", # Query to search for
        "timeout": 1000, # Maximum execution time (ms)
    }


    Result Example:
    {
        "total": 201, # Total results found
    }
    """
    user = kwargs["user"]
    collection = get_collection(index, user)

    if collection is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    # No field names are requested, so `params` starts empty and only the raw request data is used
    params, req_data = generate_params(request, [], [])

    # A flag is on when sent as "true" (any case) or as an empty value; absent flags are not forwarded
    for flag in ["use_archive"]:
        raw_flag = req_data.get(flag, None)
        if raw_flag is not None:
            params[flag] = str(raw_flag).lower() in ["true", ""]

    if has_access_control(index):
        params["access_control"] = user["access_control"]

    query = req_data.get("query", None)
    if not query:
        return bad_request(err="There was no search query.")

    try:
        total = collection().count(query, **params)
        return ok(total)
    except (SearchException, BadRequestError) as e:
        return bad_request(err=f"SearchException: {e}")
536
+
537
+
538
@generate_swagger_docs()
@search_api.route("/facet/<index>", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def facet(index, **kwargs):
    """Perform field analysis on the selected fields. (Also known as facetting in lucene).

    This essentially counts the number of instances a field is seen with each specific
    values where the documents matches the specified queries.

    Variables:
    index => Index to search in (hit, user,...)

    Optional Arguments:
    query => Query to search for
    mincount => Minimum item count for the fieldvalue to be returned
    rows => The max number of fieldvalues to return
    filters => Additional query to limit to output
    fields => Field to analyse

    Data Block:
    # Note that the data block is for POST requests only!
    {"fields": ["howler.id", ...],
     "query": "id:*",
     "mincount": "10",
     "rows": "10",
     "filters": ['fq']}

    Result Example:
    {
        "howler.id": { # Facetting results
            "value_0": 2,
            ...
            "value_N": 19,
        },
        ...
    }
    """
    user = kwargs["user"]
    collection = get_collection(index, user)
    if collection is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    fields = ["query", "mincount", "rows"]
    multi_fields = ["filters", "fields"]

    params = generate_params(request, fields, multi_fields)[0]

    if has_access_control(index):
        params.update({"access_control": user["access_control"]})

    # "fields" is optional: a request without it yields an empty result instead of a KeyError
    requested_fields = params.pop("fields", None) or []

    # A POST body may carry a single field name as a bare string; iterating it as-is would
    # facet on each character
    if isinstance(requested_fields, str):
        requested_fields = [requested_fields]

    facet_result: dict[str, dict[str, Any]] = {}
    if not requested_fields:
        return ok(facet_result)

    try:
        # Resolve the collection and its field listing once instead of once per requested field
        es_collection = collection()
        known_fields = es_collection.fields()

        for field in requested_fields:
            if field not in known_fields:
                logger.warning("Invalid field %s requested for faceting, skipping", field)
                continue

            facet_result[field] = es_collection.facet(field, **params)

        return ok(facet_result)
    except (SearchException, BadRequestError) as e:
        logger.error("SearchException: %s", str(e), exc_info=True)
        return bad_request(err=f"SearchException: {e}")
602
+
603
+
604
@generate_swagger_docs()
@search_api.route("/facet/<index>/<field>", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def facet_field(index, field, **kwargs):
    """Perform field analysis on the selected field. (Also known as facetting in lucene).

    This essentially counts the number of instances a field is seen with each specific
    values where the documents matches the specified queries.

    Variables:
    index => Index to search in (hit, user,...)
    field => Field to analyse

    Optional Arguments:
    query => Query to search for
    mincount => Minimum item count for the fieldvalue to be returned
    rows => The max number of fieldvalues to return
    filters => Additional query to limit to output

    Data Block:
    # Note that the data block is for POST requests only!
    {"query": "id:*",
     "mincount": "10",
     "rows": "10",
     "filters": ['fq']}

    Result Example:
    { # Facetting results
        "value_0": 2,
        ...
        "value_N": 19,
    }
    """
    user = kwargs["user"]
    collection = get_collection(index, user)
    if collection is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    # Reject fields the index does not know before touching the request parameters
    if collection().fields().get(field, None) is None:
        return bad_request(err=f"Field '{field}' is not a valid field in index: {index}")

    params, _ = generate_params(request, ["query", "mincount", "rows"], ["filters"])

    if has_access_control(index):
        params["access_control"] = user["access_control"]

    try:
        facet_values = collection().facet(field, **params)
        return ok(facet_values)
    except (SearchException, BadRequestError) as e:
        logger.error("SearchException: %s", str(e), exc_info=True)
        return bad_request(err=f"SearchException: {e}")
659
+
660
+
661
@generate_swagger_docs()
@search_api.route("/histogram/<index>/<field>", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def histogram(index, field, **kwargs):
    """Generate an histogram based on a time or and int field using a specific gap size

    Variables:
    index => Index to search in (hit, user,...)
    field => Field to generate the histogram from

    Optional Arguments:
    query => Query to search for
    mincount => Minimum item count for the fieldvalue to be returned
    filters => Additional query to limit to output
    start => Value at which to start creating the histogram
     * Defaults: 0 or now-1d
    end => Value at which to end the histogram. Defaults: 2000 or now
    gap => Size of each step in the histogram. Defaults: 100 or +1h

    Data Block:
    # Note that the data block is for POST requests only!
    {"query": "id:*",
     "mincount": "10",
     "filters": ['fq'],
     "start": 0,
     "end": 100,
     "gap": 10}

    Result Example:
    { # Histogram results
        "step_0": 2,
        ...
        "step_N": 19,
    }
    """
    user = kwargs["user"]

    collection = get_collection(index, user)
    if collection is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    field_info = collection().fields().get(field, None)
    if field_info is None:
        return bad_request(err=f"Field '{field}' is not a valid field in index: {index}")

    # The default range and step depend on whether the field is numeric or a date
    defaults: dict[str, Union[str, int]]
    field_type = field_info["type"]
    if field_type == "integer":
        defaults = {"start": 0, "end": 2000, "gap": 100}
    elif field_type == "date":
        # Date defaults are built from the datastore's own tokens for "now", "day" and "hour"
        ds = datastore().ds
        defaults = {
            "start": f"{ds.now}-1{ds.day}",
            "end": f"{ds.now}",
            "gap": f"+1{ds.hour}",
        }
    else:
        return bad_request(
            err=f"Field '{field}' is of type '{field_type}'. Only 'integer' or 'date' are acceptable."
        )

    # Values sent with the request override the defaults computed above
    params, _ = generate_params(
        request,
        ["query", "mincount", "start", "end", "gap"],
        ["filters"],
        defaults,
    )

    # Make sure access control is enforced
    if has_access_control(index):
        params["access_control"] = user["access_control"]

    try:
        buckets = collection().histogram(field, **params)
        return ok(buckets)
    except (SearchException, BadRequestError) as e:
        logger.error("SearchException: %s", str(e), exc_info=True)
        return bad_request(err=f"SearchException: {e}")
734
+
735
+
736
@generate_swagger_docs()
@search_api.route("/stats/<index>/<int_field>", methods=["GET", "POST"])
@api_login(required_priv=["R"])
def stats(index, int_field, **kwargs):
    """Perform statistical analysis of an integer field to get its min, max, average and count values

    Variables:
    index => Index to search in (hit, user,...)
    int_field => Integer field to analyse

    Optional Arguments:
    query => Query to search for
    filters => Additional query to limit to output

    Data Block:
    # Note that the data block is for POST requests only!
    {"query": "id:*",
     "filters": ['fq']}

    Result Example:
    { # Stats results
        "count": 1, # Number of times this field is seen
        "min": 1, # Minimum value
        "max": 1, # Maximum value
        "avg": 1, # Average value
        "sum": 1 # Sum of all values
    }
    """
    user = kwargs["user"]
    collection = get_collection(index, user)
    if collection is None:
        return bad_request(err=f"Not a valid index to search in: {index}")

    field_info = collection().fields().get(int_field, None)
    if field_info is None:
        return bad_request(err=f"Field '{int_field}' is not a valid field in index: {index}")

    # Despite the parameter name, float fields are accepted as well as integer ones
    numeric_types = ["integer", "float"]
    if field_info["type"] not in numeric_types:
        return bad_request(err=f"Field '{int_field}' is not a numeric field.")

    params, _ = generate_params(request, ["query"], ["filters"])

    if has_access_control(index):
        params["access_control"] = user["access_control"]

    try:
        field_stats = collection().stats(int_field, **params)
        return ok(field_stats)
    except (SearchException, BadRequestError) as e:
        logger.error("SearchException: %s", str(e), exc_info=True)
        return bad_request(err=f"SearchException: {e}")