howler-api 3.0.0.dev374__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of howler-api might be problematic. Click here for more details.

Files changed (198) hide show
  1. howler/__init__.py +0 -0
  2. howler/actions/__init__.py +168 -0
  3. howler/actions/add_label.py +111 -0
  4. howler/actions/add_to_bundle.py +159 -0
  5. howler/actions/change_field.py +76 -0
  6. howler/actions/demote.py +160 -0
  7. howler/actions/example_plugin.py +104 -0
  8. howler/actions/prioritization.py +93 -0
  9. howler/actions/promote.py +147 -0
  10. howler/actions/remove_from_bundle.py +133 -0
  11. howler/actions/remove_label.py +111 -0
  12. howler/actions/transition.py +200 -0
  13. howler/api/__init__.py +249 -0
  14. howler/api/base.py +88 -0
  15. howler/api/socket.py +114 -0
  16. howler/api/v1/__init__.py +97 -0
  17. howler/api/v1/action.py +372 -0
  18. howler/api/v1/analytic.py +748 -0
  19. howler/api/v1/auth.py +382 -0
  20. howler/api/v1/clue.py +99 -0
  21. howler/api/v1/configs.py +58 -0
  22. howler/api/v1/dossier.py +222 -0
  23. howler/api/v1/help.py +28 -0
  24. howler/api/v1/hit.py +1181 -0
  25. howler/api/v1/notebook.py +82 -0
  26. howler/api/v1/overview.py +191 -0
  27. howler/api/v1/search.py +788 -0
  28. howler/api/v1/template.py +206 -0
  29. howler/api/v1/tool.py +183 -0
  30. howler/api/v1/user.py +416 -0
  31. howler/api/v1/utils/__init__.py +0 -0
  32. howler/api/v1/utils/etag.py +84 -0
  33. howler/api/v1/view.py +288 -0
  34. howler/app.py +235 -0
  35. howler/common/README.md +125 -0
  36. howler/common/__init__.py +0 -0
  37. howler/common/classification.py +979 -0
  38. howler/common/classification.yml +107 -0
  39. howler/common/exceptions.py +167 -0
  40. howler/common/loader.py +154 -0
  41. howler/common/logging/__init__.py +241 -0
  42. howler/common/logging/audit.py +138 -0
  43. howler/common/logging/format.py +38 -0
  44. howler/common/net.py +79 -0
  45. howler/common/net_static.py +1494 -0
  46. howler/common/random_user.py +316 -0
  47. howler/common/swagger.py +117 -0
  48. howler/config.py +64 -0
  49. howler/cronjobs/__init__.py +29 -0
  50. howler/cronjobs/retention.py +61 -0
  51. howler/cronjobs/rules.py +274 -0
  52. howler/cronjobs/view_cleanup.py +88 -0
  53. howler/datastore/README.md +112 -0
  54. howler/datastore/__init__.py +0 -0
  55. howler/datastore/bulk.py +72 -0
  56. howler/datastore/collection.py +2342 -0
  57. howler/datastore/constants.py +119 -0
  58. howler/datastore/exceptions.py +41 -0
  59. howler/datastore/howler_store.py +105 -0
  60. howler/datastore/migrations/fix_process.py +41 -0
  61. howler/datastore/operations.py +130 -0
  62. howler/datastore/schemas.py +90 -0
  63. howler/datastore/store.py +231 -0
  64. howler/datastore/support/__init__.py +0 -0
  65. howler/datastore/support/build.py +215 -0
  66. howler/datastore/support/schemas.py +90 -0
  67. howler/datastore/types.py +22 -0
  68. howler/error.py +91 -0
  69. howler/external/__init__.py +0 -0
  70. howler/external/generate_mitre.py +96 -0
  71. howler/external/generate_sigma_rules.py +31 -0
  72. howler/external/generate_tlds.py +47 -0
  73. howler/external/reindex_data.py +66 -0
  74. howler/external/wipe_databases.py +58 -0
  75. howler/gunicorn_config.py +25 -0
  76. howler/healthz.py +47 -0
  77. howler/helper/__init__.py +0 -0
  78. howler/helper/azure.py +50 -0
  79. howler/helper/discover.py +59 -0
  80. howler/helper/hit.py +236 -0
  81. howler/helper/oauth.py +247 -0
  82. howler/helper/search.py +92 -0
  83. howler/helper/workflow.py +110 -0
  84. howler/helper/ws.py +378 -0
  85. howler/odm/README.md +102 -0
  86. howler/odm/__init__.py +1 -0
  87. howler/odm/base.py +1543 -0
  88. howler/odm/charter.txt +146 -0
  89. howler/odm/helper.py +416 -0
  90. howler/odm/howler_enum.py +25 -0
  91. howler/odm/models/__init__.py +0 -0
  92. howler/odm/models/action.py +33 -0
  93. howler/odm/models/analytic.py +90 -0
  94. howler/odm/models/assemblyline.py +48 -0
  95. howler/odm/models/aws.py +23 -0
  96. howler/odm/models/azure.py +16 -0
  97. howler/odm/models/cbs.py +44 -0
  98. howler/odm/models/config.py +558 -0
  99. howler/odm/models/dossier.py +33 -0
  100. howler/odm/models/ecs/__init__.py +0 -0
  101. howler/odm/models/ecs/agent.py +17 -0
  102. howler/odm/models/ecs/autonomous_system.py +16 -0
  103. howler/odm/models/ecs/client.py +149 -0
  104. howler/odm/models/ecs/cloud.py +141 -0
  105. howler/odm/models/ecs/code_signature.py +27 -0
  106. howler/odm/models/ecs/container.py +32 -0
  107. howler/odm/models/ecs/dns.py +62 -0
  108. howler/odm/models/ecs/egress.py +10 -0
  109. howler/odm/models/ecs/elf.py +74 -0
  110. howler/odm/models/ecs/email.py +122 -0
  111. howler/odm/models/ecs/error.py +14 -0
  112. howler/odm/models/ecs/event.py +140 -0
  113. howler/odm/models/ecs/faas.py +24 -0
  114. howler/odm/models/ecs/file.py +84 -0
  115. howler/odm/models/ecs/geo.py +30 -0
  116. howler/odm/models/ecs/group.py +18 -0
  117. howler/odm/models/ecs/hash.py +16 -0
  118. howler/odm/models/ecs/host.py +17 -0
  119. howler/odm/models/ecs/http.py +37 -0
  120. howler/odm/models/ecs/ingress.py +12 -0
  121. howler/odm/models/ecs/interface.py +21 -0
  122. howler/odm/models/ecs/network.py +30 -0
  123. howler/odm/models/ecs/observer.py +45 -0
  124. howler/odm/models/ecs/organization.py +12 -0
  125. howler/odm/models/ecs/os.py +21 -0
  126. howler/odm/models/ecs/pe.py +17 -0
  127. howler/odm/models/ecs/process.py +216 -0
  128. howler/odm/models/ecs/registry.py +26 -0
  129. howler/odm/models/ecs/related.py +45 -0
  130. howler/odm/models/ecs/rule.py +51 -0
  131. howler/odm/models/ecs/server.py +24 -0
  132. howler/odm/models/ecs/threat.py +247 -0
  133. howler/odm/models/ecs/tls.py +58 -0
  134. howler/odm/models/ecs/url.py +51 -0
  135. howler/odm/models/ecs/user.py +57 -0
  136. howler/odm/models/ecs/user_agent.py +20 -0
  137. howler/odm/models/ecs/vulnerability.py +41 -0
  138. howler/odm/models/gcp.py +16 -0
  139. howler/odm/models/hit.py +356 -0
  140. howler/odm/models/howler_data.py +328 -0
  141. howler/odm/models/lead.py +24 -0
  142. howler/odm/models/localized_label.py +13 -0
  143. howler/odm/models/overview.py +16 -0
  144. howler/odm/models/pivot.py +40 -0
  145. howler/odm/models/template.py +24 -0
  146. howler/odm/models/user.py +83 -0
  147. howler/odm/models/view.py +34 -0
  148. howler/odm/random_data.py +888 -0
  149. howler/odm/randomizer.py +609 -0
  150. howler/patched.py +5 -0
  151. howler/plugins/__init__.py +25 -0
  152. howler/plugins/config.py +123 -0
  153. howler/remote/__init__.py +0 -0
  154. howler/remote/datatypes/README.md +355 -0
  155. howler/remote/datatypes/__init__.py +98 -0
  156. howler/remote/datatypes/counters.py +63 -0
  157. howler/remote/datatypes/events.py +66 -0
  158. howler/remote/datatypes/hash.py +206 -0
  159. howler/remote/datatypes/lock.py +42 -0
  160. howler/remote/datatypes/queues/__init__.py +0 -0
  161. howler/remote/datatypes/queues/comms.py +59 -0
  162. howler/remote/datatypes/queues/multi.py +32 -0
  163. howler/remote/datatypes/queues/named.py +93 -0
  164. howler/remote/datatypes/queues/priority.py +215 -0
  165. howler/remote/datatypes/set.py +118 -0
  166. howler/remote/datatypes/user_quota_tracker.py +54 -0
  167. howler/security/__init__.py +253 -0
  168. howler/security/socket.py +108 -0
  169. howler/security/utils.py +185 -0
  170. howler/services/__init__.py +0 -0
  171. howler/services/action_service.py +111 -0
  172. howler/services/analytic_service.py +128 -0
  173. howler/services/auth_service.py +323 -0
  174. howler/services/config_service.py +128 -0
  175. howler/services/dossier_service.py +252 -0
  176. howler/services/event_service.py +93 -0
  177. howler/services/hit_service.py +893 -0
  178. howler/services/jwt_service.py +158 -0
  179. howler/services/lucene_service.py +286 -0
  180. howler/services/notebook_service.py +119 -0
  181. howler/services/overview_service.py +44 -0
  182. howler/services/template_service.py +45 -0
  183. howler/services/user_service.py +331 -0
  184. howler/utils/__init__.py +0 -0
  185. howler/utils/annotations.py +28 -0
  186. howler/utils/chunk.py +38 -0
  187. howler/utils/dict_utils.py +200 -0
  188. howler/utils/isotime.py +17 -0
  189. howler/utils/list_utils.py +11 -0
  190. howler/utils/lucene.py +77 -0
  191. howler/utils/path.py +27 -0
  192. howler/utils/socket_utils.py +61 -0
  193. howler/utils/str_utils.py +256 -0
  194. howler/utils/uid.py +47 -0
  195. howler_api-3.0.0.dev374.dist-info/METADATA +71 -0
  196. howler_api-3.0.0.dev374.dist-info/RECORD +198 -0
  197. howler_api-3.0.0.dev374.dist-info/WHEEL +4 -0
  198. howler_api-3.0.0.dev374.dist-info/entry_points.txt +8 -0
howler/odm/base.py ADDED
@@ -0,0 +1,1543 @@
1
+ """HOWLER's built in Object Document Model tool.
2
+
3
+ The classes in this module can be composed to build database
4
+ independent data models in python. This gives us:
5
+ - single source of truth for our data schemas
6
+ - database independent serialization
7
+ - type checking
8
+
9
+
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import copy
15
+ import json
16
+ import re
17
+ import typing
18
+ from datetime import datetime
19
+ from enum import Enum as PyEnum
20
+ from enum import EnumMeta
21
+ from typing import Any as _Any
22
+ from typing import Dict, Tuple, Union
23
+ from venv import logger
24
+
25
+ import arrow
26
+ import validators
27
+ from dateutil.tz import tzutc
28
+
29
+ from howler.common import loader
30
+ from howler.common.exceptions import HowlerKeyError, HowlerNotImplementedError, HowlerTypeError, HowlerValueError
31
+ from howler.common.net import is_valid_domain, is_valid_ip
32
+ from howler.utils.dict_utils import flatten, recursive_update
33
+ from howler.utils.isotime import now_as_iso
34
+ from howler.utils.uid import get_random_id
35
+
36
# Field names listed here are treated as banned by this module.
# NOTE(review): presumably reserved for datastore / access-control internals - confirm against the model decorator.
BANNED_FIELDS = {
    "_id",
    "__access_grp1__",
    "__access_lvl__",
    "__access_req__",
    "__access_grp2__",
}
# strptime format tried first by Date.check (microsecond precision, literal trailing "Z").
DATEFORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
# Lowercase letter first, then lowercase letters, digits, "_" or "-".
FIELD_SANITIZER = re.compile("^[a-z][a-z0-9_-]*$")
# Same as FIELD_SANITIZER but allows "." (dotted paths) instead of "-".
FLATTENED_OBJECT_SANITIZER = re.compile("^[a-z][a-z0-9_.]*$")
# Letters (either case), digits, "_", space and "-"; the empty string also matches.
NOT_INDEXED_SANITIZER = re.compile("^[A-Za-z0-9_ -]*$")
# Shared UTC tzinfo instance attached to datetimes parsed with DATEFORMAT.
UTC_TZ = tzutc()
48
+
49
+ DOMAIN_REGEX = (
50
+ r"(?:(?:[A-Za-z0-9\u00a1-\uffff][A-Za-z0-9\u00a1-\uffff_-]{0,62})?[A-Za-z0-9\u00a1-\uffff]\.)+"
51
+ r"(?:xn--)?(?:[A-Za-z0-9\u00a1-\uffff]{2,}\.?)"
52
+ )
53
+ DOMAIN_ONLY_REGEX = f"^{DOMAIN_REGEX}$"
54
+ EMAIL_REGEX = f"^[a-zA-Z0-9!#$%&'*+/=?^_‘{{|}}~-]+(?:\\.[a-zA-Z0-9!#$%&'*+/=?^_‘{{|}}~-]+)*@({DOMAIN_REGEX})$"
55
+ IPV4_REGEX = r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
56
+ IPV6_REGEX = (
57
+ r"(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|"
58
+ r"(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|"
59
+ r"(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|"
60
+ r"(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|"
61
+ r":(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|"
62
+ r"::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|"
63
+ r"(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|"
64
+ r"(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"
65
+ )
66
+ IP_REGEX = f"(?:{IPV4_REGEX}|{IPV6_REGEX})"
67
+ IP_ONLY_REGEX = f"^{IP_REGEX}$"
68
+ PRIVATE_IP = (
69
+ r"(?:(?:127|10)(?:\.(?:[2](?:[0-5][0-5]|[01234][6-9])|[1][0-9][0-9]|[1-9][0-9]|[0-9])){3})|"
70
+ r"(?:172\.(?:1[6-9]|2[0-9]|3[0-1])(?:\.(?:2[0-4][0-9]|25[0-5]|[1][0-9][0-9]|[1-9][0-9]|[0-9])){2}|"
71
+ r"(?:192\.168(?:\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])){2}))"
72
+ )
73
# Optional 1-2 digit country code (optionally prefixed by "+"), then a 3-3-4 digit number whose
# groups are separated by a space, "." or "-"; the area code may be wrapped in parentheses.
PHONE_REGEX = r"^(\+?\d{1,2})?[ .-]?(\(\d{3}\)|\d{3})[ .-](\d{3})[ .-](\d{4})$"
# "<1-18 digits>:<0-64 base64-alphabet chars>:<0-64 base64-alphabet chars>"
SSDEEP_REGEX = r"^[0-9]{1,18}:[a-zA-Z0-9/+]{0,64}:[a-zA-Z0-9/+]{0,64}$"
# Hash patterns below accept lowercase hexadecimal only; uppercase digests do not match.
MD5_REGEX = r"^[a-f0-9]{32}$"
SHA1_REGEX = r"^[a-f0-9]{40}$"
SHA256_REGEX = r"^[a-f0-9]{64}$"
# Between 1 and 64 lowercase hexadecimal characters.
HOWLER_HASH_REGEX = r"^[a-f0-9]{1,64}$"
# Six lowercase hex pairs separated consistently by "-" or by ":" (separators cannot be mixed).
MAC_REGEX = r"^(?:(?:[0-9a-f]{2}-){5}[0-9a-f]{2}|(?:[0-9a-f]{2}:){5}[0-9a-f]{2})$"
# A "/", "?" or "#" followed by any run of non-whitespace. Unanchored: callers decide how to anchor it.
URI_PATH = r"(?:[/?#]\S*)"
# Group 1: optional scheme, optional credentials, host and optional port. Group 2: the host
# (IP or domain). An optional URI_PATH may follow group 1.
FULL_URI = f"^((?:(?:[A-Za-z]*:)?//)?(?:\\S+(?::\\S*)?@)?({IP_REGEX}|{DOMAIN_REGEX})(?::\\d{{2,5}})?){URI_PATH}?$"
# Closed, case-sensitive list of platform names.
PLATFORM_REGEX = r"^(Windows|Linux|MacOS|Android|iOS)$"
# Exactly "x64" or "x86".
PROCESSOR_REGEX = r"^x(64|86)$"
84
+
85
+
86
def flat_to_nested(data: dict[str, _Any]) -> dict[str, _Any]:
    """Expand a mapping with dotted keys into nested dictionaries.

    ``{"a.b.c": 1, "a.d": 2, "e": 3}`` becomes ``{"a": {"b": {"c": 1}, "d": 2}, "e": 3}``.
    Keys without a dot are copied through unchanged; their values are not inspected.

    When a plain key and a dotted key target the same child, whichever appears
    later in ``data`` wins.

    Args:
        data: Mapping whose string keys may use "." as a path separator.

    Returns:
        A new dictionary in which every dotted key has been expanded.
    """
    sub_data: dict[str, _Any] = {}
    # A dict is used as an ordered set so that each child is expanded exactly once,
    # no matter how many dotted keys share the same prefix.
    nested_keys: dict[str, None] = {}
    for key, value in data.items():
        if "." in key:
            child, sub_key = key.split(".", 1)
            nested_keys[child] = None
            try:
                sub_data[child][sub_key] = value
            except (KeyError, TypeError):
                # Either the child does not exist yet or it currently holds a non-container value
                sub_data[child] = {sub_key: value}
        else:
            sub_data[key] = value

    for key in nested_keys:
        # A later plain key may have replaced the partially built child with a scalar;
        # in that case there is nothing left to expand.
        if isinstance(sub_data[key], dict):
            sub_data[key] = flat_to_nested(sub_data[key])

    return sub_data
104
+
105
+
106
class KeyMaskException(HowlerKeyError):
    """Raised when a field listed in a model's ``_odm_removed`` collection is read or written."""
108
+
109
+
110
+ class _Field:
111
+ def __init__(
112
+ self,
113
+ name=None,
114
+ index=None,
115
+ store=None,
116
+ copyto=None,
117
+ default=None,
118
+ description=None,
119
+ deprecated_description=None,
120
+ reference=None,
121
+ optional=False,
122
+ deprecated=False,
123
+ ):
124
+ self.index = index
125
+ self.store = store
126
+ self.multivalued = False
127
+ self.copyto = []
128
+ if isinstance(copyto, str):
129
+ self.copyto.append(copyto)
130
+ elif copyto:
131
+ self.copyto.extend(copyto)
132
+
133
+ self.name = name
134
+ self.parent_name = None
135
+ self.getter_function = None
136
+ self.setter_function = None
137
+ self.description = description
138
+ self.reference = reference
139
+ self.optional = optional
140
+ self.deprecated = deprecated
141
+ self.deprecated_description = deprecated_description
142
+
143
+ self.default = default
144
+ self.default_set = default is not None
145
+
146
+ # noinspection PyProtectedMember
147
+ def __get__(self, obj, objtype=None):
148
+ """Read the value of this field from the model instance (obj)."""
149
+ if obj is None:
150
+ return obj
151
+ if self.name in obj._odm_removed:
152
+ raise KeyMaskException(self.name)
153
+ if self.getter_function is not None:
154
+ return self.getter_function(obj, obj._odm_py_obj[self.name.rstrip("_")])
155
+
156
+ return obj._odm_py_obj[self.name.rstrip("_")]
157
+
158
+ # noinspection PyProtectedMember
159
+ def __set__(self, obj, value):
160
+ """Set the value of this field, calling a setter method if available."""
161
+ if self.name in obj._odm_removed:
162
+ raise KeyMaskException(self.name)
163
+ value = self.check(value)
164
+ if self.setter_function is not None:
165
+ value = self.setter_function(obj, value)
166
+ obj._odm_py_obj[self.name.rstrip("_")] = value
167
+
168
+ def getter(self, method):
169
+ """Decorator to create getter method for a field."""
170
+ out = copy.deepcopy(self)
171
+ out.getter_function = method
172
+ return out
173
+
174
+ def setter(self, method):
175
+ """Let fields be used as a decorator to define a setter method.
176
+
177
+ >>> expiry = Date()
178
+ >>>
179
+ >>> # noinspection PyUnusedLocal,PyUnresolvedReferences
180
+ >>> @expiry.setter
181
+ >>> def expiry(self, assign, value):
182
+ >>> assert value
183
+ >>> assign(value)
184
+ """
185
+ out = copy.deepcopy(self)
186
+ out.setter_function = method
187
+ return out
188
+
189
+ def apply_defaults(self, index, store):
190
+ """Used by the model decorator to pass through default parameters."""
191
+ if self.index is None:
192
+ self.index = index
193
+ if self.store is None:
194
+ self.store = store
195
+
196
+ def fields(self):
197
+ """Return the subfields/modified field data.
198
+
199
+ For simple fields this is an identity function.
200
+ """
201
+ return {"": self}
202
+
203
+ def check(self, value, **kwargs):
204
+ raise HowlerNotImplementedError(
205
+ "This function is not defined in the default field. " "Each fields has to have their own definition"
206
+ )
207
+
208
+ def __repr__(self) -> str:
209
+ keys = [
210
+ key
211
+ for key in self.__dir__()
212
+ if not key.startswith("_") and not callable(getattr(self, key)) and getattr(self, key) is not None
213
+ ]
214
+ return f"{type(self).__name__}({', '.join([f'{key}={str(getattr(self, key))}' for key in keys])})"
215
+
216
+
217
+ class _DeletedField:
218
+ pass
219
+
220
+
221
class Date(_Field):
    """A field storing a datetime value."""

    def check(self, value, context=[], **kwargs):
        """Convert ``value`` to a datetime.

        None is passed through. The literal string "NOW" is replaced by the current
        time from ``now_as_iso``. Parsing first tries the strict DATEFORMAT (result
        tagged as UTC) and falls back to ``arrow.get`` for anything else.

        Raises:
            HowlerValueError: if neither parser accepts the value.
        """
        if value is None:
            return None

        candidate = now_as_iso() if value == "NOW" else value

        try:
            try:
                parsed = datetime.strptime(candidate, DATEFORMAT)
            except (TypeError, ValueError):
                # Not in the canonical format (or not a string): let arrow have a go
                return arrow.get(candidate).datetime

            return parsed.replace(tzinfo=UTC_TZ)
        except Exception as e:
            raise HowlerValueError(f"[{'.'.join(context) or self.name}]: {str(e)}")
238
+
239
+
240
class Boolean(_Field):
    """A field storing a boolean value."""

    def check(self, value, context=[], **kwargs):
        """Coerce ``value`` with ``bool()``; None is kept as None for optional fields.

        Raises:
            HowlerValueError: if ``bool(value)`` raises ValueError.
        """
        if value is None and self.optional:
            return None

        try:
            coerced = bool(value)
        except ValueError as e:
            raise HowlerValueError(f"[{'.'.join(context) or self.name}]: {str(e)}")

        return coerced
251
+
252
+
253
class Json(_Field):
    """A field storing a serializable structure as its JSON-encoded string.

    Examples: metadata
    """

    def check(self, value, context=[], **kwargs):
        """Return ``value`` as a JSON string.

        Strings are returned untouched (they are not checked for being valid JSON);
        anything else goes through ``json.dumps``. None is kept as None for optional fields.

        Raises:
            HowlerValueError: if the value cannot be serialised.
        """
        if value is None and self.optional:
            return None

        if isinstance(value, str):
            return value

        try:
            return json.dumps(value)
        except (ValueError, OverflowError, TypeError) as e:
            raise HowlerValueError(f"[{'.'.join(context) or self.name}]: {str(e)}")
270
+
271
+
272
class Keyword(_Field):
    """A field storing a short string with a technical interpretation.

    Examples: file hashes, service names, document ids
    """

    def check(self, value, context=[], **kwargs):
        """Return ``value`` as a string.

        None is kept as None for optional fields. Empty strings and None fall back
        to the field default when one is set.

        Raises:
            HowlerValueError: for bytes input, or for an empty value without a default.
        """
        if value is None and self.optional:
            return None

        # Bytes are rejected explicitly because strings and bytes get mixed up
        # so often in python apis
        if isinstance(value, bytes):
            raise HowlerValueError(f"[{'.'.join(context) or self.name}] Keyword doesn't accept bytes values")

        if value == "" or value is None:
            if not self.default_set:
                raise HowlerValueError(
                    f"[{'.'.join(context) or self.name}] Empty strings are not allowed without defaults"
                )
            value = self.default

        return None if value is None else str(value)
299
+
300
+
301
class EmptyableKeyword(_Field):
    """A keyword that distinguishes between empty strings and None values."""

    def check(self, value, context=[], **kwargs):
        """Return ``value`` as a string, letting the empty string through unchanged.

        None is kept as None for optional fields; otherwise None is replaced by the
        field default when one is set.

        Raises:
            HowlerValueError: for bytes input.
        """
        if value is None and self.optional:
            return None

        # Bytes are rejected explicitly because strings and bytes get mixed up
        # so often in python apis
        if isinstance(value, bytes):
            raise HowlerValueError(f"[{'.'.join(context) or self.name}] EmptyableKeyword doesn't accept bytes values")

        resolved = self.default if (value is None and self.default_set) else value

        return None if resolved is None else str(resolved)
320
+
321
+
322
class UpperKeyword(Keyword):
    """A field storing a short uppercase string with a technical interpretation."""

    def check(self, value, context=[], **kwargs):
        """Apply the Keyword validation, then upper-case the result (None stays None)."""
        checked = super().check(value, context=context, **kwargs)
        return checked.upper() if checked is not None else None
332
+
333
+
334
class LowerKeyword(Keyword):
    """A field storing a short lowercase string with a technical interpretation."""

    def check(self, value, context=[], **kwargs):
        """Apply the Keyword validation, then lower-case the result (None stays None)."""
        checked = super().check(value, context=context, **kwargs)
        return checked.lower() if checked is not None else None
346
+
347
+
348
class CaseInsensitiveKeyword(Keyword):
    """A field storing a string with a technical interpretation, but is case-insensitive when searching.

    Validation is inherited unchanged from Keyword; this class adds no behaviour of its own.
    """
352
+
353
+
354
class Any(Keyword):
    """A field that can hold any value whatsoever but which is stored as a
    Keyword in the datastore index
    """

    def __init__(self, *args, **kwargs):
        """Build the field with ``index`` and ``store`` forced to False, whatever the caller passed."""
        kwargs.update(index=False, store=False)
        super().__init__(*args, **kwargs)

    def check(self, value, **_):
        """Accept ``value`` as-is, without validation or conversion."""
        return value
366
+
367
+
368
class ValidatedKeyword(Keyword):
    """Keyword field whose values are validated by a regular expression."""

    def __init__(self, validation_regex, *args, **kwargs):
        """Create the field.

        Args:
            validation_regex: Pattern (string) compiled once and applied with ``match``.
            *args, **kwargs: Forwarded to ``Keyword``.
        """
        super().__init__(*args, **kwargs)
        self.validation_regex = re.compile(validation_regex)

    def __deepcopy__(self, memo=None):
        """Rebuild the field through its constructor instead of copying ``__dict__``.

        Only the constructor arguments listed in ``valid_fields`` are carried over.
        """
        # NOTE: This deepcopy code does not work with a sub-class that adds args or kwargs that should be copied.
        #       If that is the case, the sub-class should implement its own deepcopy function.
        # The last four entries are constructor arguments too: leaving them out made copies
        # produced by getter()/setter() silently lose e.g. their ``optional`` flag.
        valid_fields = [
            "name",
            "index",
            "store",
            "copyto",
            "default",
            "description",
            "deprecated_description",
            "reference",
            "optional",
            "deprecated",
        ]
        init_kwargs = {k: v for k, v in self.__dict__.items() if k in valid_fields}

        # Sub-classes that hard-code their pattern do not take validation_regex in __init__
        if "validation_regex" in self.__class__.__init__.__code__.co_varnames:
            return self.__class__(self.validation_regex.pattern, **init_kwargs)

        return self.__class__(**init_kwargs)

    def check(self, value, context=[], **kwargs):
        """Validate ``value`` against the pattern and return it as a string.

        None is kept as None for optional fields. Falsy values fall back to the
        field default when one is set.

        Raises:
            HowlerValueError: for an empty value without a default, or when the
                value does not match the pattern.
        """
        if self.optional and value is None:
            return None

        if not value:
            if self.default_set:
                value = self.default
            else:
                raise HowlerValueError(
                    f"[{'.'.join(context) or self.name}]: Empty strings are not allowed without defaults"
                )

        if value is None:
            return value

        if not self.validation_regex.match(value):
            raise HowlerValueError(
                f"[{'.'.join(context) or self.name}]: '{value}' not match the "
                f"validator: {self.validation_regex.pattern}"
            )

        return str(value)
409
+
410
+
411
class IP(Keyword):
    """Keyword holding an IPv4 or IPv6 address, validated against IP_ONLY_REGEX."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.validation_regex = re.compile(IP_ONLY_REGEX)

    def check(self, value, context=[], **kwargs):
        """Return ``value`` unchanged when it matches the pattern; falsy values become None.

        Raises:
            HowlerValueError: when the value does not match the pattern.
        """
        if not value:
            return None

        if self.validation_regex.match(value):
            return value

        raise HowlerValueError(
            f"[{'.'.join(context) or self.name}]: '{value}' not match the "
            f"validator: {self.validation_regex.pattern}"
        )
427
+
428
+
429
class Domain(Keyword):
    """Keyword holding a domain name, stored lower-cased."""

    def __init__(self, *args, strict=True, **kwargs):
        """Create the field.

        Args:
            strict: When True the value must pass ``validators.domain`` as well as
                ``validators.hostname``; when False only the hostname check is enforced.
        """
        super().__init__(*args, **kwargs)
        self.strict = strict

    def check(self, value, context=[], **kwargs):
        """Validate ``value`` and return it lower-cased; falsy values become None.

        Raises:
            HowlerValueError: when an enforced validator rejects the value.
        """
        if not value:
            return None

        # The domain validator always runs, but its verdict only matters in strict mode
        domain_verdict = validators.domain(value)
        if self.strict and isinstance(domain_verdict, Exception):
            raise HowlerValueError(
                f"[{'.'.join(context) or self.name}] '{value}' did not pass validation."
            ) from domain_verdict

        hostname_verdict = validators.hostname(value)
        if isinstance(hostname_verdict, Exception):
            raise HowlerValueError(
                f"[{'.'.join(context) or self.name}] '{value}' did not pass validation."
            ) from hostname_verdict

        return value.lower()
452
+
453
+
454
class Email(Keyword):
    """Keyword holding an e-mail address, stored lower-cased."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.validation_regex = re.compile(EMAIL_REGEX)

    def check(self, value, context=[], **kwargs):
        """Validate ``value`` and return it lower-cased; falsy values become None.

        The address must pass ``validators.email``, match EMAIL_REGEX, and its
        domain part (regex group 1) must satisfy ``is_valid_domain``.

        Raises:
            HowlerValueError: when any of the three checks fails.
        """
        if not value:
            return None

        validation_result = validators.email(value)
        if isinstance(validation_result, Exception):
            raise HowlerValueError(
                f"[{'.'.join(context) or self.name}] '{value}' did not pass validation."
            ) from validation_result

        match = self.validation_regex.match(value)
        if match is None:
            # The library validator and EMAIL_REGEX do not accept exactly the same set of
            # addresses; without this guard the code below crashed with AttributeError.
            raise HowlerValueError(f"[{'.'.join(context) or self.name}] '{value}' did not pass validation.")

        if not is_valid_domain(match.group(1)):
            raise HowlerValueError(
                f"[{'.'.join(context) or self.name}] '{match.group(1)}' in email '{value}'" " is not a valid Domain."
            )

        return value.lower()
476
+
477
+
478
class URI(Keyword):
    """Keyword holding a URI, validated against FULL_URI."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.validation_regex = re.compile(FULL_URI)

    def check(self, value, context=[], **kwargs):
        """Validate ``value`` and return it with regex group 1 lower-cased.

        Falsy values become None. Regex group 2 (the host) must be a valid domain or IP.

        Raises:
            HowlerValueError: when the pattern does not match or the host is invalid.
        """
        if not value:
            return None

        match = self.validation_regex.match(value)
        if not match:
            raise HowlerValueError(
                f"[{'.'.join(context) or self.name}] '{value}' not match the "
                f"validator: {self.validation_regex.pattern}"
            )

        host = match.group(2)
        if not (is_valid_domain(host) or is_valid_ip(host)):
            raise HowlerValueError(
                f"[{'.'.join(context) or self.name}] '{match.group(2)}' in URI '{value}'"
                " is not a valid Domain or IP."
            )

        # Group 1 is everything before the path; only that part is case-normalised
        authority = match.group(1)
        return match.group(0).replace(authority, authority.lower())
501
+
502
+
503
class URIPath(ValidatedKeyword):
    """Keyword validated with URI_PATH: the value must start with "/", "?" or "#"."""

    def __init__(self, *args, **kwargs):
        super().__init__(URI_PATH, *args, **kwargs)
506
+
507
+
508
class MAC(ValidatedKeyword):
    """Keyword validated with MAC_REGEX (six lowercase hex pairs joined by "-" or ":")."""

    def __init__(self, *args, **kwargs):
        super().__init__(MAC_REGEX, *args, **kwargs)
511
+
512
+
513
class PhoneNumber(ValidatedKeyword):
    """Keyword validated with PHONE_REGEX (3-3-4 digit number with an optional country code)."""

    def __init__(self, *args, **kwargs):
        super().__init__(PHONE_REGEX, *args, **kwargs)
516
+
517
+
518
class SSDeepHash(ValidatedKeyword):
    """Keyword validated with SSDEEP_REGEX (``<digits>:<chunk>:<chunk>``)."""

    def __init__(self, *args, **kwargs):
        super().__init__(SSDEEP_REGEX, *args, **kwargs)
521
+
522
+
523
class SHA1(ValidatedKeyword):
    """Keyword validated with SHA1_REGEX (exactly 40 lowercase hex characters)."""

    def __init__(self, *args, **kwargs):
        super().__init__(SHA1_REGEX, *args, **kwargs)
526
+
527
+
528
class SHA256(ValidatedKeyword):
    """Keyword validated with SHA256_REGEX (exactly 64 lowercase hex characters)."""

    def __init__(self, *args, **kwargs):
        super().__init__(SHA256_REGEX, *args, **kwargs)
531
+
532
+
533
class HowlerHash(ValidatedKeyword):
    """Keyword validated with HOWLER_HASH_REGEX (1 to 64 lowercase hex characters)."""

    def __init__(self, *args, **kwargs):
        super().__init__(HOWLER_HASH_REGEX, *args, **kwargs)
536
+
537
+
538
class MD5(ValidatedKeyword):
    """Keyword validated with MD5_REGEX (exactly 32 lowercase hex characters)."""

    def __init__(self, *args, **kwargs):
        super().__init__(MD5_REGEX, *args, **kwargs)
541
+
542
+
543
class Platform(ValidatedKeyword):
    """Keyword validated with PLATFORM_REGEX (Windows, Linux, MacOS, Android or iOS; case-sensitive)."""

    def __init__(self, *args, **kwargs):
        super().__init__(PLATFORM_REGEX, *args, **kwargs)
546
+
547
+
548
class Processor(ValidatedKeyword):
    """Keyword validated with PROCESSOR_REGEX ("x64" or "x86")."""

    def __init__(self, *args, **kwargs):
        super().__init__(PROCESSOR_REGEX, *args, **kwargs)
551
+
552
+
553
class Enum(Keyword):
    """A field storing a short string that has a predefined list of possible values."""

    def __init__(self, values: PyEnum | list[typing.Any] | set[typing.Any], *args, **kwargs):
        """Create the field.

        Args:
            values: Allowed values, given as a set (used as-is), a list or tuple, or
                a Python enum whose member ``value`` attributes are collected.
            *args, **kwargs: Forwarded to ``Keyword``.

        Raises:
            HowlerTypeError: if ``values`` is of any other type.
        """
        super().__init__(*args, **kwargs)
        if isinstance(values, set):
            self.values = values
        elif isinstance(values, (list, tuple)):
            self.values = set(values)
        elif isinstance(values, (PyEnum, EnumMeta)):
            self.values = {e.value for e in values}
        else:
            raise HowlerTypeError(f"Type unsupported for Enum odm: {type(values)}")

    def check(self, value, context=[], **kwargs):
        """Return ``value`` as a string after checking it is one of the allowed values.

        None is kept as None for optional fields. Falsy values fall back to the
        field default when one is set.

        Raises:
            HowlerValueError: for an empty value without a default, or a value
                outside the allowed set.
        """
        if self.optional and value is None:
            return None

        if not value:
            if self.default_set:
                value = self.default
            else:
                # Fall back to the field name when no context path is given, like Keyword does
                raise HowlerValueError(
                    f"[{'.'.join(context) or self.name}] Empty enums are not allow without defaults"
                )

        if value not in self.values:
            raise HowlerValueError(
                f"[{'.'.join(context) or self.name}] {value} not in the possible values: {self.values}"
            )

        if value is None:
            return value

        return str(value)
584
+
585
+
586
class UUID(Keyword):
    """A field storing an auto-generated unique ID if None is provided."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # A value is always available (generated on demand), so advertise a default
        self.default_set = True

    def check(self, value, **kwargs):
        """Return ``value`` as a string, generating a fresh random id when it is None."""
        return str(get_random_id() if value is None else value)
598
+
599
+
600
class Text(_Field):
    """A field storing human readable text data."""

    def check(self, value, context=[], **kwargs):
        """Return ``value`` as a string.

        None is kept as None for optional fields. Falsy values fall back to the
        field default when one is set.

        Raises:
            HowlerValueError: for an empty value without a default.
        """
        if self.optional and value is None:
            return None

        if not value:
            if self.default_set:
                value = self.default
            else:
                # Fall back to the field name when no context path is given, like Keyword does
                raise HowlerValueError(
                    f"[{'.'.join(context) or self.name}] Empty strings are not allowed without defaults"
                )

        if value is None:
            return None

        return str(value)
617
+
618
+
619
class IndexText(_Field):
    """A special field with special processing rules to simplify searching."""

    def check(self, value, **kwargs):
        """Return ``str(value)``; None is kept as None only for optional fields."""
        if value is None and self.optional:
            return None

        return str(value)
627
+
628
+
629
class Integer(_Field):
    """A field storing an integer value."""

    def check(self, value, context=[], **kwargs):
        """Convert ``value`` with ``int()``.

        None is kept as None for optional fields. None and the empty string fall
        back to the field default when one is set.

        Raises:
            HowlerValueError: if the value cannot be converted to an integer.
        """
        if self.optional and value is None:
            return None

        if (value is None or value == "") and self.default_set:
            return self.default

        try:
            return int(value)
        except (TypeError, ValueError) as e:
            # int(None) and other non-numeric types raise TypeError rather than ValueError;
            # both are reported the same way so callers see one validation error type.
            raise HowlerValueError(f"[{'.'.join(context) or self.name}]: {str(e)}") from e
644
+
645
+
646
class Long(_Field):
    """A field storing a long value. Equivalent to Integer in python, but sets the ES datatype to long.

    In Elasticsearch, Integer supports values from -2^31 to 2^31-1, while Long supports values from -2^63 to 2^63-1.
    """

    def check(self, value, context=[], **kwargs):
        """Convert ``value`` with ``int()``.

        None is kept as None for optional fields. None and the empty string fall
        back to the field default when one is set.

        Raises:
            HowlerValueError: if the value cannot be converted to an integer.
        """
        if self.optional and value is None:
            return None

        if (value is None or value == "") and self.default_set:
            return self.default

        try:
            return int(value)
        except (TypeError, ValueError) as e:
            # int(None) and other non-numeric types raise TypeError rather than ValueError;
            # both are reported the same way so callers see one validation error type.
            raise HowlerValueError(f"[{'.'.join(context) or self.name}]: {str(e)}") from e
665
+
666
+
667
class Float(_Field):
    """A field storing a floating point value."""

    def check(self, value, context=[], **kwargs):
        """Convert ``value`` with ``float()``.

        None is kept as None for optional fields. None and the empty string fall
        back to the field default when one is set. Zero is a regular value and is
        never replaced by the default.

        Raises:
            HowlerValueError: if the value cannot be converted to a float.
        """
        if self.optional and value is None:
            return None

        # Only "missing" inputs use the default; a plain truthiness test would also
        # swallow 0 and 0.0, which are legitimate floats.
        if (value is None or value == "") and self.default_set:
            return self.default

        try:
            return float(value)
        except (TypeError, ValueError) as e:
            raise HowlerValueError(f"[{'.'.join(context) or self.name}]: {str(e)}") from e
681
+
682
+
683
class ClassificationObject(object):
    """A classification string bound to the engine that knows how to interpret it.

    The value is normalised by the engine on construction. Ordering comparisons
    delegate to ``engine.is_accessible``.
    """

    def __init__(self, engine, value, is_uc=False):
        """Normalise ``value`` with ``engine``.

        Args:
            engine: Object providing the classification operations used below.
            value: Raw classification string.
            is_uc: Passed to the engine as ``skip_auto_select`` / ``user_classification``.
        """
        self.engine = engine
        self.is_uc = is_uc
        self.value = engine.normalize_classification(value, skip_auto_select=is_uc)

    def get_access_control_parts(self):
        """Return the engine's access control parts for this classification."""
        return self.engine.get_access_control_parts(self.value, user_classification=self.is_uc)

    def min(self, other):
        """Return a new object holding the engine's ``min_classification`` of self and ``other``."""
        return ClassificationObject(
            self.engine,
            self.engine.min_classification(self.value, other.value),
            is_uc=self.is_uc,
        )

    def max(self, other):
        """Return a new object holding the engine's ``max_classification`` of self and ``other``."""
        return ClassificationObject(
            self.engine,
            self.engine.max_classification(self.value, other.value),
            is_uc=self.is_uc,
        )

    def intersect(self, other):
        """Return a new object holding the engine's ``intersect_user_classification`` result."""
        return ClassificationObject(
            self.engine,
            self.engine.intersect_user_classification(self.value, other.value),
            is_uc=self.is_uc,
        )

    def long(self):
        """Return the classification normalised in the engine's default (long) format."""
        return self.engine.normalize_classification(self.value, skip_auto_select=self.is_uc)

    def small(self):
        """Return the classification normalised with ``long_format=False``."""
        return self.engine.normalize_classification(self.value, long_format=False, skip_auto_select=self.is_uc)

    def __str__(self):
        return self.value

    def __eq__(self, other):
        # Anything exposing ``.value`` is comparable; other objects (None, plain strings, ...)
        # are simply "not equal" instead of raising AttributeError.
        try:
            return self.value == other.value
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        try:
            return self.value != other.value
        except AttributeError:
            return NotImplemented

    def __le__(self, other):
        return self.engine.is_accessible(other.value, self.value)

    def __lt__(self, other):
        # Strictly lower: ``other`` can access self, but not the other way around.
        # (Previously identical to __le__, which made ``a < a`` evaluate to True.)
        return self.engine.is_accessible(other.value, self.value) and not self.engine.is_accessible(
            self.value, other.value
        )

    def __ge__(self, other):
        return self.engine.is_accessible(self.value, other.value)

    def __gt__(self, other):
        return not self.engine.is_accessible(other.value, self.value)
739
+
740
+
741
class Classification(Keyword):
    """A field storing access control classification."""

    def __init__(self, *args, is_user_classification=False, yml_config=None, **kwargs):
        """Set up the field and load the classification engine.

        An expanded classification is one that controls the access to the
        document which holds it.

        Args:
            *args: Forwarded to the parent field constructor.
            is_user_classification: Stored as ``is_uc`` and handed to every
                ``ClassificationObject`` this field produces.
            yml_config: Forwarded to ``loader.get_classification``.
            **kwargs: Forwarded to the parent field constructor.
        """
        super().__init__(*args, **kwargs)
        self.engine = loader.get_classification(yml_config=yml_config)
        self.is_uc = is_user_classification

    def check(self, value, **kwargs):
        """Wrap ``value`` in a ``ClassificationObject`` bound to this field's engine.

        An incoming ``ClassificationObject`` is unwrapped first, so the result
        always uses this field's engine and ``is_uc`` flag.
        """
        if value is None and self.optional:
            return None

        raw = value.value if isinstance(value, ClassificationObject) else value
        return ClassificationObject(self.engine, raw, is_uc=self.is_uc)
760
+
761
+
762
class ClassificationString(Keyword):
    """A field storing the classification as a string only."""

    def __init__(self, *args, yml_config=None, **kwargs):
        """Set up the field and load the classification engine from ``yml_config``."""
        super().__init__(*args, **kwargs)
        self.engine = loader.get_classification(yml_config=yml_config)

    def check(self, value, context=[], **kwargs):
        """Validate ``value`` against the engine and return it as ``str``.

        Args:
            value: Classification to validate.
            context: Path components used in error messages; when empty the
                field's own name is used instead.
            **kwargs: Accepted for signature compatibility; not used here.

        Raises:
            HowlerValueError: When the value is empty and no default is set,
                or when the engine reports the value as invalid.
        """
        if value is None and self.optional:
            return None

        if not value:
            if not self.default_set:
                raise HowlerValueError(
                    f"[{'.'.join(context) or self.name}]: Empty classification is not allowed without defaults"
                )
            value = self.default

        if self.engine.is_valid(value):
            return str(value)

        raise HowlerValueError(f"[{'.'.join(context) or self.name}]: Invalid classification: {value}")
785
+
786
+
787
class TypedList(list):
    """A ``list`` whose elements are passed through a field type's ``check()``.

    Construction and the overridden mutators (``append``, ``extend``,
    ``insert``, item/slice assignment) store whatever ``check()`` returns.
    """

    def __init__(self, type_p, *items, context=[], **kwargs):
        """Validate ``items`` with ``type_p`` and store the results.

        Extra keyword arguments are forwarded to ``type_p.check`` for the
        initial items only.
        """
        self.context = context
        self.type = type_p

        validated = [type_p.check(entry, context=self.context, **kwargs) for entry in items]
        super().__init__(validated)

    def _validated(self, candidate):
        # Single place where later mutations run the element validator.
        return self.type.check(candidate, context=self.context)

    def append(self, item):
        super().append(self._validated(item))

    def extend(self, sequence):
        # Kept lazy: elements are validated one at a time as the list grows.
        super().extend(self._validated(entry) for entry in sequence)

    def insert(self, index, item):
        super().insert(index, self._validated(item))

    def __setitem__(self, index, item):
        if not isinstance(index, slice):
            super().__setitem__(index, self._validated(item))
            return

        replacement = [self._validated(entry) for entry in item]
        super().__setitem__(index, replacement)
809
+
810
+
811
class List(_Field):
    """A field storing a sequence of typed elements."""

    def __init__(self, child_type, **kwargs):
        """Create a list field whose elements are validated by ``child_type``."""
        super().__init__(**kwargs)
        self.child_type = child_type

    def check(self, value, **kwargs):
        """Validate ``value`` and return it as a ``TypedList`` of ``child_type``.

        A ``dict`` given to a list of compound fields is first converted from a
        "dictionary of lists" into a "list of dictionaries" (see below).

        Raises:
            HowlerValueError: When the flattened columns of such a ``dict`` do
                not all have the same length.
        """
        if value is None and self.optional:
            return None

        if isinstance(self.child_type, Compound) and isinstance(value, dict):
            # Search queries over a list of compound fields return dotted paths
            # holding lists of values. flat_fields knows nothing about the data
            # layout, so what arrives here is a dictionary of lists rather than
            # a list of single-object dictionaries. Pivot it back so the rest
            # of the model validation can go through.
            rows = []
            first_key = None
            expected = None
            for key, column in flatten(value).items():
                if not isinstance(column, list):
                    column = [column]

                if expected is None:
                    # The first column fixes the number of rows.
                    first_key = key
                    expected = len(column)
                    rows.extend({key: cell} for cell in column)
                elif len(column) != expected:
                    raise HowlerValueError(
                        "Flattened fields creating list of ODMs must have equal length. Key "
                        f"{key} has length {len(column)} compared to key {first_key} with length {expected}."
                    )
                else:
                    for row, cell in zip(rows, column):
                        row[key] = cell

            return TypedList(self.child_type, *rows, **kwargs)

        if value is None:
            logger.warning("Value is None, but optional is not set to True. Using an empty list to avoid errors.")
            value = []

        return TypedList(self.child_type, *value, **kwargs)

    def apply_defaults(self, index, store):
        """Initialize the default settings for the child field."""
        # The list itself first ...
        super().apply_defaults(index, store)
        # ... then the child type, using the values now set on the list.
        self.child_type.apply_defaults(self.index, self.store)

    def fields(self):
        """Return deep copies of the child type's fields with this list's index/store applied."""
        prepared = dict()
        for child_name, original in self.child_type.fields().items():
            clone = copy.deepcopy(original)
            clone.apply_defaults(self.index, self.store)
            prepared[child_name] = clone
        return prepared
880
+
881
+
882
class TypedMapping(dict):
    """A ``dict`` that validates keys with a sanitizer and values with a field type.

    Keys must satisfy ``sanitizer.match``; values are replaced by the result of
    ``type_p.check``.

    NOTE(review): initial items arrive as ``**items``, so a key spelled
    ``context`` binds to the ``context`` parameter instead of becoming an item
    -- confirm callers never need that key.
    """

    def __init__(self, type_p, index, store, sanitizer, context=[], **items):
        """Validate and store ``items``.

        Raises:
            HowlerKeyError: When a key is rejected by ``sanitizer``.
        """
        self.index = index
        self.store = store
        self.sanitizer = sanitizer
        self.context = context

        for key in items.keys():
            if not self.sanitizer.match(key):
                raise HowlerKeyError(f"[{'.'.join(self.context)}]: Illegal key {key}")

        super().__init__({key: type_p.check(el, context=self.context) for key, el in items.items()})
        self.type = type_p

    def __setitem__(self, key, item):
        """Validate ``key`` and ``item`` before storing them.

        Raises:
            HowlerKeyError: When ``key`` is rejected by the sanitizer.
        """
        if not self.sanitizer.match(key):
            raise HowlerKeyError(f"[{'.'.join(self.context)}]: Illegal key: {key}")

        return super().__setitem__(key, self.type.check(item, context=self.context))

    def update(self, *args, **kwargs):
        """Validate and merge new entries.

        Three input layouts are supported and may be combined:
          1. a single dictionary,
          2. key/value pairs given as positional arguments,
          3. keyword arguments (applied last, so they win on duplicate keys).

        All keys are validated before any value is checked or stored.

        Raises:
            HowlerKeyError: When any key is rejected by the sanitizer.
        """
        pending = {}

        # 1. A single dictionary
        if len(args) == 1 and isinstance(args[0], dict):
            pending.update(args[0])
        # 2. Key/value pairs as positional arguments
        elif args:
            for key, item in args:
                pending[key] = item

        # 3. Keyword arguments. These used to be dropped whenever positional
        #    input was also supplied, because the branches above returned early.
        pending.update(kwargs)

        for key in pending:
            if not self.sanitizer.match(key):
                raise HowlerKeyError(f"[{'.'.join(self.context)}]: Illegal key: {key}")

        return super().update({key: self.type.check(item, context=self.context) for key, item in pending.items()})
927
+
928
+
929
class Mapping(_Field):
    """A field storing a dictionary of typed values under sanitized keys."""

    def __init__(self, child_type, **kwargs):
        """Create a mapping field whose values are validated by ``child_type``."""
        self.child_type = child_type
        super().__init__(**kwargs)

    def check(self, value, **kwargs):
        """Return ``value`` as a ``TypedMapping`` of ``child_type``.

        Indexed or stored mappings use ``FIELD_SANITIZER`` for their keys; all
        others use ``NOT_INDEXED_SANITIZER``.
        """
        if value is None and self.optional:
            return None

        sanitizer = FIELD_SANITIZER if (self.index or self.store) else NOT_INDEXED_SANITIZER
        return TypedMapping(self.child_type, self.index, self.store, sanitizer, **value)

    def apply_defaults(self, index, store):
        """Initialize the default settings for the child field."""
        # The mapping itself first ...
        super().apply_defaults(index, store)
        # ... then the child type, using the values now set on the mapping.
        self.child_type.apply_defaults(self.index, self.store)
953
+
954
+
955
class FlattenedListObject(Mapping):
    """A field storing a flattened object whose values are lists of JSON values."""

    def __init__(self, **kwargs):
        """Create the field as a mapping of ``List(Json())``."""
        super().__init__(List(Json()), **kwargs)

    def check(self, value, **kwargs):
        """Return ``value`` as a ``TypedMapping`` keyed per ``FLATTENED_OBJECT_SANITIZER``."""
        if value is None and self.optional:
            return None

        key_rule = FLATTENED_OBJECT_SANITIZER
        return TypedMapping(self.child_type, self.index, self.store, key_rule, **value)

    def apply_defaults(self, index, store):
        """Initialize the default settings for the child field."""
        # The mapping itself first ...
        super().apply_defaults(index, store)
        # ... then the child type, using the values now set on the mapping.
        self.child_type.apply_defaults(self.index, self.store)
973
+
974
+
975
class FlattenedObject(Mapping):
    """A field storing a flattened object whose values are JSON values."""

    def __init__(self, **kwargs):
        """Create the field as a mapping of ``Json()``."""
        super().__init__(Json(), **kwargs)

    def check(self, value, context=[], **kwargs):
        """Return ``value`` as a ``TypedMapping`` keyed per ``FLATTENED_OBJECT_SANITIZER``.

        ``context`` is handed to the mapping so key errors can report where
        they occurred.
        """
        if value is None and self.optional:
            return None

        key_rule = FLATTENED_OBJECT_SANITIZER
        return TypedMapping(self.child_type, self.index, self.store, key_rule, context=context, **value)

    def apply_defaults(self, index, store):
        """Initialize the default settings for the child field."""
        # The mapping itself first ...
        super().apply_defaults(index, store)
        # ... then the child type, using the values now set on the mapping.
        self.child_type.apply_defaults(self.index, self.store)
1000
+
1001
+
1002
class Compound(_Field):
    """A field whose value is an instance of another model class."""

    def __init__(self, field_type, **kwargs):
        """Create a compound field wrapping the model class ``field_type``."""
        super().__init__(**kwargs)
        self.child_type = field_type

    def check(
        self,
        value,
        mask=None,
        ignore_extra_values=False,
        extra_fields={},
        context=[],
        **kwargs,
    ):
        """Return ``value`` as an instance of the wrapped model class.

        An existing instance is returned untouched; anything else is passed to
        the model constructor together with ``mask``, ``ignore_extra_values``,
        ``extra_fields`` and ``context``. Other keyword arguments are ignored.
        """
        if value is None and self.optional:
            return None

        if not isinstance(value, self.child_type):
            value = self.child_type(
                value,
                mask=mask,
                ignore_extra_values=ignore_extra_values,
                extra_fields=extra_fields,
                context=context,
            )
        return value

    def fields(self):
        """Return deep copies of the model's fields with this field's index/store applied."""
        prepared = dict()
        for child_name, original in self.child_type.fields().items():
            clone = copy.deepcopy(original)
            clone.apply_defaults(self.index, self.store)
            prepared[child_name] = clone
        return prepared
1037
+
1038
+
1039
class Optional(_Field):
    """A wrapper field to allow simple types (int, float, bool) to take None values."""

    def __init__(self, child_type: _Field, **kwargs):
        """Wrap ``child_type`` and mark it as optional.

        A default already set on the child is reused for the wrapper;
        otherwise the child is flagged as having a default. The wrapper itself
        always reports ``default_set``.
        """
        if not child_type.default_set:
            child_type.default_set = True
        else:
            kwargs["default"] = child_type.default

        super().__init__(**kwargs)
        self.default_set = True
        self.child_type = child_type
        self.child_type.optional = True

    def check(self, value, *args, **kwargs):
        """Return ``None`` for ``None``; otherwise defer to the wrapped field."""
        return None if value is None else self.child_type.check(value, *args, **kwargs)

    def fields(self):
        """Return the wrapped field's ``fields()``."""
        return self.child_type.fields()

    def apply_defaults(self, index, store):
        """Apply index/store defaults to the wrapper, then to the wrapped field."""
        super().apply_defaults(index, store)
        self.child_type.apply_defaults(self.index, self.store)
1064
+
1065
+
1066
class Model:
    """Base class for ODM models.

    Field definitions are the ``_Field`` instances found in the class
    ``__dict__``. Instances keep their validated values in ``_odm_py_obj`` and
    refuse assignment to undeclared attributes once construction is finished.
    """

    @classmethod
    def fields(cls, skip_mappings=False) -> dict[str, _Field]:
        """Describe the elements of the model.

        For compound fields return the field object.

        Args:
            skip_mappings (bool): Skip over mappings where the real subfield names are unknown.
        """
        # NOTE(review): hasattr() also finds a cache stored on a base class,
        # so a subclass would reuse its parent's cached fields -- confirm that
        # models are never subclassed.
        if skip_mappings and hasattr(cls, "_odm_field_cache_skip"):
            return cls._odm_field_cache_skip

        if not skip_mappings and hasattr(cls, "_odm_field_cache"):
            return cls._odm_field_cache

        out = dict()
        for name, field_data in cls.__dict__.items():
            if isinstance(field_data, _Field):
                if skip_mappings and isinstance(field_data, Mapping):
                    continue
                # Trailing underscores on attribute names are not part of the field name.
                out[name.rstrip("_")] = field_data

        if skip_mappings:
            cls._odm_field_cache_skip = out
        else:
            cls._odm_field_cache = out
        return out

    @classmethod
    def add_namespace(cls, namespace: str, field: _Field, index=None, store=None, description=None):
        """Attach ``field`` to the model class under the attribute ``namespace``.

        Any existing field caches are updated, then every sub-field of
        ``field`` is named and given the ``index``/``store`` defaults.

        Raises:
            HowlerValueError: When a sub-field name is rejected by
                ``FIELD_SANITIZER`` or listed in ``BANNED_FIELDS``.
        """
        recursive_set_name(field, namespace)

        if hasattr(cls, "_odm_field_cache_skip"):
            cls._odm_field_cache_skip[namespace.rstrip("_")] = field

        if hasattr(cls, "_odm_field_cache"):
            cls._odm_field_cache[namespace.rstrip("_")] = field

        setattr(cls, namespace, field)

        field._Model__description = description
        for name, field_data in field.fields().items():
            if not FIELD_SANITIZER.match(name) or name in BANNED_FIELDS:
                raise HowlerValueError(f"Illegal variable name: {name}")

            recursive_set_name(field_data, name)
            field_data.apply_defaults(index=index, store=store)

    @classmethod
    def remove_namespace(cls, namespace: str):
        """Remove the field stored under ``namespace`` from the caches and the class."""
        if hasattr(cls, "_odm_field_cache_skip"):
            del cls._odm_field_cache_skip[namespace.rstrip("_")]

        if hasattr(cls, "_odm_field_cache"):
            del cls._odm_field_cache[namespace.rstrip("_")]

        delattr(cls, namespace)

    @staticmethod
    def _recurse_fields(name, field, show_compound, skip_mappings, multivalued=False):
        """Flatten ``field`` into a ``{dotted.path: field}`` dictionary.

        Each visited sub-field gets its ``multivalued`` attribute set to
        whether it sits under a ``List``.
        """
        name = name.rstrip("_")
        out = dict()
        for sub_name, sub_field in field.fields().items():
            sub_field.multivalued = multivalued or isinstance(field, List)

            if skip_mappings and isinstance(sub_field, Mapping):
                continue

            elif isinstance(sub_field, (List, Optional, Compound)) and sub_name != "":
                # Container types are expanded through their child type.
                out.update(
                    Model._recurse_fields(
                        f"{name}.{sub_name}",
                        sub_field.child_type,
                        show_compound,
                        skip_mappings,
                        multivalued=multivalued or isinstance(sub_field, List),
                    )
                )

            elif sub_name:
                out[f"{name}.{sub_name}"] = sub_field

            else:
                # An empty sub-name is stored under the parent's own path.
                out[name] = sub_field

        if isinstance(field, Compound) and show_compound:
            out[name] = field

        return out

    @classmethod
    def flat_fields(cls, show_compound=False, skip_mappings=False) -> dict[str, _Field]:
        """Describe the elements of the model.

        Recurse into compound fields, concatenating the names with '.' separators.

        Args:
            show_compound (bool): Show compound as valid fields.
            skip_mappings (bool): Skip over mappings where the real subfield names are unknown.
        """
        out = dict()
        for name, field in cls.__dict__.items():
            if isinstance(field, _Field):
                if skip_mappings and isinstance(field, Mapping):
                    continue
                out.update(
                    Model._recurse_fields(
                        name,
                        field,
                        show_compound,
                        skip_mappings,
                        multivalued=isinstance(field, List),
                    )
                )
        return out

    @classmethod
    def markdown(
        cls,
        toc_depth=1,
        include_autogen_note=True,
        defaults=None,
        url_prefix="/howler/odm/class/",
    ) -> Union[str, Dict]:
        """Render the model as a markdown section with a table of its fields.

        Args:
            toc_depth: Number of ``#`` characters used for the heading.
            include_autogen_note: Prepend the "Auto-Generated Documentation" note.
            defaults: Optional dict (per-field values) or list shown in the
                "Default" column instead of the field defaults.
            url_prefix: Prefix of the links generated for compound field types.

        Returns:
            The markdown text.
        """
        markdown_content = (
            (
                '??? success "Auto-Generated Documentation"\n '
                "This set of documentation is automatically generated from source, and will help ensure any change to "
                "functionality will always be documented and available on release.\n\n"
            )
            if include_autogen_note
            else ""
        )

        # Header
        markdown_content += f"{'#'*toc_depth} {cls.__name__}\n\n> {cls.__description}\n\n"

        # Table
        table = "| Field | Type | Description | Required | Default |\n| :--- | :--- | :--- | :--- | :--- |\n"

        # Determine the type of Field we're dealing with
        # if possible return the Model class if wrapped in Compound
        def get_type(field_class: _Field) -> Tuple[str, Model]:
            if field_class.__class__ == Optional:
                return get_type(field_class.child_type)
            elif field_class.__class__ == Compound:
                name = field_class.child_type.__name__

                return (
                    f"[{name}]({url_prefix}{name.lower()})",
                    field_class.child_type,
                )
            elif field_class.__class__ in [Mapping, List]:
                child_type, child_class = (
                    field_class.child_type.__class__.__name__,
                    field_class.child_type.__class__,
                )
                if field_class.child_type.__class__ == Compound:
                    child_type, child_class = get_type(field_class.child_type)
                return f"{field_class.__class__.__name__} [{child_type}]", child_class
            elif field_class.__class__.__name__ == "type":
                return field_class.__name__, None

            return field_class.__class__.__name__, None

        for field, info in cls.fields().items():
            field_type, field_class = get_type(info)

            # Field description
            description = info.description
            if description is None and info.__class__ == Optional:
                description = info.child_type.description
                if info.child_type.reference:
                    # The description may still be None here; appending to None would raise.
                    description = (
                        description or ""
                    ) + f'<br><a href="{info.child_type.reference}">Reference Link</a><br>'
            elif info.reference:
                description = (description or "") + f'<br><a href="{info.reference}">Reference Link</a><br>'

            # If field type is Enum, then show the possible values that can be used in the description
            if field_type == "Enum":
                allowed = info.child_type.values if info.__class__ != Enum else info.values
                none_value = None in allowed
                # Build a filtered copy: the field's own value collection must
                # not lose None just because documentation was generated.
                values = [v for v in allowed if v is not None]

                values = [f'"{v}"' if v else str(v) for v in sorted(values)]
                if none_value:
                    values.append("None")
                description = f'{description}<br>Values:<br>`{", ".join(values)}`'

            # Is this a required field?
            if info.__class__ != Optional and not info.optional:
                required = ":material-checkbox-marked-outline: Yes"
            else:
                required = ":material-minus-box-outline: Optional"

            if info.deprecated:
                required += " :material-alert-box-outline: Deprecated - "
                required += info.deprecated_description
            elif info.__class__ == Optional and info.child_type.deprecated:
                required += " :material-alert-box-outline: Deprecated - "
                required += info.child_type.deprecated_description

            # Determine the correct default values to display
            default = f"`{info.default}`"
            # If the field is a model, then provide a link to that documentation
            if field_class and issubclass(field_class, Model) and isinstance(info.default, dict):
                ref_link = field_type[field_type.index("(") : field_type.index(")") + 1]
                default = f"See [{field_class.__name__}]{ref_link} for more details."

            # Handle how to display values from provided defaults (different from field defaults)
            elif isinstance(defaults, dict):
                val = defaults.get(field, {})
                default = f"`{val if not isinstance(val, dict) else info.default}`"
            elif isinstance(defaults, list):
                default = f"`{defaults}`"
            row = f"| {field} | {field_type} | {description} | {required} | {default} |\n"
            table += row

        markdown_content += table + "\n\n"

        return markdown_content

    # Allow attribute assignment by default in the constructor until it is removed
    __frozen = False
    # Descriptions of the model should be class-accessible only for markdown()
    __description = None

    def __init__(
        self,
        data: dict = None,
        mask: list = None,
        docid=None,
        ignore_extra_values=True,
        extra_fields={},
        context=[],
    ):
        """Build a model instance from ``data``, validating every declared field.

        Args:
            data: Dict-like input; flat dotted keys are nested before use.
            mask: Optional list of (dotted) field names to keep. Fields not
                listed are remembered in ``_odm_removed`` and skipped.
            docid: Stored as ``_id``.
            ignore_extra_values: When False, unknown keys in ``data`` raise.
            extra_fields: Values for keys absent from ``data``; stored after
                passing through ``Any().check``.
            context: Path used to prefix error messages; defaults to the
                lowercase class name.

        Raises:
            HowlerTypeError: When ``data`` has no ``items`` attribute.
            HowlerValueError: On unknown keys (if not ignored) or when a
                required value is missing.
        """
        if len(context) == 0:
            context = [self.__class__.__name__.lower()]

        if data is None:
            data = {}

        if not hasattr(data, "items"):
            raise HowlerTypeError(f"'{self.__class__.__name__}' object must be constructed with dict like")
        self._odm_py_obj = {}
        self._id = docid
        self.context = context

        # Parse the field mask for sub models
        mask_map = {}
        if mask is not None:
            for entry in mask:
                if "." in entry:
                    child, sub_key = entry.split(".", 1)
                    # None marks a field that is already selected as a whole;
                    # a narrower sub-key adds nothing and must not be
                    # appended to None.
                    if child in mask_map and mask_map[child] is None:
                        continue
                    mask_map.setdefault(child, []).append(sub_key)
                else:
                    mask_map[entry] = None

        # Get the list of fields we expect this object to have
        fields = self.fields()
        self._odm_removed = {}
        if mask is not None:
            self._odm_removed = {k: v for k, v in fields.items() if k not in mask_map}
            fields = {k: v for k, v in fields.items() if k in mask_map}

        # Trim out keys that actually belong to sub sections
        data = flat_to_nested(data)

        # Check to make sure we can use all the data we are given
        self.unused_keys = set(data.keys()) - set(fields.keys()) - BANNED_FIELDS
        extra_keys = set(extra_fields.keys()) - set(data.keys())
        if self.unused_keys and not ignore_extra_values:
            raise HowlerValueError(
                f"[{'.'.join(context)}]: object was created with invalid parameters: " f"{', '.join(self.unused_keys)}"
            )

        # Pass each value through its respective validator, and store it
        for name, field_type in fields.items():
            params = {"ignore_extra_values": ignore_extra_values}
            if name in mask_map and mask_map[name]:
                params["mask"] = mask_map[name]
            if name in extra_fields and extra_fields[name]:
                params["extra_fields"] = extra_fields[name]

            try:
                value = data[name]
            except KeyError:
                if field_type.default_set:
                    value = copy.copy(field_type.default)
                elif not field_type.optional:
                    raise HowlerValueError(f"[{'.'.join([*context, name])}]: value is missing from the object!")
                else:
                    value = None

            self._odm_py_obj[name.rstrip("_")] = field_type.check(value, context=[*context, name], **params)

        for key in extra_keys:
            # Report the extra key itself in the context, not whichever field
            # the validation loop above happened to finish on.
            self._odm_py_obj[key.rstrip("_")] = Any().check(extra_fields[key], context=[*context, key])

        # Since the layout of model objects should be fixed, don't allow any further
        # attribute assignment
        self.__frozen = True

    def as_primitives(self, hidden_fields=False, strip_null=True) -> dict[str, typing.Any]:
        """Convert the object back into primitives that can be json serialized.

        Args:
            hidden_fields: Also merge in the access control parts of
                classification values held directly by this object.
            strip_null: Leave out keys whose value is ``None``.
        """
        out = {}

        fields = self.fields()
        for key, value in self._odm_py_obj.items():
            field_type = fields.get(key, Any)
            if value is not None or (value is None and field_type.default_set):
                if strip_null and value is None:
                    continue

                if isinstance(value, Model):
                    out[key] = value.as_primitives(strip_null=strip_null)
                elif isinstance(value, datetime):
                    out[key] = value.strftime(DATEFORMAT)
                elif isinstance(value, TypedMapping):
                    out[key] = {
                        k: (v.as_primitives(strip_null=strip_null) if isinstance(v, Model) else v)
                        for k, v in value.items()
                    }
                elif isinstance(value, TypedList):
                    out[key] = [(v.as_primitives(strip_null=strip_null) if isinstance(v, Model) else v) for v in value]
                elif isinstance(value, ClassificationObject):
                    out[key] = str(value)
                    if hidden_fields:
                        out.update(value.get_access_control_parts())
                else:
                    out[key] = value
        return out

    def json(self):
        """Return ``as_primitives()`` serialized with ``json.dumps``."""
        return json.dumps(self.as_primitives())

    def __eq__(self, other):
        """Compare field by field; a dict is first converted to this model class."""
        if isinstance(other, dict):
            try:
                other = self.__class__(other)
            except (ValueError, KeyError):
                return False

        elif not isinstance(other, self.__class__):
            return False

        if len(self._odm_py_obj) != len(other._odm_py_obj):
            return False

        for name, field in self.fields().items():
            # Fields dropped by the mask are not part of the comparison.
            if name in self._odm_removed:
                continue
            if field.__get__(self) != field.__get__(other):
                return False

        return True

    def __repr__(self):
        if self._id:
            return f"<{self.__class__.__name__} [{self._id}] {self.json()}>"
        return f"<{self.__class__.__name__} {self.json()}>"

    def __getitem__(self, name):
        """Look up a (possibly dotted) field path in the stored values."""
        data = self._odm_py_obj
        for component in name.split("."):
            data = data[component.rstrip("_")]

        return data

    def get(self, name, default=None):
        """Return ``self[name]``, or ``default`` when the lookup raises ``KeyError``."""
        try:
            return self[name]
        except KeyError:
            return default

    def __setitem__(self, name, value):
        """Assign a declared field by name.

        Raises:
            HowlerKeyError: When ``name`` is not in the cached field list.
        """
        if name not in self._odm_field_cache:
            raise HowlerKeyError(f"[{'.'.join(self.context)}]: {name}")
        return self.__setattr__(name, value)

    def __getattr__(self, name):
        # Any attribute that hasn't been explicitly declared is forbidden
        if name.rstrip("_") not in self.fields():
            raise HowlerKeyError(f"[{'.'.join(self.context)}]: {name}")

        return super().__getattr__(name)

    def __setattr__(self, name, value):
        # Any attribute that hasn't been explicitly declared is forbidden
        if self.__frozen and name.rstrip("_") not in self.fields():
            raise HowlerKeyError(f"[{'.'.join(self.context)}]: {name}")
        return object.__setattr__(self, name, value)

    def __contains__(self, name):
        """Return whether ``name`` (ignoring trailing underscores) is a declared field."""
        return name.rstrip("_") in self.fields()
1467
+
1468
+
1469
def recursive_set_name(field, name, to_parent=False):
    """Record ``name`` on ``field`` and propagate it to wrapped child types.

    Args:
        field: Field object to label.
        name: Name to store.
        to_parent: Store the name as ``parent_name`` instead of ``name``.
    """
    target_attr = "parent_name" if to_parent else "name"
    setattr(field, target_attr, name)

    # An Optional passes the name straight through to the field it wraps.
    if isinstance(field, Optional):
        recursive_set_name(field.child_type, name)
    # The element type of a List only learns the name as its parent's name.
    if isinstance(field, List):
        recursive_set_name(field.child_type, name, to_parent=True)
1479
+
1480
+
1481
def model(index=None, store=None, description=None):
    """Decorator to create model objects.

    Args:
        index: Default ``index`` setting applied to every field of the class.
        store: Default ``store`` setting applied to every field of the class.
        description: Stored on the class as ``_Model__description``.

    Returns:
        A class decorator that names the fields, applies the defaults and
        returns the same class.

    Raises:
        HowlerValueError: (from the decorator) when a field name is rejected by
            ``FIELD_SANITIZER`` or listed in ``BANNED_FIELDS``.
    """

    def _finish_model(cls):
        cls._Model__description = description

        for field_name, field_obj in cls.fields().items():
            acceptable = FIELD_SANITIZER.match(field_name) and field_name not in BANNED_FIELDS
            if not acceptable:
                raise HowlerValueError(f"Illegal variable name: {field_name}")

            recursive_set_name(field_obj, field_name)
            field_obj.apply_defaults(index=index, store=store)

        return cls

    return _finish_model
1495
+
1496
+
1497
def _construct_field(field, value):
    """Validate ``value`` against ``field`` without raising on bad data.

    Returns:
        A ``(clean, dropped)`` pair: the part of ``value`` that validated (or
        ``None``) and the part that was rejected (or ``None``).
    """
    if isinstance(field, List):
        accepted, rejected = [], []
        for element in value:
            good, bad = _construct_field(field.child_type, element)
            if good is not None:
                accepted.append(good)
            if bad is not None and bad != "":
                rejected.append(bad)
        # Empty results are reported as None rather than [].
        return accepted or None, rejected or None

    elif isinstance(field, Compound):
        built, leftovers = construct_safe(field.child_type, value)
        return built, (None if len(leftovers) == 0 else leftovers)

    elif isinstance(field, Optional):
        return _construct_field(field.child_type, value)

    try:
        checked = field.check(value)
    except (ValueError, TypeError):
        return None, value
    return checked, None
1520
+
1521
+
1522
def construct_safe(mod, data) -> tuple[_Any, dict]:
    """Build ``mod`` from whatever part of ``data`` validates.

    Args:
        mod: Model class to instantiate.
        data: Candidate input; anything that is not a ``dict`` is rejected whole.

    Returns:
        ``(instance, dropped)``. ``dropped`` holds unknown keys and values that
        failed validation. When the model cannot be built from the clean part,
        the instance is ``None`` and the clean values are merged into the
        dropped ones.
    """
    if not isinstance(data, dict):
        return None, data

    known = mod.fields()
    accepted = {}
    rejected = {}
    for key, raw in data.items():
        if key not in known:
            rejected[key] = raw
            continue

        good, bad = _construct_field(known[key], raw)
        if good is not None:
            accepted[key] = good
        if bad is not None:
            rejected[key] = bad

    try:
        instance = mod(accepted)
    except ValueError:
        return None, recursive_update(rejected, accepted)
    return instance, rejected