mal-toolbox 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {mal_toolbox-2.0.0.dist-info → mal_toolbox-2.1.0.dist-info}/METADATA +2 -2
  2. mal_toolbox-2.1.0.dist-info/RECORD +51 -0
  3. {mal_toolbox-2.0.0.dist-info → mal_toolbox-2.1.0.dist-info}/WHEEL +1 -1
  4. maltoolbox/__init__.py +2 -2
  5. maltoolbox/attackgraph/__init__.py +2 -2
  6. maltoolbox/attackgraph/attackgraph.py +121 -549
  7. maltoolbox/attackgraph/factories.py +68 -0
  8. maltoolbox/attackgraph/file_utils.py +0 -0
  9. maltoolbox/attackgraph/generate.py +338 -0
  10. maltoolbox/attackgraph/node_getters.py +36 -0
  11. maltoolbox/attackgraph/ttcs.py +28 -0
  12. maltoolbox/language/__init__.py +2 -2
  13. maltoolbox/language/compiler/mal_compiler.py +4 -3
  14. maltoolbox/language/detector.py +43 -0
  15. maltoolbox/language/expression_chain.py +218 -0
  16. maltoolbox/language/language_graph_asset.py +180 -0
  17. maltoolbox/language/language_graph_assoc.py +147 -0
  18. maltoolbox/language/language_graph_attack_step.py +129 -0
  19. maltoolbox/language/language_graph_builder.py +282 -0
  20. maltoolbox/language/language_graph_loaders.py +7 -0
  21. maltoolbox/language/language_graph_lookup.py +140 -0
  22. maltoolbox/language/language_graph_serialization.py +5 -0
  23. maltoolbox/language/languagegraph.py +244 -1537
  24. maltoolbox/language/step_expression_processor.py +491 -0
  25. mal_toolbox-2.0.0.dist-info/RECORD +0 -36
  26. {mal_toolbox-2.0.0.dist-info → mal_toolbox-2.1.0.dist-info}/entry_points.txt +0 -0
  27. {mal_toolbox-2.0.0.dist-info → mal_toolbox-2.1.0.dist-info}/licenses/AUTHORS +0 -0
  28. {mal_toolbox-2.0.0.dist-info → mal_toolbox-2.1.0.dist-info}/licenses/LICENSE +0 -0
  29. {mal_toolbox-2.0.0.dist-info → mal_toolbox-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,68 @@
1
+ from __future__ import annotations
2
+ import logging
3
+ import zipfile
4
+ from maltoolbox.exceptions import AttackGraphStepExpressionError
5
+ from maltoolbox.language.languagegraph import LanguageGraph
6
+ from maltoolbox.model import Model
7
+
8
+ from maltoolbox.attackgraph.attackgraph import AttackGraph
9
+
10
+ from .. import log_configs
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def create_attack_graph(
    lang: str | LanguageGraph,
    model: str | Model,
) -> AttackGraph:
    """Create and return an attack graph

    The language and the model can each be given either as an already
    loaded object or as a path to load it from. The language graph, the
    language specification and the model are additionally saved to the
    files named in `log_configs`, for every entry that holds a path.

    Args:
    ----
    lang - path to language file (.mar or .mal) or a LanguageGraph object
    model - path to model file (yaml or json) or a Model object

    Returns:
    -------
    The AttackGraph built from the language graph and the model

    Raises:
    ------
    TypeError - if `lang` or `model` is neither a string nor the
                expected object type
    AttackGraphStepExpressionError - if an attack step expression could
                not be resolved while building the graph

    """
    # Load language
    if isinstance(lang, LanguageGraph):
        lang_graph = lang
    elif isinstance(lang, str):
        # Load from path
        try:
            lang_graph = LanguageGraph.from_mar_archive(lang)
        except zipfile.BadZipFile:
            # Not a zip archive, fall back to loading it as a MAL spec
            lang_graph = LanguageGraph.from_mal_spec(lang)
    else:
        raise TypeError("`lang` must be either string or LanguageGraph")

    # All three output files are guarded the same way: a missing key and
    # an empty value both mean that nothing should be saved.
    langspec_file = log_configs.get('langspec_file')
    if langspec_file:
        lang_graph.save_language_specification_to_json(langspec_file)

    langgraph_file = log_configs.get('langgraph_file')
    if langgraph_file:
        lang_graph.save_to_file(langgraph_file)

    # Load model
    if isinstance(model, Model):
        instance_model = model
    elif isinstance(model, str):
        # Load from path
        instance_model = Model.load_from_file(model, lang_graph)
    else:
        raise TypeError("`model` must be either string or Model")

    model_file = log_configs.get('model_file')
    if model_file:
        instance_model.save_to_file(model_file)

    try:
        attack_graph = AttackGraph(lang_graph, instance_model)
    except AttackGraphStepExpressionError:
        logger.error(
            'Attack graph generation failed when attempting '
            'to resolve attack step expression!'
        )
        # Bare raise keeps the original traceback untouched
        raise

    return attack_graph
File without changes
@@ -0,0 +1,338 @@
1
+ """Graph generation functions"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from typing import TYPE_CHECKING, Optional
8
+
9
+ from maltoolbox.attackgraph.node_getters import get_node_by_full_name
10
+ from maltoolbox.attackgraph.ttcs import get_ttc_dist
11
+
12
+ from ..exceptions import (
13
+ AttackGraphException,
14
+ AttackGraphStepExpressionError,
15
+ LanguageGraphException,
16
+ )
17
+ from ..language import ExpressionsChain, LanguageGraphAttackStep
18
+ from ..model import Model
19
+ from .node import AttackGraphNode
20
+
21
+ if TYPE_CHECKING:
22
+ from typing import Any
23
+ from ..model import ModelAsset
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
def link_node_children(
    model: Model,
    ag_node: AttackGraphNode,
    full_name_to_node: dict[str, AttackGraphNode]
) -> None:
    """Link one attack graph node to all of its children.

    The language graph attack step matching the node is looked up via the
    node's model asset type and the node name. Its child expression chains
    are followed, and then those of the steps it inherits from, until a
    step that overrides its parent has been processed or there is no
    further step to inherit from.

    Raises:
    ------
    AttackGraphException - if the node has no model asset
    """
    node_asset = ag_node.model_asset
    if not node_asset:
        raise AttackGraphException('Attack graph node is missing asset link')

    asset_type = model.lang_graph.assets[node_asset.type]
    current_step: LanguageGraphAttackStep | None = (
        asset_type.attack_steps[ag_node.name]
    )
    while current_step:
        for child_type, chains in current_step.children.items():
            for chain in chains:
                link_from_expr_chain(
                    model, ag_node, child_type, chain, full_name_to_node
                )
        # An overriding step replaces whatever it inherits, so stop there
        current_step = (
            None if current_step.overrides else current_step.inherits
        )
49
+
50
+
51
def link_from_expr_chain(
    model: Model,
    ag_node: AttackGraphNode,
    child_type: LanguageGraphAttackStep,
    expr_chain: ExpressionsChain | None,
    full_name_to_node: dict[str, AttackGraphNode]
) -> None:
    """Link a node to the targets reached through one expression chain.

    The chain is followed starting from the node's own model asset. For
    every asset reached, the node named "<asset name>:<child step name>"
    is looked up and connected as a child of `ag_node` (and `ag_node` as
    its parent).

    Raises:
    ------
    AttackGraphException - if the node has no model asset
    AttackGraphStepExpressionError - if the lookup yields no target node
    """
    start_asset = ag_node.model_asset
    if not start_asset:
        raise AttackGraphException(
            "Need model asset connection to generate graph"
        )

    reached_assets = follow_expr_chain(model, {start_asset}, expr_chain)
    # Falsy entries are skipped, there is nothing to link them to
    for target_asset in filter(None, reached_assets):
        target_full_name = f"{target_asset.name}:{child_type.name}"
        target_node = get_node_by_full_name(
            full_name_to_node, target_full_name
        )
        if not target_node:
            raise AttackGraphStepExpressionError(
                f'Failed to find target node "{target_asset.name}:{child_type.name}" '
                f'for "{ag_node.full_name}"({ag_node.id})'
            )
        logger.debug(
            'Linking attack step "%s"(%d) to attack step "%s"(%d)',
            ag_node.full_name, ag_node.id,
            target_node.full_name, target_node.id
        )
        ag_node.children.add(target_node)
        target_node.parents.add(ag_node)
83
+
84
+
85
def follow_field_expr_chain(
    target_assets: set[ModelAsset], expr_chain: ExpressionsChain
):
    """Follow a "field" expression chain.

    Returns a new set holding every asset associated with any of the
    current target assets through the field named by the chain. Assets
    lacking that field contribute nothing.

    Raises:
    ------
    LanguageGraphException - if the chain has no field name
    """
    field_name = expr_chain.fieldname
    if not field_name:
        raise LanguageGraphException(
            '"field" step expression chain is missing fieldname.'
        )

    associated: set[ModelAsset] = set()
    for asset in target_assets:
        associated.update(asset.associated_assets.get(field_name, ()))
    return associated
102
+
103
+
104
def follow_transitive_expr_chain(
    model: Model,
    target_assets: set[ModelAsset],
    expr_chain: ExpressionsChain
):
    """Follow a "transitive" expression chain.

    The sub link is applied repeatedly, each time starting from the assets
    discovered in the previous round, until a round yields no asset that
    has not been seen before.

    Returns:
    -------
    A new set holding the starting assets plus every asset reached. The
    set passed in as `target_assets` is left unmodified, so a caller that
    reuses it for another chain still sees its original content.

    Raises:
    ------
    LanguageGraphException - if the chain has no sub link
    """
    if not expr_chain.sub_link:
        raise LanguageGraphException(
            '"transitive" step expression chain is missing sub link.'
        )

    # Work on a copy so the caller's set is never expanded in place
    reached = set(target_assets)
    frontier = reached
    while True:
        # Only assets not seen before need to be followed further
        frontier = follow_expr_chain(
            model, frontier, expr_chain.sub_link
        ).difference(reached)
        if not frontier:
            break
        reached.update(frontier)
    return reached
123
+
124
+
125
def follow_subtype_expr_chain(
    model: Model,
    target_assets: set[ModelAsset],
    expr_chain: ExpressionsChain
):
    """Follow a "subType" expression chain.

    The sub link is followed first. Of the assets it yields, only those
    whose language graph asset is a sub-asset of the chain's subtype are
    kept.

    Returns:
    -------
    A new set with the assets that matched the subtype

    Raises:
    ------
    LanguageGraphException - if the chain has no sub link
    LookupError - if the language graph asset of a candidate, or the
                  subtype of the chain, is missing
    """
    if not expr_chain.sub_link:
        raise LanguageGraphException(
            '"subType" step expression chain is missing sub link.'
        )

    candidate_assets = follow_expr_chain(
        model, target_assets, expr_chain.sub_link
    )
    lang_graph_subtype_asset = expr_chain.subtype

    selected_new_target_assets = set()
    for asset in candidate_assets:
        lang_graph_asset = model.lang_graph.assets[asset.type]
        if not lang_graph_asset:
            raise LookupError(
                f'Failed to find asset "{asset.type}" in the '
                'language graph.'
            )
        # Checked per candidate, so that a chain without candidates does
        # not fail on a missing subtype.
        if not lang_graph_subtype_asset:
            raise LookupError(
                f'Failed to find asset "{expr_chain.subtype}" in '
                'the language graph.'
            )
        if lang_graph_asset.is_subasset_of(lang_graph_subtype_asset):
            selected_new_target_assets.add(asset)

    return selected_new_target_assets
158
+
159
def follow_union_intersection_difference_expr_chain(
    model: Model,
    target_assets: set[ModelAsset],
    expr_chain: ExpressionsChain
) -> set[Any]:
    """Follow a "union", "intersection" or "difference" expression chain.

    The left and the right link are both followed from the same target
    assets and the two results are combined with the set operation named
    by the chain type.

    Returns:
    -------
    A new set with the combined targets

    Raises:
    ------
    LanguageGraphException - if the left or the right link is missing
    ValueError - if the chain type is none of the three set operations
    """
    if not expr_chain.left_link:
        raise LanguageGraphException(
            f'"{expr_chain.type}" step expression chain is missing '
            'the left link.'
        )
    if not expr_chain.right_link:
        raise LanguageGraphException(
            f'"{expr_chain.type}" step expression chain is missing '
            'the right link.'
        )

    # The left side gets its own copy of the targets, so that the right
    # side starts from the unchanged targets even if a sub-chain modifies
    # the set it is handed.
    lh_targets = follow_expr_chain(
        model, set(target_assets), expr_chain.left_link
    )
    rh_targets = follow_expr_chain(
        model, target_assets, expr_chain.right_link
    )

    if expr_chain.type == 'union':
        return lh_targets.union(rh_targets)

    if expr_chain.type == 'intersection':
        return lh_targets.intersection(rh_targets)

    if expr_chain.type == 'difference':
        return lh_targets.difference(rh_targets)

    raise ValueError(
        "Expr chain must be of type union, intersection or difference"
    )
195
+
196
+
197
def follow_collect_expr_chain(
    model: Model,
    target_assets: set[ModelAsset],
    expr_chain: ExpressionsChain
) -> set[Any]:
    """Follow a "collect" expression chain.

    The left link is followed from the target assets. The right link is
    then followed separately from each asset the left link yielded, and
    all of those results are gathered into one set.

    Raises:
    ------
    LanguageGraphException - if the left or the right link is missing
    """
    if not expr_chain.left_link:
        raise LanguageGraphException(
            '"collect" step expression chain missing the left link.'
        )
    if not expr_chain.right_link:
        raise LanguageGraphException(
            '"collect" step expression chain missing the right link.'
        )

    intermediate_assets = follow_expr_chain(
        model, target_assets, expr_chain.left_link
    )
    return {
        reached
        for intermediate in intermediate_assets
        for reached in follow_expr_chain(
            model, {intermediate}, expr_chain.right_link
        )
    }
223
+
224
+
225
def follow_expr_chain(
    model: Model,
    target_assets: set[ModelAsset],
    expr_chain: Optional[ExpressionsChain]
):
    """Resolve an expression chain starting from the given target assets.

    Dispatches on the chain type to the matching `follow_*_expr_chain`
    function. When there is no chain, a copy of the current target assets
    is returned.

    Raises:
    ------
    AttackGraphStepExpressionError - if the chain type is not known
    """
    if expr_chain is None:
        # Nothing left to follow, the current targets are the result
        return set(target_assets)

    # Serialising the chain is only worth it when it will be logged
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            'Following Expressions Chain:\n%s',
            json.dumps(expr_chain.to_dict(), indent=2)
        )

    chain_type = expr_chain.type

    if chain_type in ('union', 'intersection', 'difference'):
        return follow_union_intersection_difference_expr_chain(
            model, target_assets, expr_chain
        )
    if chain_type == 'field':
        return follow_field_expr_chain(target_assets, expr_chain)
    if chain_type == 'transitive':
        return follow_transitive_expr_chain(model, target_assets, expr_chain)
    if chain_type == 'subType':
        return follow_subtype_expr_chain(model, target_assets, expr_chain)
    if chain_type == 'collect':
        return follow_collect_expr_chain(model, target_assets, expr_chain)

    msg = 'Unknown attack expressions chain type: %s'
    logger.error(msg, expr_chain.type)
    raise AttackGraphStepExpressionError(msg % expr_chain.type)
269
+
270
+
271
def link_nodes_by_language(
    model: Model, full_name_to_node: dict[str, AttackGraphNode]
):
    """Link every node in `full_name_to_node` to its children."""
    for node in full_name_to_node.values():
        link_node_children(model, node, full_name_to_node)
276
+
277
+
278
def create_nodes_from_model(model: Model):
    """Create one attack graph node per attack step of every model asset.

    Node ids are assigned consecutively starting at 0, in the order the
    assets and their attack steps are iterated. No links between nodes
    are created here.

    Returns:
    -------
    A tuple (id_to_node, attack_steps, defense_steps, full_name_to_node)
    where `attack_steps` holds the nodes of type 'or' or 'and' and
    `defense_steps` holds the nodes of type 'defense'.
    """
    nodes_by_id = {}
    nodes_by_full_name = {}
    attack_nodes = []
    defense_nodes = []

    next_id = 0
    for asset in model.assets.values():
        asset_nodes = []
        asset.attack_step_nodes = asset_nodes  # TODO: deprecate this
        for step in asset.lg_asset.attack_steps.values():
            ttc_dist = get_ttc_dist(asset, step)
            existence_status = get_existance_status(model, asset, step)
            node = AttackGraphNode(
                node_id=next_id,
                lg_attack_step=step,
                model_asset=asset,
                ttc_dist=ttc_dist,
                existence_status=existence_status,
            )
            next_id += 1

            asset_nodes.append(node)  # TODO: deprecate this
            nodes_by_id[node.id] = node
            nodes_by_full_name[node.full_name] = node

            if node.type in ('or', 'and'):
                attack_nodes.append(node)
            elif node.type == 'defense':
                defense_nodes.append(node)

    return nodes_by_id, attack_nodes, defense_nodes, nodes_by_full_name
309
+
310
+
311
def generate_graph(model: Model):
    """Create all nodes for the model and link them to their children.

    Returns:
    -------
    The tuple (id_to_node, attack_steps, defense_steps, full_name_to_node)
    """
    (
        nodes_by_id,
        attack_nodes,
        defense_nodes,
        nodes_by_full_name,
    ) = create_nodes_from_model(model)
    link_nodes_by_language(model, nodes_by_full_name)
    return nodes_by_id, attack_nodes, defense_nodes, nodes_by_full_name
315
+
316
+
317
def get_existance_status(
    model: Model,
    asset: ModelAsset,
    lg_attack_step: LanguageGraphAttackStep
):
    """Work out the existence status of an 'exist' or 'notExist' step.

    Returns:
    -------
    None for any other step type. Otherwise True if at least one of the
    step's requirements resolves to one or more assets when followed from
    `asset`, and False if none does.
    """
    if lg_attack_step.type not in ('exist', 'notExist'):
        # No existence status for other type of steps
        return None

    # A requirement that yields target assets means the required assets
    # exist in the model; the first such requirement settles the answer.
    return any(
        follow_expr_chain(model, {asset}, requirement)
        for requirement in lg_attack_step.requires
    )
@@ -0,0 +1,36 @@
1
+ """MAL-Toolbox Attack Graph Module
2
+ """
3
+ from __future__ import annotations
4
+ import logging
5
+ from ..str_utils import levenshtein_distance
6
+ from .node import AttackGraphNode
7
+
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
def get_similar_full_names(
    full_name_to_node: dict[str, AttackGraphNode], q: str
):
    """Return the known full names closest to `q`.

    Closeness is the Levenshtein distance. All names sharing the smallest
    distance are returned, in the iteration order of the dictionary.
    Names at a distance above 100 are never returned.
    """
    distance_by_name = {
        full_name: levenshtein_distance(q, full_name)
        for full_name in full_name_to_node
    }
    # 100 is the bound the search starts from
    closest = min([100, *distance_by_name.values()])
    return [
        full_name
        for full_name, distance in distance_by_name.items()
        if distance == closest
    ]
26
+
27
+
28
def get_node_by_full_name(full_name_to_node: dict[str, AttackGraphNode], full_name: str):
    """Return the node registered under `full_name`.

    Raises:
    ------
    LookupError - if there is no such node; the message lists the most
                  similar known names
    """
    logger.debug('Looking up node with full name "%s"', full_name)
    if full_name in full_name_to_node:
        return full_name_to_node[full_name]

    similar_names = get_similar_full_names(full_name_to_node, full_name)
    raise LookupError(
        f'Could not find node with name "{full_name}". '
        f'Did you mean: {", ".join(similar_names)}?'
    )
@@ -0,0 +1,28 @@
1
+ import copy
2
+ import logging
3
+ from maltoolbox.language.language_graph_attack_step import LanguageGraphAttackStep
4
+ from maltoolbox.model import ModelAsset
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
def get_ttc_dist(
    asset: ModelAsset, attack_step: LanguageGraphAttackStep
):
    """Return the ttc distribution for an attack step on an asset.

    The distribution defined by the language is returned as a deep copy.
    For a defense step whose status is set on the asset, a Bernoulli
    distribution carrying that status value is returned instead.
    """
    language_ttc = copy.deepcopy(attack_step.ttc)

    status_set_in_model = (
        attack_step.type == 'defense'
        and attack_step.name in asset.defenses
    )
    if not status_set_in_model:
        return language_ttc

    # The defense status set in the model overrides the language ttc
    defense_value = float(asset.defenses[attack_step.name])
    logger.debug(
        'Setting defense \"%s\" to "%s".',
        asset.name + ":" + attack_step.name, defense_value
    )
    return {
        'arguments': [defense_value],
        'name': 'Bernoulli',
        'type': 'function'
    }
@@ -1,8 +1,8 @@
1
1
  """Contains tools to process MAL languages"""
2
2
 
3
+ from .detector import Context, Detector
4
+
3
5
  from .languagegraph import (
4
- Context,
5
- Detector,
6
6
  ExpressionsChain,
7
7
  LanguageGraph,
8
8
  LanguageGraphAsset,
@@ -440,7 +440,8 @@ class MalCompiler(ParseTreeVisitor):
440
440
  detectors: dict[str, Any] = {}
441
441
  while assert_node(cursor.node).type == 'detector':
442
442
  detector = self.visit(cursor)
443
- detector[detector['name']] = detector
443
+ detector_name = str(detector['name'])
444
+ detectors[detector_name] = detector
444
445
  if not go_to_sibling(cursor): # in case there is nothing after the meta
445
446
  break
446
447
 
@@ -536,8 +537,8 @@ class MalCompiler(ParseTreeVisitor):
536
537
  ###############
537
538
  # (type) (id) #
538
539
  ###############
539
- asset = node_text(cursor, 'asset')
540
- label = node_text(cursor, 'label')
540
+ asset = node_text(cursor, 'asset').decode('utf-8')
541
+ label = node_text(cursor, 'label').decode('utf-8')
541
542
 
542
543
  return (label, asset)
543
544
 
@@ -0,0 +1,43 @@
1
+ """Detector functionality
2
+ - A detector represents a logging rule on an attack step
3
+ - It includes a context and a name
4
+ """
5
+
6
+
7
+ from __future__ import annotations
8
+ from dataclasses import dataclass
9
+
10
+
11
@dataclass(frozen=True, eq=True)
class Detector:
    """A detector: a name, a context, a type and a tprate.

    Instances are frozen, so fields cannot be reassigned after creation.
    """

    name: str | None
    # Must provide a to_dict() method, which Detector.to_dict() calls
    context: Context
    type: str | None
    tprate: dict | None

    def to_dict(self) -> dict:
        """Return the detector as a dictionary.

        The context is converted through its own `to_dict`; the other
        fields are included as they are.
        """
        return dict(
            context=self.context.to_dict(),
            name=self.name,
            type=self.type,
            tprate=self.tprate,
        )
25
+
26
+
27
class Context(dict):
    """Context is part of detectors to provide meta data about attackers

    A context maps labels to assets. Besides being available as dictionary
    entries, every asset can also be reached as an attribute named after
    its label.
    """

    def __init__(self, context) -> None:
        """Create a context from a mapping of label to asset."""
        super().__init__(context)
        self._context_dict = context
        for label, asset in context.items():
            setattr(self, label, asset)

    def to_dict(self) -> dict:
        """Return a dictionary mapping each label to the name of its asset."""
        # Labels are also set as attributes, so a label called "items"
        # would shadow the method on the instance; go through dict itself.
        return {label: asset.name for label, asset in dict.items(self)}

    def __str__(self) -> str:
        # Same content as to_dict(), so both always agree
        return str(self.to_dict())

    def __repr__(self) -> str:
        return f"Context({self!s})"