mal-toolbox 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,920 @@
1
+ import logging
2
+ import re
3
+ from tree_sitter import Node
4
+ from .distributions import Distributions, DistributionsException
5
+ from typing import Any, Tuple, List
6
+
7
+
8
class malAnalyzerException(Exception):
    """
    Exception raised by the analyzer when a MAL specification is rejected.
    """

    def __init__(self, error_message):
        # Hand the text to Exception so str(exc) yields it, and keep a copy
        # on the instance under `_error_message`.
        super().__init__(error_message)
        self._error_message = error_message
12
+
13
+
14
+ class malAnalyzer:
15
+ """
16
+ A class to preform syntax-checks for MAL.
17
+ """
18
+
19
+ def __init__(self, *args, **kwargs) -> None:
20
+ self._error: bool = False
21
+ self._warn: bool = False
22
+ self._preform_post_analysis = 0
23
+
24
+ self._defines: dict = {}
25
+ self._assets: dict = {}
26
+ self._category: dict = {}
27
+ self._metas: dict = {}
28
+ self._steps: dict = {}
29
+ self._vars: dict = {}
30
+ self._associations: dict = {}
31
+
32
+ self._all_associations: list[dict[str, Any]] = []
33
+ self._current_vars: list[str] = []
34
+ self._include_stack: list[str] = []
35
+
36
+ self._error_msg: str = ''
37
+
38
+ super().__init__(*args, **kwargs)
39
+
40
+ def _raise_analyzer_exception(self, error_msg: str):
41
+ logging.error(error_msg)
42
+ raise malAnalyzerException(error_msg)
43
+
44
+ def has_error(self) -> bool:
45
+ return self._error
46
+
47
+ def has_warning(self) -> bool:
48
+ return self._warn
49
+
50
+ def _post_analysis(self) -> None:
51
+ """
52
+ Perform a post-analysis to confirm that the
53
+ mandatory fields and relations are met.
54
+ """
55
+ self._analyse_defines()
56
+ self._analyse_extends()
57
+ self._analyse_abstract()
58
+ self._analyse_parents()
59
+ self._analyse_association()
60
+ self._analyse_steps()
61
+ self._analyse_fields()
62
+ self._analyse_variables()
63
+ self._analyse_reaches()
64
+
65
+ def _analyse_defines(self) -> None:
66
+ """
67
+ Check for mandatory defines: ID & Version
68
+ """
69
+ if 'id' in self._defines.keys():
70
+ define_value: str = self._defines['id']['value']
71
+ if len(define_value) == 0:
72
+ error_msg = "Define 'id' cannot be empty"
73
+ self._raise_analyzer_exception(error_msg)
74
+ else:
75
+ error_msg = 'Missing required define \'#id: ""\''
76
+ self._raise_analyzer_exception(error_msg)
77
+
78
+ if 'version' in self._defines.keys():
79
+ version: str = self._defines['version']['value']
80
+ if not re.match(r'\d+\.\d+\.\d+', version):
81
+ error_msg = "Define 'version' must be valid semantic versioning without pre-release identifier and build metadata"
82
+ self._raise_analyzer_exception(error_msg)
83
+ else:
84
+ error_msg = 'Missing required define \'#version: ""\''
85
+ self._raise_analyzer_exception(error_msg)
86
+
87
+ def _analyse_extends(self) -> None:
88
+ """
89
+ For all assets which extend another, verify if the extended asset exists
90
+ """
91
+ extend_asset_name: str = ''
92
+ for asset in self._assets:
93
+ asset_node: Node = self._assets[asset]['node']
94
+ if asset_node.child_by_field_name('extends'):
95
+ # Next sibling is the identifier itself
96
+ extend_asset = asset_node.child_by_field_name('extends')
97
+ if extend_asset and extend_asset.next_sibling:
98
+ if extend_asset.next_sibling.text:
99
+ extend_asset_name = extend_asset.next_sibling.text.decode()
100
+
101
+ if extend_asset_name not in self._assets:
102
+ """
103
+ Do we need to check if the extended asset is
104
+ in the same category? If so we can load the asset
105
+ and check it's parent
106
+ """
107
+ error_msg = f"Asset '{extend_asset_name}' not defined"
108
+ self._raise_analyzer_exception(error_msg)
109
+
110
+ def _analyse_abstract(self) -> None:
111
+ """
112
+ For every abstract asset, verify if it is extended by another asset
113
+ """
114
+ for parent in self._assets:
115
+ parent_node: Node = self._assets[parent]['node']
116
+ parent_node_id_child = parent_node.child_by_field_name('id')
117
+ assert parent_node_id_child, 'No child id'
118
+ assert parent_node_id_child.text, 'No child id text'
119
+ parent_node_name: str = parent_node_id_child.text.decode()
120
+ if (
121
+ parent_node.children[0].text
122
+ and parent_node.children[0].text.decode() == 'abstract'
123
+ ):
124
+ found: bool = False
125
+ for extendee in self._assets:
126
+ extendee_node: Node = self._assets[extendee]['node']
127
+ extendee_node_extender = extendee_node.child_by_field_name(
128
+ 'extends'
129
+ )
130
+ if (
131
+ extendee_node_extender
132
+ and extendee_node_extender.next_sibling
133
+ and extendee_node_extender.next_sibling.text
134
+ and extendee_node_extender.next_sibling.text.decode()
135
+ == parent_node_name
136
+ ):
137
+ found = True
138
+ break
139
+ if not found:
140
+ self._warn = True
141
+ logging.warning(
142
+ f"Asset '{parent_node_name}' is abstract but never extended to"
143
+ )
144
+
145
+ def _analyse_parents(self) -> None:
146
+ """
147
+ Verify if there are circular extend relations
148
+ """
149
+ for asset in self._assets:
150
+ parents: list[str] = []
151
+ parent_node: Node | None = self._assets[asset]['node']
152
+ while parent_node and parent_node.type == 'asset_declaration':
153
+ parent_name_node = parent_node.child_by_field_name('id')
154
+ assert parent_name_node and parent_name_node.text, (
155
+ 'Asset need name node with text'
156
+ )
157
+ parent_name: str = parent_name_node.text.decode()
158
+ if parent_name in parents:
159
+ error_msg: str = ' -> '.join(parents)
160
+ error_msg += f' -> {parent_name}'
161
+ error_msg = f"Asset '{parent_name}' extends in loop '{error_msg}'"
162
+ self._raise_analyzer_exception(error_msg)
163
+ parents.append(parent_name)
164
+ parent_node = self._get_assets_extendee(parent_node)
165
+
166
+ def _get_assets_extendee(self, node: Node) -> Node | None:
167
+ """
168
+ Verifies if the current asset extends another and, if so, return the parent's context
169
+ """
170
+ if extender_node := node.child_by_field_name('extends'):
171
+ extender_name_node = extender_node.next_sibling
172
+ assert extender_name_node and extender_name_node.text, (
173
+ 'Asset need name node with text'
174
+ )
175
+ extender_node_name = extender_name_node.text.decode()
176
+ return self._assets[extender_node_name]['node']
177
+ return None
178
+
179
+ def _analyse_reaches(self) -> None:
180
+ """
181
+ For every attackStep in every asset, verify if the prerequisites point to assets and that the reaches point to
182
+ attack steps
183
+ """
184
+ for asset in self._assets.keys():
185
+ attack_steps = self._assets[asset]['obj']['attackSteps']
186
+ for attack_step in attack_steps:
187
+ if attack_step['type'] in ['exist', 'notExist']:
188
+ if attack_step['ttc']:
189
+ error_msg = f"Attack step of type '{attack_step['type']}' must not have TTC"
190
+ self._raise_analyzer_exception(error_msg)
191
+ if attack_step['requires']:
192
+ # Verify if every requires expression returns an asset
193
+ for expr in attack_step['requires']['stepExpressions']:
194
+ if not self._check_to_asset(asset, expr):
195
+ error_msg = (
196
+ f'Line {self._steps[asset][attack_step["name"]]["node"].start_point.row}: '
197
+ + "All expressions in requires ('<-') must point to a valid asset"
198
+ )
199
+ self._raise_analyzer_exception(
200
+ self._error_msg + error_msg
201
+ )
202
+ else:
203
+ error_msg = f"Attack step of type '{attack_step['type']}' must have require '<-'"
204
+ self._raise_analyzer_exception(error_msg)
205
+ elif attack_step['requires']:
206
+ error_msg = "Require '<-' may only be defined for attack step type exist 'E' or not-exist '!E'"
207
+ self._raise_analyzer_exception(error_msg)
208
+ if attack_step['reaches']:
209
+ # Verify if every reaches expresion returns an attack step
210
+ for expr in attack_step['reaches']['stepExpressions']:
211
+ if not self._check_to_step(asset, expr):
212
+ error_msg = (
213
+ f'Line {self._steps[asset][attack_step["name"]]["node"].start_point.row}: '
214
+ + "All expressions in reaches ('->') must point to a valid attack step"
215
+ )
216
+ self._raise_analyzer_exception(self._error_msg + error_msg)
217
+
218
+ def _analyse_association(self) -> None:
219
+ """
220
+ For every association, verify if the assets exist
221
+ """
222
+ for association in self._all_associations:
223
+ leftAsset = association['association']['leftAsset']
224
+ rightAsset = association['association']['rightAsset']
225
+
226
+ if leftAsset not in self._assets.keys():
227
+ error_msg = f"Left asset '{leftAsset}' is not defined"
228
+ self._raise_analyzer_exception(error_msg)
229
+ if rightAsset not in self._assets.keys():
230
+ error_msg = f"Right asset '{leftAsset}' is not defined"
231
+ self._raise_analyzer_exception(error_msg)
232
+
233
+ def _analyse_fields(self) -> None:
234
+ """
235
+ Update a variable's fields (associations) to include its parents associations.
236
+ Also checks if an association has been defined more than once in a hierarchy
237
+ """
238
+ for asset in self._assets.keys():
239
+ parents = self._get_parents(self._assets[asset]['node'])
240
+ for parent in parents:
241
+ for association in self._all_associations:
242
+ leftAsset = association['association']['leftAsset']
243
+ rightAsset = association['association']['rightAsset']
244
+ if leftAsset == parent:
245
+ rightField = association['association']['rightField']
246
+ self._add_field(parent, asset, rightField, association)
247
+ if rightAsset == parent:
248
+ leftField = association['association']['leftField']
249
+ self._add_field(parent, asset, leftField, association)
250
+
251
+ def _add_field(self, parent: str, asset: str, field: str, association: dict):
252
+ # Check that this asset does not have an assoication with the same name
253
+ if asset not in self._associations or field not in self._associations[asset]:
254
+ # Check if there isn't a step with the same name
255
+ step_node = self._has_step(asset, field)
256
+ if not step_node:
257
+ if asset not in self._associations.keys():
258
+ self._associations[asset] = {
259
+ field: {k: association[k] for k in ['association', 'node']}
260
+ }
261
+ else:
262
+ self._associations[asset][field] = {
263
+ k: association[k] for k in ['association', 'node']
264
+ }
265
+ # Otherwise, this will be an error
266
+ else:
267
+ error_msg = f'Field {field} previously defined as an attack step at {step_node.start_point.row}'
268
+ self._raise_analyzer_exception(error_msg)
269
+ # Association field was already defined for this asset
270
+ else:
271
+ error_msg = f'Field {parent}.{field} previously defined at {self._associations[asset][field]["node"].start_point.row}'
272
+ self._raise_analyzer_exception(error_msg)
273
+
274
+ def _has_step(self, asset, field):
275
+ if asset in self._steps.keys() and field in self._steps[asset]:
276
+ return self._steps[asset][field]['node']
277
+ return None
278
+
279
+ def _variable_to_asset(self, asset: str, var: str):
280
+ """
281
+ Checks if there is no cycle in the variables and verifies that it points to
282
+ an existing asset
283
+ """
284
+ if var not in self._current_vars:
285
+ self._current_vars.append(var)
286
+ res = self._check_to_asset(
287
+ asset, self._vars[asset][var]['var']['stepExpression']
288
+ )
289
+ self._current_vars.pop()
290
+ return res
291
+
292
+ cycle = '->'.join(self._current_vars) + '->' + var
293
+ error_msg = f"Variable '{var}' contains cycle {cycle}"
294
+ self._raise_analyzer_exception(error_msg)
295
+
296
+ def _analyse_variables(self):
297
+ """
298
+ This function will verify if an asset which extends another does not redefine a variable.
299
+ It also updates the list of variables for an asset to includes its parent's variables
300
+
301
+ Once that is done, we need to guarantee that the variable points to an asset and that
302
+ there are no loops in the variables, i.e. a variable does not somehow reference itself
303
+ """
304
+ for asset in self._assets.keys():
305
+ parents = self._get_parents(self._assets[asset]['node'])
306
+ parents.pop() # The last element is the asset itself, no need to check again if variable is defined twice
307
+ for parent in parents:
308
+ if parent not in self._vars.keys():
309
+ continue # If parent has no variables, we don't need to do anything
310
+ if asset not in self._vars.keys():
311
+ self._vars[asset] = self._vars[
312
+ parent
313
+ ] # If asset has no variables, just inherit its parents variables
314
+ continue
315
+ # Otherwise, we do need to check if variables are defined more than once
316
+ for var in self._vars[asset].keys():
317
+ if (
318
+ parent in self._vars.keys()
319
+ and var in self._vars[parent].keys()
320
+ and self._vars[asset][var]['node']
321
+ != self._vars[parent][var]['node']
322
+ ):
323
+ error_msg = f"Variable '{var}' previously defined at {self._vars[parent][var]['node'].start_point.row}"
324
+ self._raise_analyzer_exception(error_msg)
325
+ self._vars[asset].update(self._vars[parent])
326
+
327
+ # If the current asset has variables, we want to check they point to an asset
328
+ if asset in self._vars.keys():
329
+ for var in self._vars[asset].keys():
330
+ if self._variable_to_asset(asset, var) is None:
331
+ error_msg = f"Variable '{var}' defined at {self._vars[asset][var]['node'].start_point.row} does not point to an asset"
332
+ self._raise_analyzer_exception(self._error_msg + error_msg)
333
+
334
+ def _check_to_step(self, asset, expr) -> Any:
335
+ """
336
+ Given a reaches, verify if the expression resolves to a valid AttackStep for an existing Asset
337
+ """
338
+ match expr['type']:
339
+ # Returns an attackStep if it exists for this asset
340
+ case 'attackStep':
341
+ if asset in self._assets.keys():
342
+ for attackStep in self._steps[asset].keys():
343
+ if attackStep == expr['name']:
344
+ return self._steps[asset][attackStep]['step']
345
+ error_msg = (
346
+ f"Attack step '{expr['name']}' not defined for asset '{asset}'"
347
+ )
348
+ self._raise_analyzer_exception(error_msg)
349
+ # Returns an attackStep if it exists for the asset returned by the lhs expression
350
+ case 'collect':
351
+ if left_target := self._check_to_asset(asset, expr['lhs']):
352
+ return self._check_to_step(left_target, expr['rhs'])
353
+ return None
354
+ case _:
355
+ error_msg = 'Last step is not attack step'
356
+ self._raise_analyzer_exception(error_msg)
357
+
358
+ def _check_to_asset(self, asset, expr) -> Any:
359
+ """
360
+ Verify if the expression resolves to an existing Asset
361
+ """
362
+ match expr['type']:
363
+ # field - verify if asset exists via associations
364
+ case 'field':
365
+ return self._check_field_expr(asset, expr)
366
+ # variable - verify if there is a variable with this name
367
+ case 'variable':
368
+ return self._check_variable_expr(asset, expr)
369
+ # collect - return asset pointed by all the parts in the expression
370
+ case 'collect':
371
+ return self._check_collect_expr(asset, expr)
372
+ # set - verify if the assets in the operation have an common ancestor
373
+ case 'union' | 'intersection' | 'difference':
374
+ return self._check_set_expr(asset, expr)
375
+ # transitive - verify if the asset before * (STAR) exists
376
+ case 'transitive':
377
+ return self._check_transitive_expr(asset, expr)
378
+ # subtype - verifies if the asset inside [] is a child from the asset preceding it
379
+ case 'subType':
380
+ return self._check_sub_type_expr(asset, expr)
381
+ case _:
382
+ logging.error(f"Unexpected expression '{expr['type']}'")
383
+ self._error = True
384
+ return None
385
+
386
+ def _check_field_expr(self, asset, expr):
387
+ """
388
+ Check if an asset exists by checking the associations for the current asset
389
+ """
390
+ if asset in self._associations.keys():
391
+ for association in self._associations[asset].keys():
392
+ association = self._associations[asset][association]['association']
393
+ if expr['name'] == association['leftField']:
394
+ if self._get_asset_name(association['leftAsset']):
395
+ return association['leftAsset']
396
+ if expr['name'] == association['rightField']:
397
+ if self._get_asset_name(association['rightAsset']):
398
+ return association['rightAsset']
399
+
400
+ # Verify if there is a variable defined with the same name; possible the user forgot to call it
401
+ extra = ''
402
+ if asset in self._vars.keys() and expr['name'] in self._vars[asset].keys():
403
+ extra = f", did you mean the variable '{expr['name']}()'?"
404
+ self._warn = True
405
+
406
+ self._error_msg = (
407
+ f"Field '{expr['name']}' not defined for asset '{asset}'" + extra + '\n'
408
+ )
409
+
410
+ def _check_variable_expr(self, asset, expr):
411
+ """
412
+ Check if there is a variable reference in this asset with the user identifier.
413
+ """
414
+ if asset in self._vars.keys() and expr['name'] in self._vars[asset].keys():
415
+ return self._variable_to_asset(asset, expr['name'])
416
+
417
+ self._error_msg = f"Variable '{expr['name']}' is not defined\n"
418
+ return None
419
+
420
+ def _check_collect_expr(self, asset, expr):
421
+ """
422
+ Iteratively, retrieve the asset pointed by the leftmost expression and, recursively, check the rhs associated
423
+ with each lhs.
424
+ """
425
+ if left_target := self._check_to_asset(asset, expr['lhs']):
426
+ return self._check_to_asset(left_target, expr['rhs'])
427
+ return None
428
+
429
+ def _check_set_expr(self, asset, expr) -> None:
430
+ """
431
+ Obtains the assets pointed by boths hs's and verifies if they have a common ancestor
432
+ """
433
+ lhs_target = self._check_to_asset(asset, expr['lhs'])
434
+ rhs_target = self._check_to_asset(asset, expr['rhs'])
435
+ if not lhs_target or not rhs_target:
436
+ return None
437
+
438
+ if target := self._get_LCA(lhs_target, rhs_target):
439
+ return target
440
+
441
+ self._error_msg = (
442
+ f"Types '{lhs_target}' and '{rhs_target}' have no common ancestor\n"
443
+ )
444
+ return None
445
+
446
+ def _get_LCA(self, lhs_target, rhs_target):
447
+ """
448
+ Receives two assets and verifies if they have an ancestor in common
449
+ """
450
+ if self._is_child(lhs_target, rhs_target):
451
+ return lhs_target
452
+ elif self._is_child(rhs_target, lhs_target):
453
+ return rhs_target
454
+ else:
455
+ lhs_node = self._assets[lhs_target]['node']
456
+ rhs_node = self._assets[rhs_target]['node']
457
+ lhs_parent_node = self._get_assets_extendee(lhs_node)
458
+ rhs_parent_node = self._get_assets_extendee(rhs_node)
459
+ if not lhs_parent_node or not rhs_parent_node:
460
+ return None
461
+ return self._get_LCA(
462
+ lhs_parent_node.child_by_field_name('id').text.decode(),
463
+ rhs_parent_node.child_by_field_name('id').text.decode(),
464
+ )
465
+
466
+ def _check_sub_type_expr(self, asset, expr) -> None:
467
+ """
468
+ Given expr[ID], obtains the assets given by expr and ID and verifies if ID is
469
+ a child of expr
470
+ """
471
+ target = self._check_to_asset(asset, expr['stepExpression'])
472
+ if not target:
473
+ return None
474
+
475
+ if asset_type := self._get_asset_name(expr['subType']):
476
+ if self._is_child(target, asset_type):
477
+ return asset_type
478
+
479
+ self._error_msg = f"Asset '{asset_type}' cannot be of type '{target}'\n"
480
+ return None
481
+
482
+ def _check_transitive_expr(self, asset, expr) -> None:
483
+ """
484
+ Given expr*, obtain the asset given by expr and verify if it is a child of the current asset
485
+ """
486
+ if res := self._check_to_asset(asset, expr['stepExpression']):
487
+ if self._is_child(asset, res):
488
+ return res
489
+
490
+ self._error_msg = f"Previous asset '{asset}' is not of type '{res}'\n"
491
+ return None
492
+
493
+ def _is_child(self, parent_name, child_name):
494
+ """
495
+ Receives two assets and verifies if one extends the other
496
+ """
497
+ if parent_name == child_name:
498
+ return True
499
+
500
+ if valid_asset := self._get_asset_name(child_name):
501
+ asset_node: Node = self._assets[valid_asset]['node']
502
+ if parent_node := self._get_assets_extendee(asset_node):
503
+ child_parent_name = self._get_asset_name(
504
+ parent_node.child_by_field_name('id').text.decode()
505
+ )
506
+ return self._is_child(parent_name, child_parent_name)
507
+
508
+ return False
509
+
510
+ def _get_parents(self, node: Node) -> list[str]:
511
+ """
512
+ Given an asset, obtain its parents in inverse order.
513
+ I.e., A->B->C returns [C,B,A] for asset A
514
+ """
515
+ name_node = node.child_by_field_name('id')
516
+ assert name_node and name_node.text, 'Name node needs text'
517
+ parents = [name_node.text.decode()]
518
+ while node.child_by_field_name('extends'):
519
+ extends_node = node.child_by_field_name('extends')
520
+ assert extends_node, 'Need extends node'
521
+ assert extends_node.next_sibling and extends_node.next_sibling.text
522
+ parent_name = extends_node.next_sibling.text.decode()
523
+ if parent_name in parents:
524
+ break
525
+ parents.insert(0, parent_name)
526
+ node = self._assets[parent_name]['node']
527
+ return parents
528
+
529
+ def _get_asset_name(self, name):
530
+ if name in self._assets.keys():
531
+ return name
532
+
533
+ logging.error(f"Asset '{name}' not defined")
534
+ self._error = True
535
+ return None
536
+
537
+ def _analyse_steps(self) -> None:
538
+ """
539
+ For each asset, obtain its parents and analyse each step
540
+ """
541
+ for asset in self._assets.keys():
542
+ parents = self._get_parents(self._assets[asset]['node'])
543
+ self._read_steps(asset, parents)
544
+
545
+ def _attackStep_seen_in_parent(
546
+ self, attackStep: str, seen_steps: List
547
+ ) -> str | None:
548
+ """
549
+ Given a list of parent scopes, verify if the attackStep has been defined
550
+ """
551
+ for parent, parent_scope in seen_steps:
552
+ if attackStep in parent_scope:
553
+ return parent
554
+ return None
555
+
556
+ def _read_steps(self, asset: str, parents: List) -> None:
557
+ """
558
+ For an asset, check if every step is properly defined in accordance to its hierarchy, i.e. if any of the asset's parents
559
+ also defines this step
560
+ """
561
+
562
+ seen_steps: list[tuple] = []
563
+ for parent in parents:
564
+ # If this parent has no steps, skip it
565
+ if parent not in self._steps.keys():
566
+ continue
567
+
568
+ current_steps = []
569
+ for attackStep in self._steps[parent].keys():
570
+ # Verify if attackStep has not been defined in the current asset
571
+ if attackStep not in current_steps:
572
+ # Verify if attackStep has not been defined in any parent asset
573
+ prevDef_parent = self._attackStep_seen_in_parent(
574
+ attackStep, seen_steps
575
+ )
576
+ if not prevDef_parent:
577
+ # Since this attackStep has never been defined, it must either not reach or reach with '->'
578
+ attackStep_node = self._steps[parent][attackStep]['node']
579
+ attackStep_node_reaches = attackStep_node.child_by_field_name(
580
+ 'reaches'
581
+ )
582
+ if (
583
+ not attackStep_node_reaches
584
+ or attackStep_node_reaches.child_by_field_name(
585
+ 'operator'
586
+ ).text.decode()
587
+ == '->'
588
+ ):
589
+ # Valid step
590
+ current_steps.append(attackStep)
591
+ else:
592
+ # Step was defined using '+>', but there is nothing to inherit from
593
+ error_msg = f"Cannot inherit attack step '{attackStep}' without previous definition"
594
+ self._raise_analyzer_exception(error_msg)
595
+ else:
596
+ # Step was previously defined in a parent
597
+ # So it must be of the same type (&, |, #, E, !E)
598
+ attackStep_node = self._steps[parent][attackStep]['node']
599
+ parent_attackStep_node = self._steps[prevDef_parent][
600
+ attackStep
601
+ ]['node']
602
+ if (
603
+ attackStep_node.child_by_field_name('step_type').text
604
+ == parent_attackStep_node.child_by_field_name(
605
+ 'step_type'
606
+ ).text
607
+ ):
608
+ # Valid step
609
+ current_steps.append(attackStep)
610
+ else:
611
+ # Invalid, type mismatches that of parent
612
+ error_msg = (
613
+ f"Cannot override attack step '{attackStep}' previously defined "
614
+ + f"at {parent_attackStep_node.start_point.row} with different type '{attackStep_node.child_by_field_name('step_type').text.decode()}' "
615
+ + f"=/= '{parent_attackStep_node.child_by_field_name('step_type').text.decode()}'"
616
+ )
617
+ self._raise_analyzer_exception(error_msg)
618
+ seen_steps.append((parent, current_steps))
619
+
620
+ for parent, steps in seen_steps:
621
+ for step in steps:
622
+ if asset not in self._steps.keys():
623
+ self._steps[asset] = {step: self._steps[parent][step]}
624
+ else:
625
+ self._steps[asset][step] = self._steps[parent][step]
626
+
627
+ def check_source_file(self, node: Node) -> None:
628
+ """
629
+ We only want to preform _post_analysis as the very last step.
630
+ """
631
+ if self._preform_post_analysis == 0:
632
+ self._post_analysis()
633
+ else:
634
+ self._include_stack.pop()
635
+ self._preform_post_analysis -= 1
636
+
637
+ def check_asset_declaration(self, node: Node, asset: dict) -> None:
638
+ """
639
+ Given an asset, verify if it has been previously defined in the same category
640
+
641
+ {
642
+ 'asset':
643
+ {
644
+ 'node': node,
645
+ 'obj' : dict,
646
+ 'parent':
647
+ {
648
+ 'name': str,
649
+ 'node': node,
650
+ }
651
+ }
652
+ }
653
+ """
654
+ asset_name = asset['name']
655
+ assert node.parent, 'Category node needed'
656
+ category_name_node = node.parent.child_by_field_name('id')
657
+ assert category_name_node and category_name_node.text, 'Category need name'
658
+ category_name = category_name_node.text.decode()
659
+
660
+ if not asset_name:
661
+ logging.error(
662
+ f'Asset was defined without a name at line {node.start_point.row}'
663
+ )
664
+ self._error = True
665
+ return
666
+
667
+ # Check if asset was previously defined in the same category.
668
+ if asset_name in self._assets.keys():
669
+ prev_asset_line = self._assets[asset_name]['node'].start_point.row
670
+ error_msg = f"Asset '{asset_name}' previously defined at {prev_asset_line}"
671
+ self._raise_analyzer_exception(error_msg)
672
+ else:
673
+ self._assets[asset_name] = {
674
+ 'node': node,
675
+ 'obj': asset,
676
+ 'parent': {'name': category_name, 'node': node.parent},
677
+ }
678
+
679
+ def check_meta(self, node: Node, data: Tuple[str, str]) -> None:
680
+ """
681
+ Given a meta, verify if it was previously defined for the same type (category, asset, step or association)
682
+ """
683
+
684
+ meta_name, _ = data
685
+
686
+ # Check if we don't have the metas for this parent (category, asset, step or association)
687
+ if node.parent not in self._metas.keys():
688
+ self._metas[node.parent] = {meta_name: node}
689
+ # Check if the new meta is not already defined
690
+ elif (
691
+ node.parent in self._metas.keys()
692
+ and meta_name not in self._metas[node.parent]
693
+ ):
694
+ self._metas[node.parent][meta_name] = node
695
+ # Otherwise, throw error
696
+ else:
697
+ prev_node = self._metas[node.parent][meta_name]
698
+ error_msg = f'Metadata {meta_name} previously defined at {prev_node.start_point.row}'
699
+ self._raise_analyzer_exception(error_msg)
700
+
701
+ def check_category_declaration(
702
+ self, node: Node, data: Tuple[str, Tuple[List, Any]]
703
+ ) -> None:
704
+ """
705
+ Given a category, verify if it has a name and if contains metadata or assets
706
+ """
707
+ _, [[category], assets] = data
708
+
709
+ # TODO: is this really needed? doesn't the grammar prevent this?
710
+ if str(category['name']) == '<missing <INVALID>>':
711
+ category_line = node.start_point.row
712
+ logging.error(f'Category has no name at line {category_line}')
713
+ self._error = True
714
+ return
715
+
716
+ if len(category['meta']) == 0 and len(assets) == 0:
717
+ logging.warning(
718
+ f"Category '{category['name']}' contains no assets or metadata"
719
+ )
720
+
721
+ self._category[category['name']] = {
722
+ 'node': node,
723
+ 'obj': {'category': category, 'assets': assets},
724
+ }
725
+
726
+ def check_define_declaration(self, node: Node, data: Tuple[str, dict]) -> None:
727
+ """
728
+ Given a new define, verify if it has been previously defined
729
+ """
730
+ _, obj = data
731
+ key, value = list(obj.items())[0]
732
+
733
+ # ID and version can be defined multiple times
734
+ if key != 'id' and key != 'version' and key in self._defines.keys():
735
+ prev_define_line = self._defines[key]['node'].start_point.row
736
+ error_msg = f"Define '{key}' previously defined at line {prev_define_line}"
737
+ self._raise_analyzer_exception(error_msg)
738
+
739
+ self._defines[key] = {'node': node, 'value': value}
740
+
741
+ def check_include_declaration(self, node: Node, data: Tuple[str, str]) -> None:
742
+ """
743
+ When an include is found, it triggers the analysis of a new MAL file. To prevent
744
+ checkMal from being performed before all files have been analysed, we increment
745
+ the variable and, every time the file is finished being analysed, it is decreased
746
+ again (in checkMal()). This prevents out-of-order analysis.
747
+ """
748
+ self._preform_post_analysis += 1
749
+
750
+ include_file_node = node.child_by_field_name('file')
751
+ assert include_file_node, 'Need include file node'
752
+ assert include_file_node.text, 'Include needs text'
753
+ include_file = include_file_node.text.decode()
754
+ if include_file in self._include_stack:
755
+ cycle = (
756
+ '->'.join([file.replace('"', '') for file in self._include_stack])
757
+ + ' -> '
758
+ + include_file.replace('"', '')
759
+ )
760
+ error_msg = f'Include sequence contains cycle: {cycle}'
761
+ self._raise_analyzer_exception(error_msg)
762
+ self._include_stack.append(include_file)
763
+
764
+ def check_attack_step(self, node: Node, step: dict) -> None:
765
+ """
766
+ Given a step, check if it is already defined in the current asset. Otherwise, add it to the list of
767
+ steps related to this asset
768
+ """
769
+ _, step = step
770
+
771
+ step_name = step['name']
772
+ assert node.parent and node.parent.parent
773
+ asset_name_node = node.parent.parent.child_by_field_name('id')
774
+ assert asset_name_node and asset_name_node.text, 'Asset name node needs text'
775
+ asset_name = asset_name_node.text.decode()
776
+
777
+ # Check if asset has no steps
778
+ if asset_name not in self._steps.keys():
779
+ self._steps[asset_name] = {step_name: {'node': node, 'step': step}}
780
+ # If so, check if the there is no step with this name in the current asset
781
+ elif step_name not in self._steps[asset_name].keys():
782
+ self._steps[asset_name][step_name] = {'node': node, 'step': step}
783
+ # Otherwise, log error
784
+ else:
785
+ prev_node = self._steps[asset_name][step_name]['node']
786
+ error_msg = f"Attack step '{step_name}' previously defined at {prev_node.start_point.row}"
787
+ self._raise_analyzer_exception(error_msg)
788
+
789
+ self._validate_CIA(node, step)
790
+ self._validate_TTC(node, asset_name, step)
791
+
792
+ def _validate_CIA(self, node: Node, step: dict) -> None:
793
+ """
794
+ Given a step, check if it has CIAs. In that case, verify if the step is not of type
795
+ defense and that it does not have repeated CIAs.
796
+ """
797
+ if not node.child_by_field_name('cias'):
798
+ return
799
+
800
+ step_name = step['name']
801
+ assert node.parent and node.parent.parent
802
+ asset_name_node = node.parent.parent.child_by_field_name('id')
803
+ assert asset_name_node and asset_name_node.text, 'Asset name node needs text'
804
+ asset_name = asset_name_node.text.decode()
805
+
806
+ if (
807
+ step['type'] == 'defense'
808
+ or step['type'] == 'exist'
809
+ or step['type'] == 'notExist'
810
+ ):
811
+ error_msg = f'Line {node.start_point.row}: {step["type"]}: Defenses cannot have CIA classifications'
812
+ self._raise_analyzer_exception(error_msg)
813
+
814
+ cias = []
815
+
816
+ # Get the CIAs node and iterate over the individual CIA
817
+ cias_node = node.child_by_field_name('cias')
818
+ assert cias_node, 'Need CIA node'
819
+ for cia in cias_node.named_children:
820
+ assert cia.text, 'CIA node need text'
821
+ letter = cia.text.decode()
822
+
823
+ if letter in cias:
824
+ logging.warning(
825
+ f'Attack step {asset_name}.{step_name} contains duplicate classification {letter}'
826
+ )
827
+ self._warn = True
828
+ return
829
+ cias.append(letter)
830
+
831
+ def _validate_TTC(self, node: Node, asset_name, step: dict) -> None:
832
+ if not step['ttc']:
833
+ return
834
+ match step['type']:
835
+ case 'defense':
836
+ if step['ttc']['type'] != 'function':
837
+ error_msg = f'Defense {asset_name}.{step["name"]} may not have advanced TTC expressions'
838
+ self._raise_analyzer_exception(error_msg)
839
+
840
+ match step['ttc']['name']:
841
+ case 'Enabled' | 'Disabled' | 'Bernoulli':
842
+ try:
843
+ Distributions.validate(
844
+ step['ttc']['name'], step['ttc']['arguments']
845
+ )
846
+ except DistributionsException as e:
847
+ self._raise_analyzer_exception(e._error_message)
848
+ case _:
849
+ error_msg = f"Defense {asset_name}.{step['name']} may only have 'Enabled', 'Disabled', or 'Bernoulli(p)' as TTC"
850
+ self._raise_analyzer_exception(error_msg)
851
+ case 'exist' | 'notExist':
852
+ # This should log error, but it happens later in the code
853
+ pass
854
+ case _:
855
+ self._check_TTC_expr(step['ttc'])
856
+
857
+ def _check_TTC_expr(self, expr, isSubDivExp=False):
858
+ match expr['type']:
859
+ case 'subtraction' | 'exponentiation' | 'division':
860
+ self._check_TTC_expr(expr['lhs'], True)
861
+ self._check_TTC_expr(expr['rhs'], True)
862
+ case 'multiplication' | 'addition':
863
+ self._check_TTC_expr(expr['lhs'], False)
864
+ self._check_TTC_expr(expr['rhs'], False)
865
+ case 'function':
866
+ if expr['name'] in ['Enabled', 'Disabled']:
867
+ error_msg = "Distributions 'Enabled' or 'Disabled' may not be used as TTC values in '&' and '|' attack steps"
868
+ self._raise_analyzer_exception(error_msg)
869
+ if isSubDivExp and expr['name'] in ['Bernoulli', 'EasyAndUncertain']:
870
+ error_msg = f"TTC distribution '{expr['name']}' is not available in subtraction, division or exponential expressions."
871
+ self._raise_analyzer_exception(error_msg)
872
+ try:
873
+ Distributions.validate(expr['name'], expr['arguments'])
874
+ except DistributionsException as e:
875
+ self._raise_analyzer_exception(e._error_message)
876
+ case 'number':
877
+ # Always ok
878
+ pass
879
+ case _:
880
+ error_msg = f'Unexpected expression {expr}'
881
+ self._raise_analyzer_exception(error_msg)
882
+
883
+ def check_association(self, node: Node, association: dict):
884
+ self._all_associations.append(
885
+ {'name': association['name'], 'association': association, 'node': node}
886
+ )
887
+
888
+ def check_asset_variable(self, node: Node, var: dict) -> None:
889
+ """
890
+ This checks if the variable has been defined in the current asset.
891
+
892
+ self._vars = {
893
+ <asset-name>: {
894
+ <var-name>: {'node': <var-node>, 'var' <var-dict>}
895
+ }
896
+ }
897
+ """
898
+ _, var = var
899
+
900
+ assert node.parent, 'Asset variable needs a parent'
901
+ parent = node.parent.parent # Twice to skip asset_definition
902
+
903
+ assert parent, 'Asset variable needs a parent'
904
+ asset_name_node = parent.child_by_field_name('id')
905
+
906
+ assert asset_name_node, 'Asset needs name node'
907
+ assert asset_name_node.text, 'Asset name node needs text'
908
+
909
+ asset_name: str = str(asset_name_node.text.decode())
910
+ var_name: str = var['name']
911
+ if asset_name not in self._vars.keys():
912
+ self._vars[asset_name] = {var_name: {'node': node, 'var': var}}
913
+ elif var_name not in self._vars[asset_name]:
914
+ self._vars[asset_name][var_name] = {'node': node, 'var': var}
915
+ else:
916
+ prev_define_line = self._vars[asset_name][var_name]['node'].start_point.row
917
+ error_msg = (
918
+ f"Variable '{var_name}' previously defined at line {prev_define_line}"
919
+ )
920
+ self._raise_analyzer_exception(error_msg)