mal-toolbox 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,32 +1,500 @@
1
- #!/usr/bin/env python
2
1
  # mypy: ignore-errors
2
+ import sys
3
+ from collections.abc import MutableMapping, MutableSequence
4
+ from pathlib import Path
3
5
 
4
- import os
5
- from typing import Optional
6
+ from antlr4 import FileStream, CommonTokenStream, ParseTreeVisitor
7
+ from antlr4.error.ErrorListener import ConsoleErrorListener
6
8
 
7
- from antlr4 import FileStream, CommonTokenStream
8
9
  from .mal_lexer import malLexer
9
10
  from .mal_parser import malParser
10
- from .mal_visitor import malVisitor
11
11
 
12
+ # In a rule like `rule: one? two* three`:
13
+ # - ctx.one() would be None if the token was not found on a matching line
14
+ # - ctx.two() would be []
12
15
 
13
- class MalCompiler:
14
- def __init__(self):
15
- self.path = None
16
- self.current_file = None
17
16
 
18
- def compile(self, malfile: Optional[str] = None):
19
- if not self.path:
20
- self.path = os.path.dirname(malfile)
17
def patched_antrl_syntax_error(self, recognizer, offendingSymbol, line, column, msg, e):
    """Report an ANTLR syntax error as ``file:line:column: message`` on stderr.

    The signature mirrors the ``syntaxError`` hook of ANTLR error listeners so
    that this function can be assigned as a method replacement.  Only ``line``,
    ``column`` and ``msg`` are used; the remaining parameters are accepted for
    signature compatibility and ignored.

    The file name is read from the ``file`` attribute stored on this function
    object.  When that attribute has not been set yet, ``<unknown>`` is printed
    instead of raising ``AttributeError``.
    """
    # The attribute lives on the function object itself, so look it up
    # defensively: the hook may fire before anybody assigned it.
    file = getattr(patched_antrl_syntax_error, "file", "<unknown>")
    print(f"{file}:{line}:{column}: {msg}", file=sys.stderr)
21
20
 
22
- self.current_file = os.path.basename(malfile)
23
21
 
24
- input_stream = FileStream(
25
- os.path.join(self.path, self.current_file), encoding="utf-8"
26
- )
22
+ ConsoleErrorListener.syntaxError = patched_antrl_syntax_error  # class-level override: every ConsoleErrorListener now reports syntax errors through patched_antrl_syntax_error
23
+
24
+
25
class MalCompiler(ParseTreeVisitor):
    """Parse tree visitor that turns MAL source files into a language spec dict.

    ``compile`` parses one file and visits its tree; ``visitMal`` assembles the
    resulting dictionary and recursively compiles included files.  The other
    ``visit*`` methods each translate one grammar rule context into plain
    Python data (dicts, lists, strings, numbers).
    """

    def __init__(self, *args, **kwargs):
        """Initialise the bookkeeping state and forward arguments to the base."""
        # Not updated anywhere else in this class; kept for compatibility.
        self.current_file: Path | None = None
        # Files already compiled, used to break include cycles.
        self.visited_files: set[Path] = set()
        # Parent directories of the files currently being compiled; the last
        # entry is the directory relative includes are resolved against.
        self.path_stack: list[Path] = []

        super().__init__(*args, **kwargs)

    def compile(self, malfile: Path | str):
        """Parse ``malfile`` and return the result of visiting its parse tree.

        Relative paths are interpreted against the directory of the file that
        is currently being compiled (if any).  A file that has already been
        compiled by this instance yields an empty dict, which stops recursive
        includes.
        """
        current_file = Path(malfile)

        if not current_file.is_absolute() and self.path_stack:
            # Only for the first file self.path_stack will be empty.
            current_file = self.path_stack[-1] / current_file

        # Compare normalised paths: "a/x.mal" and "a/sub/../x.mal" are the
        # same file and must be recognised as such, otherwise an include
        # cycle going through ".." would never terminate.
        visited_key = current_file.resolve()
        if visited_key in self.visited_files:
            # Avoid infinite loops due to recursive includes
            return {}

        self.visited_files.add(visited_key)
        self.path_stack.append(current_file.parent)

        try:
            # Name shown in syntax error messages: relative to the directory
            # of the first compiled file when possible, full path otherwise
            # (e.g. an absolute include located outside that directory).
            try:
                display_name = current_file.relative_to(self.path_stack[0])
            except ValueError:
                display_name = current_file
            patched_antrl_syntax_error.file = display_name

            input_stream = FileStream(current_file, encoding="utf-8")
            lexer = malLexer(input_stream)
            stream = CommonTokenStream(lexer)
            parser = malParser(stream)
            tree = parser.mal()

            return self.visit(tree)
        finally:
            # Always unwind, even when parsing or visiting raised, so the
            # stack stays consistent for subsequent compile() calls.
            self.path_stack.pop()

    def visitMal(self, ctx):
        """Build the language specification dict for a whole file.

        Every declaration visitor returns a ``(key, value)`` tuple; the key
        decides how the value is merged.  Included files are compiled
        recursively and merged in.  Duplicate categories, assets and
        associations are removed while preserving first-seen order.
        """
        langspec = {
            "formatVersion": "1.0.0",
            "defines": {},
            "categories": [],
            "assets": [],
            "associations": [],
        }

        # no visitDeclaration method needed, `declaration` is a thin rule
        for declaration in (d.getChild(0) for d in ctx.declaration()):
            result = self.visit(declaration)
            if not result:
                # Nothing to merge for this declaration.
                continue

            key, value = result

            if key == "categories":
                category, assets = value
                langspec["categories"].extend(category)
                langspec["assets"].extend(assets)
            elif key == "defines":
                langspec[key].update(value)
            elif key == "associations":
                langspec[key].extend(value)
            elif key == "include":
                included_file = self.compile(value)
                for k, v in langspec.items():
                    if isinstance(v, MutableMapping):
                        langspec[k].update(included_file.get(k, {}))
                    if isinstance(v, MutableSequence) and k in included_file:
                        langspec[k].extend(included_file[k])

        # Items are dicts (unhashable), hence the list based de-duplication.
        for key in ("categories", "assets", "associations"):
            unique = []
            for item in langspec[key]:
                if item not in unique:
                    unique.append(item)
            langspec[key] = unique

        return langspec

    def visitInclude(self, ctx):
        """Return ``("include", path)`` with the surrounding quotes removed."""
        return ("include", ctx.STRING().getText().strip('"'))

    def visitDefine(self, ctx):
        """Return ``("defines", {id: value})`` with the value unquoted."""
        return ("defines", {ctx.ID().getText(): ctx.STRING().getText().strip('"')})

    def visitDetector(self, ctx):
        """Return a dict with the detector's name, context, type and tprate.

        Each entry is ``None`` when the corresponding sub-rule is absent.
        """
        detector = {}
        detector["name"] = (
            self.visit(ctx.detectorname()) if ctx.detectorname() else None
        )
        detector["context"] = self.visit(ctx.context()) if ctx.context() else None
        detector["type"] = (
            self.visit(ctx.detectortype()) if ctx.detectortype() else None
        )
        detector["tprate"] = self.visit(ctx.tprate()) if ctx.tprate() else None

        return detector

    def visitDetectorname(self, ctx):
        """Return the detector name as written in the source."""
        return ctx.getText()

    def visitContext(self, ctx):
        """Return a ``{label: asset}`` dict for the parts of a detector context."""
        return {
            # Using labels as the dict keys since multiple contextparts can
            # share the same asset type.
            # TODO: add analyzer check if two labels are same in a context
            cpart.contextlabel().getText(): cpart.contextasset().getText()
            for cpart in ctx.contextpart()
        }

    def visitDetectortype(self, ctx):
        """Return the detector type as written in the source."""
        return ctx.getText()

    def visitCategory(self, ctx):
        """Return ``("categories", ([category], assets))`` for one category."""
        category = {}
        category["name"] = ctx.ID().getText()
        category["meta"] = {
            (info := self.visit(meta))[0]: info[1] for meta in ctx.meta()
        }

        assets = [self.visit(asset) for asset in ctx.asset()]

        return ("categories", ([category], assets))

    def visitMeta(self, ctx):
        """Return ``(key, text)`` with surrounding quotes stripped from text."""
        return (ctx.ID().getText(), ctx.text().getText().strip("\"'"))

    def visitAsset(self, ctx):
        """Return the dict describing one asset.

        The category name is read from the parent context.  ``superAsset`` is
        the second ``ID`` of the rule when present, ``None`` otherwise.
        """
        asset = {}
        asset["name"] = ctx.ID()[0].getText()
        asset["meta"] = {(info := self.visit(meta))[0]: info[1] for meta in ctx.meta()}
        asset["category"] = ctx.parentCtx.ID().getText()
        asset["isAbstract"] = ctx.ABSTRACT() is not None

        asset["superAsset"] = None
        if len(ctx.ID()) > 1 and ctx.ID()[1]:
            asset["superAsset"] = ctx.ID()[1].getText()

        asset["variables"] = [self.visit(variable) for variable in ctx.variable()]
        asset["attackSteps"] = [self.visit(step) for step in ctx.step()]

        return asset

    def visitStep(self, ctx):
        """Return the dict describing one attack step.

        Optional parts (risk, ttc, requires, reaches) are ``None`` when absent.
        """
        step = {}
        step["name"] = ctx.ID().getText()
        step["meta"] = {(info := self.visit(meta))[0]: info[1] for meta in ctx.meta()}

        # TODO: add analyzer check for conflicting detector names
        step["detectors"] = {
            (d := self.visit(detector))["name"]: d for detector in ctx.detector()
        }
        step["type"] = self.visit(ctx.steptype())
        step["tags"] = [self.visit(tag) for tag in ctx.tag()]
        step["risk"] = self.visit(ctx.cias()) if ctx.cias() else None

        # TODO: left as "ttc" for compatibility reasons
        step["ttc"] = self.visit(ctx.pdist()) if ctx.pdist() else None

        step["requires"] = (
            self.visit(ctx.precondition()) if ctx.precondition() else None
        )
        step["reaches"] = self.visit(ctx.reaches()) if ctx.reaches() else None

        return step

    def visitSteptype(self, ctx):
        """Map the step type token to its name, or ``None`` if none is present."""
        if ctx.OR():
            return "or"
        if ctx.AND():
            return "and"
        if ctx.HASH():
            return "defense"
        if ctx.EXISTS():
            return "exist"
        if ctx.NOTEXISTS():
            return "notExist"
        return None  # should never happen, the grammar limits it

    def visitTag(self, ctx):
        """Return the tag identifier."""
        return ctx.ID().getText()

    def visitCias(self, ctx):
        """Return the three risk flags, set to ``True`` for each listed letter."""
        risk = {
            "isConfidentiality": False,
            "isIntegrity": False,
            "isAvailability": False,
        }

        for cia in ctx.cia():
            risk.update(self.visit(cia))

        return risk

    def visitCia(self, ctx):
        """Return ``{flag_name: True}`` for a single C / I / A token.

        The key is ``None`` when none of the three tokens is present.
        """
        key = (
            "isConfidentiality"
            if ctx.C()
            else "isIntegrity"
            if ctx.I()
            else "isAvailability"
            if ctx.A()
            else None
        )

        return {key: True}

    def visitPdist(self, ctx):
        """Return the compiled probability distribution expression."""
        return self.visit(ctx.pdistexpr())

    def visitPdistexpr(self, ctx):
        """Fold the terms of an expression into nested addition/subtraction dicts.

        A single term is returned as is.  With several terms the result is
        left-associative: each step wraps the tree built so far as ``lhs``.
        The operator between term ``i-1`` and term ``i`` is the child at
        position ``2 * i - 1``.
        """
        terms = ctx.pdistterm()
        node = self.visit(terms[0])

        for i in range(1, len(terms)):
            node = {
                "type": (
                    "addition"
                    if ctx.children[2 * i - 1].getText() == "+"
                    else "subtraction"
                ),
                "lhs": node,
                "rhs": self.visit(terms[i]),
            }

        return node

    def visitPdistterm(self, ctx):
        """Return a single factor, or a multiplication/division of two factors."""
        if len(factors := ctx.pdistfact()) == 1:
            ret = self.visit(factors[0])
        else:
            ret = {}
            ret["type"] = "multiplication" if ctx.STAR() else "division"
            ret["lhs"] = self.visit(factors[0])
            ret["rhs"] = self.visit(factors[1])

        return ret

    def visitPdistfact(self, ctx):
        """Return a single atom, or an exponentiation of two atoms."""
        if len(atoms := ctx.pdistatom()) == 1:
            ret = self.visit(atoms[0])
        else:
            ret = {}
            ret["type"] = "exponentiation"
            ret["lhs"] = self.visit(atoms[0])
            ret["rhs"] = self.visit(atoms[1])

        return ret

    def visitPdistatom(self, ctx):
        """Visit whichever alternative (distribution, expression, number) is present."""
        if ctx.pdistdist():
            return self.visit(ctx.pdistdist())
        if ctx.pdistexpr():
            return self.visit(ctx.pdistexpr())
        if ctx.number():
            return self.visit(ctx.number())
        return None  # should never happen, the grammar limits it

    def visitPdistdist(self, ctx):
        """Return a ``function`` node with its name and numeric arguments.

        Arguments are only collected when a parenthesis is present; otherwise
        the list stays empty.
        """
        ret = {"type": "function"}
        ret["name"] = ctx.ID().getText()
        ret["arguments"] = []

        if ctx.LPAREN():
            ret["arguments"] = [self.visit(number)["value"] for number in ctx.number()]

        return ret

    def visitPrecondition(self, ctx):
        """Return the step expressions of a precondition; always overriding."""
        ret = {}
        ret["overrides"] = True
        ret["stepExpressions"] = [self.visit(expr) for expr in ctx.expr()]
        return ret

    def visitReaches(self, ctx):
        """Return the step expressions of a reaches clause.

        ``overrides`` is ``True`` unless the ``INHERITS`` token is present.
        """
        ret = {}
        ret["overrides"] = ctx.INHERITS() is None
        ret["stepExpressions"] = [self.visit(expr) for expr in ctx.expr()]

        return ret

    def visitNumber(self, ctx):
        """Return a ``number`` node holding the literal as a float."""
        ret = {"type": "number"}
        ret["value"] = float(ctx.getText())

        return ret

    def visitVariable(self, ctx):
        """Return the name and step expression of a variable definition."""
        ret = {}
        ret["name"] = ctx.ID().getText()
        ret["stepExpression"] = self.visit(ctx.expr())

        return ret

    def visitExpr(self, ctx):
        """Fold the ``parts`` of an expression into nested set operation dicts.

        A single ``parts`` is returned as is.  Otherwise the result is
        left-associative and the operator between operand ``i-1`` and ``i`` is
        obtained by visiting the child at position ``2 * i - 1``.
        """
        operands = ctx.parts()
        node = self.visit(operands[0])

        for i in range(1, len(operands)):
            node = {
                "type": self.visit(ctx.children[2 * i - 1]),
                "lhs": node,
                "rhs": self.visit(operands[i]),
            }

        return node

    def visitParts(self, ctx):
        """Fold a chain of ``part`` nodes into nested ``collect`` dicts.

        A single part is returned as is; longer chains are left-associative.
        """
        chain = ctx.part()
        node = self.visit(chain[0])

        for i in range(1, len(chain)):
            node = {
                "type": "collect",
                "lhs": node,
                "rhs": self.visit(chain[i]),
            }

        return node

    def visitPart(self, ctx):
        """Return the node for one part of a step expression.

        The base node is a variable reference, a parenthesised expression or an
        identifier (field or attack step).  It is then wrapped in a
        ``transitive`` node when a star is present and in one ``subType`` node
        per type annotation.
        """
        ret = {}
        if ctx.varsubst():
            ret["type"] = "variable"
            ret["name"] = self.visit(ctx.varsubst())
        elif ctx.LPAREN():
            ret = self.visit(ctx.expr())
        else:  # ctx.ID()
            # Resolve type: field or attackStep?
            ret["type"] = self._resolve_part_ID_type(ctx)

            ret["name"] = ctx.ID().getText()

        if ctx.STAR():
            ret = {"type": "transitive", "stepExpression": ret}

        for type_ in ctx.type_():  # mind the trailing underscore
            ret = {
                "type": "subType",
                "subType": self.visit(type_),
                "stepExpression": ret,
            }

        return ret

    def _resolve_part_ID_type(self, ctx):
        """Decide whether an identifier part is a ``field`` or an ``attackStep``.

        Outside of a ``reaches`` clause the answer is always ``field``.  Inside
        one, the identifier is a field when a dot follows it before the end of
        the current comma separated expression, and an attack step otherwise.
        """
        pctx = ctx.parentCtx

        # Traverse up the tree until we find the enclosing `reaches` context
        # or run past the root of the tree.
        # Expressions are also valid in `let` variable assignments, but
        # there every lexical component of expr is considered a "field",
        # no need to resolve the type in that case. Similarly, preconditions
        # (`<-`) only accept fields.
        while pctx and not isinstance(pctx, malParser.ReachesContext):
            pctx = pctx.parentCtx

        if pctx is None:
            # ctx (the `part`) belongs to a "let" assignment or a precondition.
            return "field"

        # scan for a dot to the right of `ctx`
        file_tokens = ctx.parser.getTokenStream().tokens
        for i in range(ctx.start.tokenIndex, pctx.stop.tokenIndex + 1):
            if file_tokens[i].type == malParser.DOT:
                return "field"

            # We are looping until the end of pctx (which is a `reaches`
            # context). This could include multiple comma separated `expr`s,
            # we only care for the current one.
            if file_tokens[i].type == malParser.COMMA:  # end of current `expr`
                return "attackStep"

        return "attackStep"

    def visitVarsubst(self, ctx):
        """Return the name of the substituted variable."""
        return ctx.ID().getText()

    def visitType(self, ctx):
        """Return the type identifier."""
        return ctx.ID().getText()

    def visitSetop(self, ctx):
        """Map a set operator to ``union``, ``intersection`` or ``difference``."""
        if ctx.UNION():
            return "union"
        if ctx.INTERSECT():
            return "intersection"
        # Anything else is reported as a difference.  The previous code tested
        # the bound method `ctx.INTERSECT` (never called, hence always truthy)
        # here, so this has always been the effective fall-through.
        return "difference"

    def visitAssociations(self, ctx):
        """Return ``("associations", [...])`` for an associations section."""
        associations = [self.visit(assoc) for assoc in ctx.association()]

        return ("associations", associations)

    def visitAssociation(self, ctx):
        """Return the dict describing one association between two assets."""
        association = {}

        association["name"] = self.visit(ctx.linkname())
        association["meta"] = {
            (info := self.visit(meta))[0]: info[1] for meta in ctx.meta()
        }
        association["leftAsset"] = ctx.ID()[0].getText()
        association["leftField"] = self.visit(ctx.field()[0])

        # no self.visitMult or self.visitMultatom methods, reading them here
        # directly
        association["leftMultiplicity"] = self._read_multiplicity(ctx.mult()[0])
        association["rightAsset"] = ctx.ID()[1].getText()
        association["rightField"] = self.visit(ctx.field()[1])
        association["rightMultiplicity"] = self._read_multiplicity(ctx.mult()[1])

        self._post_process_multitudes(association)
        return association

    @staticmethod
    def _read_multiplicity(mult_ctx):
        """Return the raw ``{"min": ..., "max": ...}`` texts of a ``mult`` rule.

        ``min`` is the first multatom; ``max`` is the last one when more than
        one is present and ``None`` otherwise.
        """
        multatoms = mult_ctx.multatom()
        return {
            "min": multatoms[0].getText(),
            "max": multatoms[-1].getText() if len(multatoms) > 1 else None,
        }

    def _post_process_multitudes(self, association):
        """Normalise the multiplicity bounds of ``association`` in place.

        A missing upper bound takes the value of the lower bound, ``*`` becomes
        ``0`` as a lower bound and ``None`` as an upper bound, and digit
        strings are converted to integers.
        """
        mult_keys = [
            # start the multatoms from right to left to make sure the rules
            # below get applied cleanly
            "rightMultiplicity.max",
            "rightMultiplicity.min",
            "leftMultiplicity.max",
            "leftMultiplicity.min",
        ]

        for mult_key in mult_keys:
            key, subkey = mult_key.split(".")

            # upper limit equals lower limit if not given
            if subkey == "max" and association[key][subkey] is None:
                association[key][subkey] = association[key]["min"]

            if association[key][subkey] == "*":
                # 'any' as lower limit means start from 0
                if subkey == "min":
                    association[key][subkey] = 0

                # 'any' as upper limit means not limit
                else:
                    association[key][subkey] = None

            # cast numerical strings to integers
            if (multatom := association[key][subkey]) and multatom.isdigit():
                association[key][subkey] = int(association[key][subkey])

    def visitField(self, ctx):
        """Return the field identifier."""
        return ctx.ID().getText()

    def visitLinkname(self, ctx):
        """Return the association (link) name."""
        return ctx.ID().getText()