maco 1.2.13__py3-none-any.whl → 1.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
maco/utils.py CHANGED
@@ -1,4 +1,5 @@
1
- # Common utilities shared between the MACO collector and configextractor-py
1
+ """Common utilities shared between the MACO collector and configextractor-py."""
2
+
2
3
  import importlib
3
4
  import importlib.machinery
4
5
  import importlib.util
@@ -33,8 +34,8 @@ from typing import Callable, Dict, List, Set, Tuple, Union
33
34
  from uv import find_uv_bin
34
35
 
35
36
  from maco import model
36
- from maco.extractor import Extractor
37
37
  from maco.exceptions import AnalysisAbortedException
38
+ from maco.extractor import Extractor
38
39
 
39
40
  logger = logging.getLogger("maco.lib.utils")
40
41
 
@@ -50,10 +51,14 @@ VENV_CREATE_CMD = f"{UV_BIN} venv"
50
51
 
51
52
 
52
53
  class Base64Decoder(json.JSONDecoder):
54
+ """JSON decoder that also decodes base64-encoded binary data."""
55
+
53
56
  def __init__(self, *args, **kwargs):
57
+ """Initialize the decoder."""
54
58
  json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs)
55
59
 
56
60
  def object_hook(self, obj):
61
+ """Hook to decode base64 encoded binary data.""" # noqa: DOC201
57
62
  if "__class__" not in obj:
58
63
  return obj
59
64
  type = obj["__class__"]
@@ -131,17 +136,38 @@ rule MACO {
131
136
 
132
137
 
133
138
  def maco_extractor_validation(module: ModuleType) -> bool:
139
+ """Validation function for extractors.
140
+
141
+ Returns:
142
+ (bool): True if extractor belongs to MACO, False otherwise.
143
+ """
134
144
  if inspect.isclass(module):
135
145
  # 'author' has to be implemented otherwise will raise an exception according to MACO
136
146
  return hasattr(module, "author") and module.author
137
147
  return False
138
148
 
139
149
 
140
- def maco_extract_rules(module: Extractor) -> bool:
150
+ def maco_extract_rules(module: Extractor) -> str:
151
+ """Extracts YARA rules from extractor.
152
+
153
+ Returns:
154
+ (str): YARA rules
155
+ """
141
156
  return module.yara_rule
142
157
 
143
158
 
144
159
  def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger) -> Tuple[List[str], List[str]]:
160
+ """Looks for extractors using YARA rules.
161
+
162
+ Args:
163
+ root_directory (str): Root directory containing extractors
164
+ scanner (yara.Rules): Scanner to look for extractors using YARA rules
165
+ logger (Logger): Logger to use
166
+
167
+ Returns:
168
+ Tuple[List[str], List[str]]: Returns a list of extractor directories and extractor files
169
+
170
+ """
145
171
  extractor_dirs = set([root_directory])
146
172
  extractor_files = []
147
173
 
@@ -177,17 +203,22 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
177
203
  with open(path, "rb") as f:
178
204
  data = f.read()
179
205
 
180
- with open(path, "wb") as f:
181
- # Replace any relative importing with absolute
182
- curr_dir = os.path.dirname(path)
183
- split = curr_dir.split("/")[::-1]
184
- for pattern in [RELATIVE_FROM_IMPORT_RE, RELATIVE_FROM_RE]:
185
- for match in pattern.findall(data):
186
- depth = match.count(b".")
187
- abspath = ".".join(split[depth - 1 : split.index(package) + 1][::-1])
188
- abspath += "." if pattern == RELATIVE_FROM_RE else ""
189
- data = data.replace(f"from {match.decode()}".encode(), f"from {abspath}".encode(), 1)
190
- f.write(data)
206
+ # Replace any relative importing with absolute
207
+ changed_imports = False
208
+ curr_dir = os.path.dirname(path)
209
+ split = curr_dir.split("/")[::-1]
210
+ for pattern in [RELATIVE_FROM_IMPORT_RE, RELATIVE_FROM_RE]:
211
+ for match in pattern.findall(data):
212
+ depth = match.count(b".")
213
+ abspath = ".".join(split[depth - 1 : split.index(package) + 1][::-1])
214
+ abspath += "." if pattern == RELATIVE_FROM_RE else ""
215
+ data = data.replace(f"from {match.decode()}".encode(), f"from {abspath}".encode(), 1)
216
+ changed_imports = True
217
+
218
+ # only write extractor files if imports were changed
219
+ if changed_imports:
220
+ with open(path, "wb") as f:
221
+ f.write(data)
191
222
 
192
223
  if scanner.match(path):
193
224
  # Add directory to list of hits for venv creation
@@ -282,7 +313,16 @@ def _install_required_packages(create_venv: bool, directories: List[str], python
282
313
  return venvs
283
314
 
284
315
 
285
- def find_and_insert_venv(path: str, venvs: List[str]):
316
+ def find_and_insert_venv(path: str, venvs: List[str]) -> Tuple[str, str]:
317
+ """Finds the closest virtual environment to the extractor and inserts it into the PATH.
318
+
319
+ Args:
320
+ path (str): Path of extractor
321
+ venvs (List[str]): List of virtual environments
322
+
323
+ Returns:
324
+ (Tuple[str, str]): Virtual environment and site-packages path that's closest to the extractor
325
+ """
286
326
  venv = None
287
327
  for venv in sorted(venvs, reverse=True):
288
328
  venv_parent = os.path.dirname(venv)
@@ -311,6 +351,16 @@ def register_extractors(
311
351
  logger: Logger,
312
352
  default_loaded_modules: Set[str] = set(sys.modules.keys()),
313
353
  ):
354
+ """Register extractors within the current directory.
355
+
356
+ Args:
357
+ current_directory (str): Current directory to register extractors found
358
+ venvs (List[str]): List of virtual environments
359
+ extractor_files (List[str]): List of extractor files found
360
+ extractor_module_callback (Callable[[ModuleType, str], None]): Callback used to register extractors
361
+ logger (Logger): Logger to use
362
+ default_loaded_modules (Set[str]): Set of default loaded modules
363
+ """
314
364
  package_name = os.path.basename(current_directory)
315
365
  parent_directory = os.path.dirname(current_directory)
316
366
  if venvs and package_name in sys.modules:
@@ -413,6 +463,17 @@ def import_extractors(
413
463
  python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
414
464
  skip_install: bool = False,
415
465
  ):
466
+ """Import extractors in a given directory.
467
+
468
+ Args:
469
+ extractor_module_callback (Callable[[ModuleType, str], bool]): Callback used to register extractors
470
+ root_directory (str): Root directory to look for extractors
471
+ scanner (yara.Rules): Scanner to look for extractors that match YARA rule
472
+ create_venv (bool): Create/Use virtual environments
473
+ logger (Logger): Logger to use
474
+ python_version (str): Version of python to use when creating virtual environments
475
+ skip_install (bool): Skip installation of Python dependencies for extractors
476
+ """
416
477
  extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
417
478
 
418
479
  logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
@@ -448,7 +509,24 @@ def run_extractor(
448
509
  venv_script=VENV_SCRIPT,
449
510
  json_decoder=Base64Decoder,
450
511
  ) -> Union[Dict[str, dict], model.ExtractorModel]:
451
- """Runs the maco extractor against sample either in current process or child process."""
512
+ """Runs the maco extractor against sample either in current process or child process.
513
+
514
+ Args:
515
+ sample_path (str): Path to sample
516
+ module_name (str): Name of extractor module
517
+ extractor_class (str): Name of extractor class in module
518
+ module_path (str): Path to Python module containing extractor
519
+ venv (str): Path to virtual environment associated with the extractor
520
+ venv_script (str): Script to run extractor in a virtual environment
521
+ json_decoder (Base64Decoder): Decoder used for JSON
522
+
523
+ Raises:
524
+ AnalysisAbortedException: Raised when extractor voluntarily terminates execution
525
+ Exception: Raised when extractor raises an exception
526
+
527
+ Returns:
528
+ Union[Dict[str, dict], model.ExtractorModel]: Results from extractor
529
+ """
452
530
  if not venv:
453
531
  key = f"{module_name}_{extractor_class}"
454
532
  if key not in _loaded_extractors:
maco/yara.py CHANGED
@@ -1,25 +1,34 @@
1
+ """yara-python facade that uses yara-x."""
2
+
1
3
  import re
2
4
  from collections import namedtuple
3
5
  from itertools import cycle
4
- from typing import Dict
6
+ from typing import Dict, List
5
7
 
6
8
  import yara_x
7
9
 
8
- RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
9
-
10
+ from maco.exceptions import SyntaxError
10
11
 
11
- class SyntaxError(Exception): ...
12
+ RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
12
13
 
13
14
 
14
15
  # Create interfaces that resembles yara-python (but is running yara-x under the hood)
15
16
  class StringMatchInstance:
17
+ """Instance of a string match."""
18
+
16
19
  def __init__(self, match: yara_x.Match, file_content: bytes):
20
+ """Initializes StringMatchInstance."""
17
21
  self.matched_data = file_content[match.offset : match.offset + match.length]
18
22
  self.matched_length = match.length
19
23
  self.offset = match.offset
20
24
  self.xor_key = match.xor_key
21
25
 
22
26
  def plaintext(self) -> bytes:
27
+ """Plaintext of the matched data.
28
+
29
+ Returns:
30
+ (bytes): Plaintext of the matched cipher text
31
+ """
23
32
  if not self.xor_key:
24
33
  # No need to XOR the matched data
25
34
  return self.matched_data
@@ -28,17 +37,28 @@ class StringMatchInstance:
28
37
 
29
38
 
30
39
  class StringMatch:
40
+ """String match."""
41
+
31
42
  def __init__(self, pattern: yara_x.Pattern, file_content: bytes):
43
+ """Initializes StringMatch."""
32
44
  self.identifier = pattern.identifier
33
45
  self.instances = [StringMatchInstance(match, file_content) for match in pattern.matches]
34
46
  self._is_xor = any([match.xor_key for match in pattern.matches])
35
47
 
36
48
  def is_xor(self):
49
+ """Checks if string match is xor'd.
50
+
51
+ Returns:
52
+ (bool): True if match is xor'd
53
+ """
37
54
  return self._is_xor
38
55
 
39
56
 
40
57
  class Match:
58
+ """Match."""
59
+
41
60
  def __init__(self, rule: yara_x.Rule, file_content: bytes):
61
+ """Initializes Match."""
42
62
  self.rule = rule.identifier
43
63
  self.namespace = rule.namespace
44
64
  self.tags = list(rule.tags) or []
@@ -50,7 +70,14 @@ class Match:
50
70
 
51
71
 
52
72
  class Rules:
73
+ """Rules."""
74
+
53
75
  def __init__(self, source: str = None, sources: Dict[str, str] = None):
76
+ """Initializes Rules.
77
+
78
+ Raises:
79
+ SyntaxError: Raised when there's a syntax error in the YARA rule.
80
+ """
54
81
  Rule = namedtuple("Rule", "identifier namespace is_global")
55
82
  if source:
56
83
  sources = {"default": source}
@@ -69,10 +96,20 @@ class Rules:
69
96
  raise SyntaxError(e)
70
97
 
71
98
  def __iter__(self):
99
+ """Iterate over rules.
100
+
101
+ Yields:
102
+ YARA rules
103
+ """
72
104
  for rule in self._rules:
73
105
  yield rule
74
106
 
75
- def match(self, filepath: str = None, data: bytes = None):
107
+ def match(self, filepath: str = None, data: bytes = None) -> List[Match]:
108
+ """Performs a scan to check for YARA rules matches based on the file, either given by path or buffer.
109
+
110
+ Returns:
111
+ (List[Match]): A list of YARA matches.
112
+ """
76
113
  if filepath:
77
114
  with open(filepath, "rb") as fp:
78
115
  data = fp.read()
@@ -81,4 +118,9 @@ class Rules:
81
118
 
82
119
 
83
120
  def compile(source: str = None, sources: Dict[str, str] = None) -> Rules:
121
+ """Compiles YARA rules from source or from sources.
122
+
123
+ Returns:
124
+ (Rules): a Rules object
125
+ """
84
126
  return Rules(source, sources)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: maco
3
- Version: 1.2.13
3
+ Version: 1.2.15
4
4
  Author: sl-govau
5
5
  Maintainer: cccs-rs
6
6
  License: MIT License
@@ -33,9 +33,9 @@ Requires-Dist: cart
33
33
  Requires-Dist: pydantic>=2.0.0
34
34
  Requires-Dist: tomli>=1.1.0; python_version < "3.11"
35
35
  Requires-Dist: uv
36
- Requires-Dist: yara-python
37
36
  Requires-Dist: yara-x==0.11.0
38
37
  Requires-Dist: multiprocess>=0.70.17
38
+ Dynamic: license-file
39
39
 
40
40
  # Maco - Malware config extractor framework
41
41
 
@@ -70,7 +70,6 @@ This framework is actively being used by:
70
70
  | <a href="https://cybercentrecanada.github.io/assemblyline4_docs/"><img src="https://images.weserv.nl/?url=cybercentrecanada.github.io/assemblyline4_docs/images/crane.png?v=4&h=100&w=100&fit=cover&maxage=7d"></a> | A malware analysis platform that uses the MACO model to export malware configuration extractions into a parseable, machine-friendly format | [![License](https://img.shields.io/github/license/CybercentreCanada/assemblyline)](https://github.com/CybercentreCanada/assemblyline/blob/main/LICENSE.md) |
71
71
  | [configextractor-py](https://github.com/CybercentreCanada/configextractor-py) | A tool designed to run extractors from multiple frameworks and uses the MACO model for output harmonization | [![License](https://img.shields.io/github/license/CybercentreCanada/configextractor-py)](https://github.com/CybercentreCanada/configextractor-py/blob/main/LICENSE.md) |
72
72
  | <a href="https://github.com/jeFF0Falltrades/rat_king_parser"><img src="https://images.weserv.nl/?url=raw.githubusercontent.com/jeFF0Falltrades/rat_king_parser/master/.github/logo.png?v=4&h=100&w=100&fit=cover&maxage=7d"/> </a> | A robust, multiprocessing-capable, multi-family RAT config parser/extractor that is compatible with MACO | [![License](https://img.shields.io/github/license/jeFF0Falltrades/rat_king_parser)](https://github.com/jeFF0Falltrades/rat_king_parser/blob/master/LICENSE) |
73
- | <a href="https://github.com/apophis133/apophis-YARA-Rules"><img src="https://images.weserv.nl/?url=github.com/apophis133.png?v=4&h=100&w=100&fit=cover&maxage=7d"/> </a> | A parser/extractor repository that supports MACO for performing malware configuration extraction with YARA rule detection | |
74
73
  | <a href="https://github.com/CAPESandbox/community"><img src="https://images.weserv.nl/?url=github.com/CAPESandbox.png?v=4&h=100&w=100&fit=cover&maxage=7d0&mask=circle"/> </a> | A parser/extractor repository containing MACO extractors that's authored by the CAPE community but is integrated in [CAPE](https://github.com/kevoreilly/CAPEv2) deployments.<br>**Note: These MACO extractors wrap and parse the original CAPE extractors.** | [![License](https://img.shields.io/badge/license-GPL--3.0-informational)](https://github.com/kevoreilly/CAPEv2/blob/master/LICENSE) |
75
74
 
76
75
  ## Model Example
@@ -0,0 +1,49 @@
1
+ demo_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ demo_extractors/elfy.py,sha256=PQpX956WaKmGzxF0pvpXECbwSPF_j3-_SElvboYPlF8,1083
3
+ demo_extractors/limit_other.py,sha256=lR0-7KPPDyl2UK917ev7ALhqvnPcFGsUObq7b-dESBE,1718
4
+ demo_extractors/nothing.py,sha256=0pLL9vZESWSdNOmtzTv33Ird0QaQUmXmeW_rwu6MExU,784
5
+ demo_extractors/requirements.txt,sha256=nD7BPNv7YEPUr9MDcaKYNs2UfHtxvN8FOKKesgC_L5g,50
6
+ demo_extractors/shared.py,sha256=GxdUKic4N1Bu2dODo-zjvm8JMLxFIXGkgoz4PUBo-Xw,432
7
+ demo_extractors/terminator.py,sha256=nxoZYRteYDQS7wp-aAsCaxCSJ9FSE54jPrW3fJpRVho,925
8
+ demo_extractors/complex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ demo_extractors/complex/complex.py,sha256=GYKmPOD8-fyVHxwjZb-3t1IghKVMuLtdUvCs5C5yPe0,2625
10
+ demo_extractors/complex/complex_utils.py,sha256=5kdMl-niSH9d-d3ChuItpmlPT4U-S9g-iyBZlR4tfmQ,296
11
+ maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ maco/base_test.py,sha256=DrVE7vOazeLQpOQeIDwBYK1WtlmdJrRe50JOqP5t4Y0,3198
13
+ maco/cli.py,sha256=nrSukAJAthbstZT3-lQNPz4zOOMcBhvfYQqLh_B5Jdk,9457
14
+ maco/collector.py,sha256=R3zw-fUJBlwmcSqvkQ-PnoJdHfRm2V0JAOl7N8MTAbY,8240
15
+ maco/exceptions.py,sha256=XBHUrs1kr1ZayPI9B_W-WejKgVmC8sWL_o4RL0b4DQE,745
16
+ maco/extractor.py,sha256=s36aGcsXSc-9iCik6iihVt5G1a1DZUA7TquvWYQNwdE,2912
17
+ maco/utils.py,sha256=rXKrrKTNi7DEC5SZUnUQcxnRRmJXRp0y4DuVaDkBYvY,24977
18
+ maco/yara.py,sha256=gkHHxwZNxzZV7nHZM3HNUmhHXB7VW82voCHK5mHpt2Q,3970
19
+ maco/model/__init__.py,sha256=ULdyHx8R5D2ICHZo3VoCk1YTlewTok36TYIpwx__pNY,45
20
+ maco/model/model.py,sha256=whdeqwphReHpgQ5f6kODB7pQI3UEylTTiVqNq_FHNBg,24156
21
+ maco-1.2.15.dist-info/licenses/LICENSE.md,sha256=gMSjshPhXvV_F1qxmeNkKdBqGWkd__fEJf4glS504bM,1478
22
+ model_setup/maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
+ model_setup/maco/base_test.py,sha256=DrVE7vOazeLQpOQeIDwBYK1WtlmdJrRe50JOqP5t4Y0,3198
24
+ model_setup/maco/cli.py,sha256=nrSukAJAthbstZT3-lQNPz4zOOMcBhvfYQqLh_B5Jdk,9457
25
+ model_setup/maco/collector.py,sha256=R3zw-fUJBlwmcSqvkQ-PnoJdHfRm2V0JAOl7N8MTAbY,8240
26
+ model_setup/maco/exceptions.py,sha256=XBHUrs1kr1ZayPI9B_W-WejKgVmC8sWL_o4RL0b4DQE,745
27
+ model_setup/maco/extractor.py,sha256=s36aGcsXSc-9iCik6iihVt5G1a1DZUA7TquvWYQNwdE,2912
28
+ model_setup/maco/utils.py,sha256=rXKrrKTNi7DEC5SZUnUQcxnRRmJXRp0y4DuVaDkBYvY,24977
29
+ model_setup/maco/yara.py,sha256=gkHHxwZNxzZV7nHZM3HNUmhHXB7VW82voCHK5mHpt2Q,3970
30
+ model_setup/maco/model/__init__.py,sha256=ULdyHx8R5D2ICHZo3VoCk1YTlewTok36TYIpwx__pNY,45
31
+ model_setup/maco/model/model.py,sha256=whdeqwphReHpgQ5f6kODB7pQI3UEylTTiVqNq_FHNBg,24156
32
+ pipelines/publish.yaml,sha256=xt3WNU-5kIICJgKIiiE94M3dWjS3uEiun-n4OmIssK8,1471
33
+ pipelines/test.yaml,sha256=btJVI-R39UBeYosGu7TOpU6V9ogFW3FT3ROtWygQGQ0,1472
34
+ tests/data/example.txt.cart,sha256=j4ZdDnFNVq7lb-Qi4pY4evOXKQPKG-GSg-n-uEqPhV0,289
35
+ tests/data/trigger_complex.txt,sha256=uqnLSrnyDGCmXwuPmZ2s8vdhH0hJs8DxvyaW_tuYY24,64
36
+ tests/data/trigger_complex.txt.cart,sha256=Z7qF1Zi640O45Znkl9ooP2RhSLAEqY0NRf51d-q7utU,345
37
+ tests/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ tests/extractors/basic.py,sha256=BpOgVoeeAYoRF4PYDP4llS0GrvlqcEKw1588RsnSHFc,952
39
+ tests/extractors/basic_longer.py,sha256=2I7wWJugOB9tHtgdIvG9crbV9pEuDsuvr9OR-aHRRbs,990
40
+ tests/extractors/test_basic.py,sha256=RZPKBP6we2DlY2qpbxYjvf8u-TPcD96ofphLQ117WPk,775
41
+ tests/extractors/bob/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ tests/extractors/bob/bob.py,sha256=4fpqy_O6NDinJImghyW5OwYgnaB05aY4kgoIS_C3c_U,253
43
+ tests/extractors/import_rewriting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ tests/extractors/import_rewriting/importer.py,sha256=wqF1AG2zXXuj9EMt9qlDorab-UD0GYuFggtrCuz4sf0,289735
45
+ maco-1.2.15.dist-info/METADATA,sha256=BBjPNqDyPQPPwFHL0G0LqOrM2zYURFENydH2K63J6aU,15232
46
+ maco-1.2.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ maco-1.2.15.dist-info/entry_points.txt,sha256=TpcwG1gedIg8Y7a9ZOv8aQpuwEUftCefDrAjzeP-o6U,39
48
+ maco-1.2.15.dist-info/top_level.txt,sha256=iMRwuzmrHA3zSwiSeMIl6FWhzRpn_st-I4fAv-kw5_o,49
49
+ maco-1.2.15.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +1,6 @@
1
1
  """Foundation for unit testing an extractor.
2
2
 
3
3
  Example:
4
-
5
4
  from maco import base_test
6
5
  class TestExample(base_test.BaseTest):
7
6
  name = "Example"
@@ -20,13 +19,12 @@ import unittest
20
19
  import cart
21
20
 
22
21
  from maco import collector
23
-
24
-
25
- class NoHitException(Exception):
26
- pass
22
+ from maco.exceptions import NoHitException
27
23
 
28
24
 
29
25
  class BaseTest(unittest.TestCase):
26
+ """Base test class."""
27
+
30
28
  name: str = None # name of the extractor
31
29
  # folder and/or file where extractor is.
32
30
  # I recommend something like os.path.join(__file__, "../../extractors")
@@ -36,6 +34,11 @@ class BaseTest(unittest.TestCase):
36
34
 
37
35
  @classmethod
38
36
  def setUpClass(cls) -> None:
37
+ """Initialization of class.
38
+
39
+ Raises:
40
+ Exception: when name or path is not set.
41
+ """
39
42
  if not cls.name or not cls.path:
40
43
  raise Exception("name and path must be set")
41
44
  cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
@@ -47,7 +50,11 @@ class BaseTest(unittest.TestCase):
47
50
  self.assertEqual(len(self.c.extractors), 1)
48
51
 
49
52
  def extract(self, stream):
50
- """Return results for running extractor over stream, including yara check."""
53
+ """Return results for running extractor over stream, including yara check.
54
+
55
+ Raises:
56
+ NoHitException: when yara rule doesn't hit.
57
+ """
51
58
  runs = self.c.match(stream)
52
59
  if not runs:
53
60
  raise NoHitException("no yara rule hit")
@@ -65,7 +72,17 @@ class BaseTest(unittest.TestCase):
65
72
 
66
73
  @classmethod
67
74
  def load_cart(cls, filepath: str) -> io.BytesIO:
68
- """Load and unneuter a test file (likely malware) into memory for processing."""
75
+ """Load and unneuter a test file (likely malware) into memory for processing.
76
+
77
+ Args:
78
+ filepath (str): Path to carted sample
79
+
80
+ Returns:
81
+ (io.BytesIO): Buffered stream containing the un-carted sample
82
+
83
+ Raises:
84
+ FileNotFoundError: if the path to the sample doesn't exist
85
+ """
69
86
  # it is nice if we can load files relative to whatever is implementing base_test
70
87
  dirpath = os.path.split(cls._get_location())[0]
71
88
  # either filepath is absolute, or should be loaded relative to child of base_test
model_setup/maco/cli.py CHANGED
@@ -3,19 +3,18 @@
3
3
  import argparse
4
4
  import base64
5
5
  import binascii
6
- import cart
7
6
  import hashlib
8
7
  import io
9
8
  import json
10
9
  import logging
11
10
  import os
12
11
  import sys
13
-
14
12
  from importlib.metadata import version
15
13
  from typing import BinaryIO, List, Tuple
16
14
 
17
- from maco import collector
15
+ import cart
18
16
 
17
+ from maco import collector
19
18
 
20
19
  logger = logging.getLogger("maco.lib.cli")
21
20
 
@@ -29,7 +28,20 @@ def process_file(
29
28
  force: bool,
30
29
  include_base64: bool,
31
30
  ):
32
- """Process a filestream with the extractors and rules."""
31
+ """Process a filestream with the extractors and rules.
32
+
33
+ Args:
34
+ collected (collector.Collector): a Collector instance
35
+ path_file (str): path to sample to be analyzed
36
+ stream (BinaryIO): binary stream to be analyzed
37
+ pretty (bool): Pretty print the JSON output
38
+ force (bool): Run all extractors regardless of YARA rule match
39
+ include_base64 (bool): include base64'd data in output
40
+
41
+ Returns:
42
+ (dict): The output from the extractors analyzing the sample
43
+
44
+ """
33
45
  unneutered = io.BytesIO()
34
46
  try:
35
47
  cart.unpack_stream(stream, unneutered)
@@ -98,7 +110,8 @@ def process_filesystem(
98
110
  ) -> Tuple[int, int, int]:
99
111
  """Process filesystem with extractors and print results of extraction.
100
112
 
101
- Returns total number of analysed files, yara hits and successful maco extractions.
113
+ Returns:
114
+ (Tuple[int, int, int]): Total number of analysed files, yara hits and successful maco extractions.
102
115
  """
103
116
  if force:
104
117
  logger.warning("force execute will cause errors if an extractor requires a yara rule hit during execution")
@@ -163,6 +176,7 @@ def process_filesystem(
163
176
 
164
177
 
165
178
  def main():
179
+ """Main block for CLI."""
166
180
  parser = argparse.ArgumentParser(description="Run extractors over samples.")
167
181
  parser.add_argument("extractors", type=str, help="path to extractors")
168
182
  parser.add_argument("samples", type=str, help="path to samples")
@@ -13,18 +13,20 @@ from multiprocess import Manager, Process, Queue
13
13
  from pydantic import BaseModel
14
14
 
15
15
  from maco import extractor, model, utils, yara
16
- from maco.exceptions import AnalysisAbortedException
17
-
18
-
19
- class ExtractorLoadError(Exception):
20
- pass
21
-
16
+ from maco.exceptions import AnalysisAbortedException, ExtractorLoadError
22
17
 
23
18
  logger = logging.getLogger("maco.lib.helpers")
24
19
 
25
20
 
26
21
  def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
27
- """Enforce types and verify properties, and remove defaults."""
22
+ """Enforce types and verify properties, and remove defaults.
23
+
24
+ Args:
25
+ resp (Union[BaseModel, dict]): results from extractor
26
+
27
+ Returns:
28
+ (Dict): results from extractor after verification
29
+ """
28
30
  if not resp:
29
31
  return None
30
32
  # check the response is valid for its own model
@@ -62,6 +64,8 @@ class ExtractorRegistration(TypedDict):
62
64
 
63
65
 
64
66
  class Collector:
67
+ """Discover and load extractors from file system."""
68
+
65
69
  def __init__(
66
70
  self,
67
71
  path_extractors: str,
@@ -70,7 +74,11 @@ class Collector:
70
74
  create_venv: bool = False,
71
75
  skip_install: bool = False,
72
76
  ):
73
- """Discover and load extractors from file system."""
77
+ """Discover and load extractors from file system.
78
+
79
+ Raises:
80
+ ExtractorLoadError: when no extractors are found
81
+ """
74
82
  # maco requires the extractor to be imported directly, so ensure they are available on the path
75
83
  full_path_extractors = os.path.abspath(path_extractors)
76
84
  full_path_above_extractors = os.path.dirname(full_path_extractors)
@@ -175,7 +183,15 @@ class Collector:
175
183
  stream: BinaryIO,
176
184
  extractor_name: str,
177
185
  ) -> Dict[str, Any]:
178
- """Run extractor with stream and verify output matches the model."""
186
+ """Run extractor with stream and verify output matches the model.
187
+
188
+ Args:
189
+ stream (BinaryIO): Binary stream to analyze
190
+ extractor_name (str): Name of extractor to analyze stream
191
+
192
+ Returns:
193
+ (Dict[str, Any]): Results from extractor
194
+ """
179
195
  extractor = self.extractors[extractor_name]
180
196
  try:
181
197
  # Run extractor on a copy of the sample
@@ -1,3 +1,33 @@
1
+ """Exception classes for extractors."""
2
+
3
+
1
4
  # Can be raised by extractors to abort analysis of a sample
2
5
  # ie. Can abort if preliminary checks at start of run indicate the file shouldn't be analyzed by extractor
3
- class AnalysisAbortedException(Exception): ...
6
+ class AnalysisAbortedException(Exception):
7
+ """Raised when extractors voluntarily abort analysis of a sample."""
8
+
9
+ pass
10
+
11
+
12
+ class ExtractorLoadError(Exception):
13
+ """Raised when extractors cannot be loaded."""
14
+
15
+ pass
16
+
17
+
18
+ class InvalidExtractor(ValueError):
19
+ """Raised when an extractor is invalid."""
20
+
21
+ pass
22
+
23
+
24
+ class NoHitException(Exception):
25
+ """Raised when the YARA rule of an extractor doesn't hit."""
26
+
27
+ pass
28
+
29
+
30
+ class SyntaxError(Exception):
31
+ """Raised when there's a syntax error in the YARA rule."""
32
+
33
+ pass
@@ -4,14 +4,8 @@ import logging
4
4
  import textwrap
5
5
  from typing import BinaryIO, List, Optional, Union
6
6
 
7
- from maco import yara
8
-
9
- from . import model
10
-
11
-
12
- class InvalidExtractor(ValueError):
13
- pass
14
-
7
+ from maco import model, yara
8
+ from maco.exceptions import InvalidExtractor
15
9
 
16
10
  DEFAULT_YARA_RULE = """
17
11
  rule {name}
@@ -37,6 +31,11 @@ class Extractor:
37
31
  logger: logging.Logger = None # logger for use when debugging
38
32
 
39
33
  def __init__(self) -> None:
34
+ """Initialise the extractor.
35
+
36
+ Raises:
37
+ InvalidExtractor: When the extractor is invalid.
38
+ """
40
39
  self.name = name = type(self).__name__
41
40
  self.logger = logging.getLogger(f"maco.extractor.{name}")
42
41
  self.logger.debug(f"initialise '{name}'")