maco 1.2.4__py3-none-any.whl → 1.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
maco/utils.py CHANGED
@@ -4,14 +4,14 @@ import importlib.machinery
4
4
  import importlib.util
5
5
  import inspect
6
6
  import json
7
+ import logging
7
8
  import logging.handlers
9
+ import multiprocessing
8
10
  import os
9
11
  import re
10
12
  import shutil
11
13
  import subprocess
12
14
  import sys
13
- import multiprocessing
14
- import logging
15
15
  import tempfile
16
16
 
17
17
  from maco import yara
@@ -27,8 +27,11 @@ from glob import glob
27
27
  from logging import Logger
28
28
  from pkgutil import walk_packages
29
29
  from types import ModuleType
30
- from typing import Callable, Dict, List, Set, Tuple
30
+ from typing import Callable, Dict, List, Set, Tuple, Union
31
+
32
+ from uv import find_uv_bin
31
33
 
34
+ from maco import model
32
35
  from maco.extractor import Extractor
33
36
 
34
37
  logger = logging.getLogger("maco.lib.utils")
@@ -38,22 +41,10 @@ VENV_DIRECTORY_NAME = ".venv"
38
41
  RELATIVE_FROM_RE = re.compile(r"from (\.+)")
39
42
  RELATIVE_FROM_IMPORT_RE = re.compile(r"from (\.+) import")
40
43
 
41
- try:
42
- # Attempt to use the uv package manager (Recommended)
43
- from uv import find_uv_bin
44
+ UV_BIN = find_uv_bin()
44
45
 
45
- UV_BIN = find_uv_bin()
46
-
47
- PIP_CMD = f"{UV_BIN} pip"
48
- VENV_CREATE_CMD = f"{UV_BIN} venv"
49
- PACKAGE_MANAGER = "uv"
50
- except ImportError:
51
- # Otherwise default to pip
52
- from sys import executable
53
-
54
- PIP_CMD = "pip"
55
- VENV_CREATE_CMD = f"{executable} -m venv"
56
- PACKAGE_MANAGER = "pip"
46
+ PIP_CMD = f"{UV_BIN} pip"
47
+ VENV_CREATE_CMD = f"{UV_BIN} venv"
57
48
 
58
49
 
59
50
  class Base64Decoder(json.JSONDecoder):
@@ -189,11 +180,9 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
189
180
  for pattern in [RELATIVE_FROM_IMPORT_RE, RELATIVE_FROM_RE]:
190
181
  for match in pattern.findall(data):
191
182
  depth = match.count(".")
192
- data = data.replace(
193
- f"from {match}",
194
- f"from {'.'.join(split[depth - 1 : split.index(package) + 1][::-1])}{'.' if pattern == RELATIVE_FROM_RE else ''}",
195
- 1,
196
- )
183
+ abspath = ".".join(split[depth - 1 : split.index(package) + 1][::-1])
184
+ abspath += "." if pattern == RELATIVE_FROM_RE else ""
185
+ data = data.replace(f"from {match}", f"from {abspath}", 1)
197
186
  f.write(data)
198
187
 
199
188
  if scanner.match(path):
@@ -210,9 +199,8 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
210
199
  return extractor_dirs, extractor_files
211
200
 
212
201
 
213
- def create_virtual_environments(directories: List[str], python_version: str, logger: Logger):
202
+ def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
214
203
  venvs = []
215
- logger.info("Creating virtual environment(s)..")
216
204
  env = deepcopy(os.environ)
217
205
  stop_directory = os.path.dirname(sorted(directories)[0])
218
206
  # Track directories that we've already visited
@@ -222,17 +210,23 @@ def create_virtual_environments(directories: List[str], python_version: str, log
222
210
  while dir != stop_directory and dir not in visited_dirs:
223
211
  req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
224
212
  if req_files:
225
- venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
226
- env.update({"VIRTUAL_ENV": venv_path})
227
- # Create a virtual environment for the directory
228
- if not os.path.exists(venv_path):
229
- cmd = VENV_CREATE_CMD
230
- if PACKAGE_MANAGER == "uv":
231
- cmd += f" --python {python_version}"
232
- subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
213
+ # create a virtual environment, otherwise directly install into current env
214
+ if create_venv:
215
+ venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
216
+ logger.info(f"Updating virtual environment {venv_path}")
217
+ env.update({"VIRTUAL_ENV": venv_path})
218
+ # Create a virtual environment for the directory
219
+ if not os.path.exists(venv_path):
220
+ cmd = f"{VENV_CREATE_CMD} --python {python_version}"
221
+ subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
233
222
 
234
223
  # Install/Update the packages in the environment
235
- install_command = PIP_CMD.split(" ") + ["install", "-U"]
224
+ install_command = PIP_CMD.split(" ") + ["install"]
225
+ # When running locally, only install packages to required spec.
226
+ # This prevents issues during maco development and building extractors against local libraries.
227
+ if create_venv:
228
+ # when running in custom virtual environment, always upgrade packages.
229
+ install_command.append("-U")
236
230
 
237
231
  # Update the pip install command depending on where the dependencies are coming from
238
232
  if "requirements.txt" in req_files:
@@ -253,7 +247,10 @@ def create_virtual_environments(directories: List[str], python_version: str, log
253
247
 
254
248
  install_command.extend(pyproject_command)
255
249
 
250
+ # always require maco to be installed
251
+ install_command.append("maco")
256
252
  logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
253
+ # this uses VIRTUAL_ENV to control usage of a virtual environment
257
254
  p = subprocess.run(
258
255
  install_command,
259
256
  cwd=dir,
@@ -264,10 +261,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
264
261
  if b"is being installed using the legacy" in p.stderr:
265
262
  # Ignore these types of errors
266
263
  continue
267
- logger.error(f"Error installing into venv:\n{p.stderr.decode()}")
264
+ logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
268
265
  else:
269
- logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
270
- venvs.append(venv_path)
266
+ logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
267
+ if create_venv:
268
+ venvs.append(venv_path)
271
269
 
272
270
  # Cleanup any build directories that are the product of package installation
273
271
  expected_build_path = os.path.join(dir, "build")
@@ -311,10 +309,12 @@ def register_extractors(
311
309
  ):
312
310
  package_name = os.path.basename(current_directory)
313
311
  parent_directory = os.path.dirname(current_directory)
314
- if package_name in sys.modules:
312
+ if venvs and package_name in sys.modules:
315
313
  # this may happen as part of testing if some part of the extractor code was directly imported
316
- logger.warning(f"Looks like {package_name} is already loaded. "
317
- "If your maco extractor overlaps an existing package name this could cause problems.")
314
+ logger.warning(
315
+ f"Looks like {package_name} is already loaded. "
316
+ "If your maco extractor overlaps an existing package name this could cause problems."
317
+ )
318
318
 
319
319
  try:
320
320
  # Modify the PATH so we can recognize this new package on import
@@ -389,6 +389,7 @@ def register_extractors(
389
389
  # We were able to find all the extractor files
390
390
  break
391
391
 
392
+
392
393
  def proxy_logging(queue: multiprocessing.Queue, callback: Callable[[ModuleType, str], None], *args, **kwargs):
393
394
  """Ensures logging is set up correctly for a child process and then executes the callback."""
394
395
  logger = logging.getLogger()
@@ -397,37 +398,33 @@ def proxy_logging(queue: multiprocessing.Queue, callback: Callable[[ModuleType,
397
398
  logger.addHandler(qh)
398
399
  callback(*args, **kwargs, logger=logger)
399
400
 
401
+
400
402
  def import_extractors(
401
403
  extractor_module_callback: Callable[[ModuleType, str], bool],
402
404
  *,
403
405
  root_directory: str,
404
406
  scanner: yara.Rules,
405
- create_venv: bool = False,
406
- python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
407
+ create_venv: bool,
407
408
  logger: Logger,
409
+ python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
408
410
  ):
409
411
  extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
410
412
 
411
413
  logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
412
414
  logger.debug(extractor_files)
413
415
 
414
- venvs = []
415
- if create_venv:
416
- venvs = create_virtual_environments(extractor_dirs, python_version, logger)
417
- else:
418
- # Look for pre-existing virtual environments, if any
419
- logger.info("Checking for pre-existing virtual environment(s)..")
420
- venvs = [
421
- os.path.join(root, VENV_DIRECTORY_NAME)
422
- for root, dirs, _ in os.walk(root_directory)
423
- if VENV_DIRECTORY_NAME in dirs
424
- ]
416
+ # Install packages into the current environment or dynamically created virtual environments
417
+ venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
425
418
 
426
419
  # With the environment prepared, we can now hunt for the extractors and register them
427
420
  logger.info("Registering extractors..")
428
421
  register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
429
422
 
430
423
 
424
+ # holds cached extractors when not running in venv mode
425
+ _loaded_extractors: Dict[str, Extractor] = {}
426
+
427
+
431
428
  def run_extractor(
432
429
  sample_path,
433
430
  module_name,
@@ -436,55 +433,69 @@ def run_extractor(
436
433
  venv,
437
434
  venv_script=VENV_SCRIPT,
438
435
  json_decoder=Base64Decoder,
439
- ) -> Dict[str, dict]:
440
- # Write temporary script in the same directory as extractor to resolve relative imports
441
- python_exe = sys.executable
442
- if venv:
443
- # If there is a linked virtual environment, execute within that environment
436
+ ) -> Union[Dict[str, dict], model.ExtractorModel]:
437
+ """Runs the maco extractor against sample either in current process or child process."""
438
+ if not venv:
439
+ key = f"{module_name}_{extractor_class}"
440
+ if key not in _loaded_extractors:
441
+ # dynamic import of extractor
442
+ mod = importlib.import_module(module_name)
443
+ extractor_cls = mod.__getattribute__(extractor_class)
444
+ extractor = extractor_cls()
445
+ else:
446
+ # retrieve cached extractor
447
+ extractor = _loaded_extractors[key]
448
+ if extractor.yara_compiled:
449
+ matches = extractor.yara_compiled.match(sample_path)
450
+ loaded = extractor.run(open(sample_path, "rb"), matches=matches)
451
+ else:
452
+ # execute extractor in child process with separate virtual environment
453
+ # Write temporary script in the same directory as extractor to resolve relative imports
444
454
  python_exe = os.path.join(venv, "bin", "python")
445
- dirname = os.path.dirname(module_path)
446
- with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
447
- with tempfile.NamedTemporaryFile() as output:
448
- parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
449
- root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
450
-
451
- script.write(
452
- venv_script.format(
453
- parent_package_path=parent_package_path,
454
- module_name=module_name,
455
- module_class=extractor_class,
456
- sample_path=sample_path,
457
- output_path=output.name,
455
+ dirname = os.path.dirname(module_path)
456
+ with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
457
+ with tempfile.NamedTemporaryFile() as output:
458
+ parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
459
+ root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
460
+
461
+ script.write(
462
+ venv_script.format(
463
+ parent_package_path=parent_package_path,
464
+ module_name=module_name,
465
+ module_class=extractor_class,
466
+ sample_path=sample_path,
467
+ output_path=output.name,
468
+ )
458
469
  )
459
- )
460
- script.flush()
461
- cwd = root_directory
462
- custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
463
-
464
- if custom_module.startswith("src."):
465
- # src layout found, which means the actual module content is within 'src' directory
466
- custom_module = custom_module[4:]
467
- cwd = os.path.join(cwd, "src")
468
-
469
- proc = subprocess.run(
470
- [python_exe, "-m", custom_module],
471
- cwd=cwd,
472
- capture_output=True,
473
- )
474
- stderr = proc.stderr.decode()
475
- try:
476
- # Load results and return them
477
- output.seek(0)
478
- loaded = json.load(output, cls=json_decoder)
479
- except Exception as e:
480
- # If there was an error raised during runtime, then propagate
481
- delim = f'File "{module_path}"'
482
- exception = stderr
483
- if delim in exception:
484
- exception = f"{delim}{exception.split(delim, 1)[1]}"
485
- # print extractor logging at error level
486
- logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
487
- raise Exception(exception) from e
488
- # ensure that extractor logging is available
489
- logger.info(f"maco extractor stderr:\n{stderr}")
490
- return loaded
470
+ script.flush()
471
+ cwd = root_directory
472
+ custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
473
+
474
+ if custom_module.startswith("src."):
475
+ # src layout found, which means the actual module content is within 'src' directory
476
+ custom_module = custom_module[4:]
477
+ cwd = os.path.join(cwd, "src")
478
+
479
+ # run the maco extractor in full venv process isolation (slow)
480
+ proc = subprocess.run(
481
+ [python_exe, "-m", custom_module],
482
+ cwd=cwd,
483
+ capture_output=True,
484
+ )
485
+ stderr = proc.stderr.decode()
486
+ try:
487
+ # Load results and return them
488
+ output.seek(0)
489
+ loaded = json.load(output, cls=json_decoder)
490
+ except Exception as e:
491
+ # If there was an error raised during runtime, then propagate
492
+ delim = f'File "{module_path}"'
493
+ exception = stderr
494
+ if delim in exception:
495
+ exception = f"{delim}{exception.split(delim, 1)[1]}"
496
+ # print extractor logging at error level
497
+ logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
498
+ raise Exception(exception) from e
499
+ # ensure that extractor logging is available
500
+ logger.info(f"maco extractor stderr:\n{stderr}")
501
+ return loaded
maco/yara.py CHANGED
@@ -3,7 +3,6 @@ from collections import namedtuple
3
3
  from itertools import cycle
4
4
  from typing import Dict
5
5
 
6
- import yara
7
6
  import yara_x
8
7
 
9
8
  RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maco
3
- Version: 1.2.4
3
+ Version: 1.2.6
4
4
  Author: sl-govau
5
5
  Maintainer: cccs-rs
6
6
  License: MIT License
@@ -38,7 +38,7 @@ Requires-Dist: yara-x==0.11.0
38
38
 
39
39
  # Maco - Malware config extractor framework
40
40
 
41
- ## Maco is a framework for ***ma***lware ***co***nfig extractors.
41
+ ## Maco is a framework for <ins>ma</ins>lware <ins>co</ins>nfig extractors.
42
42
 
43
43
  It aims to solve two problems:
44
44
 
@@ -272,3 +272,15 @@ run Complex extractor from rules ['ComplexAlt']
272
272
  The demo extractors are designed to trigger when run over the '`demo_extractors`' folder.
273
273
 
274
274
  e.g. `maco demo_extractors demo_extractors`
275
+
276
+ # Contributions
277
+
278
+ Please use ruff to format and lint PRs. Unformatted or unlinted code may be the cause of PR test failures.
279
+
280
+ Ruff will attempt to fix most issues, but some may require manual resolution.
281
+
282
+ ```
283
+ pip install ruff
284
+ ruff format
285
+ ruff check --fix
286
+ ```
@@ -0,0 +1,44 @@
1
+ demo_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ demo_extractors/elfy.py,sha256=Jo_GKExCeFOKGENJnNM_9ONfJO7LQFucCNz0ryTAo9U,765
3
+ demo_extractors/limit_other.py,sha256=BWjeyOxB75kw4eRla5zvSzdcXtELOS8R6hc71rLPh1s,1295
4
+ demo_extractors/nothing.py,sha256=MNPlb0IsBjrlU5e438JlJ4DIKoBpBRAaYY3JhD3yHqk,601
5
+ demo_extractors/requirements.txt,sha256=E0tD6xBZldq6sQGTHng6k88lBeASOhmLJcdcjpcqBNE,6
6
+ demo_extractors/shared.py,sha256=2P1cyuRbHDvM9IRt3UZnwdyhxx7OWqNC83xLyV8Y190,305
7
+ demo_extractors/complex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ demo_extractors/complex/complex.py,sha256=tXrzj_zWIXbTOwj7Lezapk-qkrM-lfwcyjd5m-BYzdg,2322
9
+ demo_extractors/complex/complex_utils.py,sha256=aec8kJsYUrMPo-waihkVLt-0QpiOPkw7dDqfT9MNuHk,123
10
+ maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ maco/base_test.py,sha256=cjGLEy2c69wl9sjn74QFz7X-VxWOfdin4W8MvYsXc4Q,2718
12
+ maco/cli.py,sha256=NTzV8eu9V0qQNttRo592j-Rdzac7q1NAMraqJF2h_6k,8171
13
+ maco/collector.py,sha256=LraWYlCA72FCmQP0dHWc-ekd7R1SxR6h6rMD95_6mMs,7077
14
+ maco/extractor.py,sha256=uGSGiCQ4jd8jFmfw2T99BGcY5iQJzXHcG_RoTIxClTE,2802
15
+ maco/utils.py,sha256=vo8zGSoFP0k2APUlQXmLssdrVrknjS2YcpvbRM78J68,20480
16
+ maco/yara.py,sha256=8RVaGyeUWY5f8_wfQ25lDX1bcXsb_VoSja85ZC2SqGw,2913
17
+ maco/model/__init__.py,sha256=ULdyHx8R5D2ICHZo3VoCk1YTlewTok36TYIpwx__pNY,45
18
+ maco/model/model.py,sha256=4uY88WphbP3iu-L2WjuYwtgZCS_wNul_hr0bAVuTpvc,23740
19
+ model_setup/maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ model_setup/maco/base_test.py,sha256=cjGLEy2c69wl9sjn74QFz7X-VxWOfdin4W8MvYsXc4Q,2718
21
+ model_setup/maco/cli.py,sha256=NTzV8eu9V0qQNttRo592j-Rdzac7q1NAMraqJF2h_6k,8171
22
+ model_setup/maco/collector.py,sha256=LraWYlCA72FCmQP0dHWc-ekd7R1SxR6h6rMD95_6mMs,7077
23
+ model_setup/maco/extractor.py,sha256=uGSGiCQ4jd8jFmfw2T99BGcY5iQJzXHcG_RoTIxClTE,2802
24
+ model_setup/maco/utils.py,sha256=vo8zGSoFP0k2APUlQXmLssdrVrknjS2YcpvbRM78J68,20480
25
+ model_setup/maco/yara.py,sha256=8RVaGyeUWY5f8_wfQ25lDX1bcXsb_VoSja85ZC2SqGw,2913
26
+ model_setup/maco/model/__init__.py,sha256=ULdyHx8R5D2ICHZo3VoCk1YTlewTok36TYIpwx__pNY,45
27
+ model_setup/maco/model/model.py,sha256=4uY88WphbP3iu-L2WjuYwtgZCS_wNul_hr0bAVuTpvc,23740
28
+ pipelines/publish.yaml,sha256=xt3WNU-5kIICJgKIiiE94M3dWjS3uEiun-n4OmIssK8,1471
29
+ pipelines/test.yaml,sha256=btJVI-R39UBeYosGu7TOpU6V9ogFW3FT3ROtWygQGQ0,1472
30
+ tests/data/example.txt.cart,sha256=j4ZdDnFNVq7lb-Qi4pY4evOXKQPKG-GSg-n-uEqPhV0,289
31
+ tests/data/trigger_complex.txt,sha256=uqnLSrnyDGCmXwuPmZ2s8vdhH0hJs8DxvyaW_tuYY24,64
32
+ tests/data/trigger_complex.txt.cart,sha256=Z7qF1Zi640O45Znkl9ooP2RhSLAEqY0NRf51d-q7utU,345
33
+ tests/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ tests/extractors/basic.py,sha256=r5eLCL6Ynr14nCBgtbLvUbm0NdrXizyc9c-4xBCNShU,828
35
+ tests/extractors/basic_longer.py,sha256=1ClU2QD-Y0TOl_loNFvEqIEpTR5TSVJ6zg9ZmC-ESJo,860
36
+ tests/extractors/test_basic.py,sha256=FLKekfSGM69HaiF7Vu_7D7KDXHZko-9hZkMO8_DoyYA,697
37
+ tests/extractors/bob/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ tests/extractors/bob/bob.py,sha256=G5aOoz58J0ZQK2_lA7HRxAzeLzBxssWxBTZcv1pSbi8,176
39
+ maco-1.2.6.dist-info/LICENSE.md,sha256=gMSjshPhXvV_F1qxmeNkKdBqGWkd__fEJf4glS504bM,1478
40
+ maco-1.2.6.dist-info/METADATA,sha256=-ZxVnBbAHn-iGDizJJa8knrTf4DUArPzANp-SVqDzZ4,15855
41
+ maco-1.2.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
42
+ maco-1.2.6.dist-info/entry_points.txt,sha256=TpcwG1gedIg8Y7a9ZOv8aQpuwEUftCefDrAjzeP-o6U,39
43
+ maco-1.2.6.dist-info/top_level.txt,sha256=iMRwuzmrHA3zSwiSeMIl6FWhzRpn_st-I4fAv-kw5_o,49
44
+ maco-1.2.6.dist-info/RECORD,,
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
32
32
  # I recommend something like os.path.join(__file__, "../../extractors")
33
33
  # if your extractors are in a folder 'extractors' next to a folder of tests
34
34
  path: str = None
35
+ create_venv: bool = False
35
36
 
36
- def setUp(self) -> None:
37
- if not self.name or not self.path:
37
+ @classmethod
38
+ def setUpClass(cls) -> None:
39
+ if not cls.name or not cls.path:
38
40
  raise Exception("name and path must be set")
39
- self.c = collector.Collector(self.path, include=[self.name])
41
+ cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
42
+ return super().setUpClass()
43
+
44
+ def test_default_metadata(self):
45
+ """Require extractor to be loadable and valid."""
40
46
  self.assertIn(self.name, self.c.extractors)
41
47
  self.assertEqual(len(self.c.extractors), 1)
42
- return super().setUp()
43
48
 
44
49
  def extract(self, stream):
45
50
  """Return results for running extractor over stream, including yara check."""
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
49
54
  resp = self.c.extract(stream, self.name)
50
55
  return resp
51
56
 
52
- def _get_location(self) -> str:
57
+ @classmethod
58
+ def _get_location(cls) -> str:
53
59
  """Return path to child class that implements this class."""
54
60
  # import child module
55
- module = type(self).__module__
61
+ module = cls.__module__
56
62
  i = importlib.import_module(module)
57
63
  # get location to child module
58
64
  return i.__file__
59
65
 
60
- def load_cart(self, filepath: str) -> io.BytesIO:
66
+ @classmethod
67
+ def load_cart(cls, filepath: str) -> io.BytesIO:
61
68
  """Load and unneuter a test file (likely malware) into memory for processing."""
62
69
  # it is nice if we can load files relative to whatever is implementing base_test
63
- dirpath = os.path.split(self._get_location())[0]
70
+ dirpath = os.path.split(cls._get_location())[0]
64
71
  # either filepath is absolute, or should be loaded relative to child of base_test
65
72
  filepath = os.path.join(dirpath, filepath)
66
73
  if not os.path.isfile(filepath):
model_setup/maco/cli.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """CLI example of how extractors can be executed."""
2
+
2
3
  import argparse
3
4
  import base64
4
5
  import binascii
@@ -150,6 +151,7 @@ def process_filesystem(
150
151
  logger.info(f"{num_analysed} analysed, {num_hits} hits, {num_extracted} extracted")
151
152
  return num_analysed, num_hits, num_extracted
152
153
 
154
+
153
155
  def main():
154
156
  parser = argparse.ArgumentParser(description="Run extractors over samples.")
155
157
  parser.add_argument("extractors", type=str, help="path to extractors")
@@ -165,7 +167,8 @@ def main():
165
167
  parser.add_argument(
166
168
  "--base64",
167
169
  action="store_true",
168
- help="Include base64 encoded binary data in output (can be large, consider printing to file rather than console)",
170
+ help="Include base64 encoded binary data in output "
171
+ "(can be large, consider printing to file rather than console)",
169
172
  )
170
173
  parser.add_argument("--logfile", type=str, help="file to log output")
171
174
  parser.add_argument("--include", type=str, help="comma separated extractors to run")
@@ -179,7 +182,9 @@ def main():
179
182
  parser.add_argument(
180
183
  "--create_venv",
181
184
  action="store_true",
182
- help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
185
+ help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). "
186
+ "This runs much slower than the alternative but may be necessary "
187
+ "when there are many extractors with conflicting dependencies.",
183
188
  )
184
189
  args = parser.parse_args()
185
190
  inc = args.include.split(",") if args.include else []
@@ -225,7 +230,7 @@ def main():
225
230
  pretty=args.pretty,
226
231
  force=args.force,
227
232
  include_base64=args.base64,
228
- create_venv=args.create_venv
233
+ create_venv=args.create_venv,
229
234
  )
230
235
 
231
236
 
@@ -4,6 +4,7 @@ import inspect
4
4
  import logging
5
5
  import logging.handlers
6
6
  import os
7
+ import sys
7
8
  from multiprocessing import Manager, Process, Queue
8
9
  from tempfile import NamedTemporaryFile
9
10
  from types import ModuleType
@@ -48,6 +49,15 @@ class Collector:
48
49
  create_venv: bool = False,
49
50
  ):
50
51
  """Discover and load extractors from file system."""
52
+ # maco requires extractors to be imported directly, so ensure they are available on sys.path
53
+ full_path_extractors = os.path.abspath(path_extractors)
54
+ full_path_above_extractors = os.path.dirname(full_path_extractors)
55
+ # Modify sys.path so we can recognize this new package on import
56
+ if full_path_extractors not in sys.path:
57
+ sys.path.insert(1, full_path_extractors)
58
+ if full_path_above_extractors not in sys.path:
59
+ sys.path.insert(1, full_path_above_extractors)
60
+
51
61
  path_extractors = os.path.realpath(path_extractors)
52
62
  self.path: str = path_extractors
53
63
  self.extractors: Dict[str, Dict[str, str]] = {}
@@ -89,7 +99,7 @@ class Collector:
89
99
 
90
100
  # multiprocess logging is awkward - set up a queue to ensure we can log
91
101
  logging_queue = Queue()
92
- queue_handler = logging.handlers.QueueListener(logging_queue,*logging.getLogger().handlers)
102
+ queue_handler = logging.handlers.QueueListener(logging_queue, *logging.getLogger().handlers)
93
103
  queue_handler.start()
94
104
 
95
105
  # Find the extractors within the given directory
@@ -51,14 +51,14 @@ class Extractor:
51
51
  # check yara rules conform to expected structure
52
52
  # we throw away these compiled rules as we need all rules in system compiled together
53
53
  try:
54
- rules = yara.compile(source=self.yara_rule)
54
+ self.yara_compiled = yara.compile(source=self.yara_rule)
55
55
  except yara.SyntaxError as e:
56
56
  raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
57
57
  # need to track which plugin owns the rules
58
- self.yara_rule_names = [x.identifier for x in rules]
59
- if not len(list(rules)):
58
+ self.yara_rule_names = [x.identifier for x in self.yara_compiled]
59
+ if not len(list(self.yara_compiled)):
60
60
  raise InvalidExtractor(f"{name} must define at least one yara rule")
61
- for x in rules:
61
+ for x in self.yara_compiled:
62
62
  if x.is_global:
63
63
  raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
64
64
 
@@ -1 +1 @@
1
- from maco.model.model import *
1
+ from maco.model.model import * # noqa: F403