maco 1.2.4__py3-none-any.whl → 1.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
1
  from io import BytesIO
2
2
  from typing import List, Optional
3
3
 
4
- from maco import extractor, model, yara
5
-
6
4
  from demo_extractors.complex import complex_utils
5
+ from maco import extractor, model, yara
7
6
 
8
7
 
9
8
  class Complex(extractor.Extractor):
@@ -1,9 +1,8 @@
1
1
  from io import BytesIO
2
2
  from typing import Dict, List, Optional
3
3
 
4
- from maco import extractor, model, yara
5
-
6
4
  from demo_extractors import shared
5
+ from maco import extractor, model, yara
7
6
 
8
7
 
9
8
  class LimitOther(extractor.Extractor):
@@ -24,6 +23,10 @@ class LimitOther(extractor.Extractor):
24
23
  """
25
24
 
26
25
  def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
26
+ # import httpx at runtime so we can test that requirements.txt is installed dynamically without breaking
27
+ # the tests that do direct importing
28
+ import httpx
29
+
27
30
  # use a custom model that inherits from ExtractorModel
28
31
  # this model defines what can go in the 'other' dict
29
32
  tmp = shared.MyCustomModel(family="specify_other")
@@ -0,0 +1 @@
1
+ httpx
demo_extractors/shared.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from typing import Optional
2
+
2
3
  import pydantic
3
4
 
4
5
  from maco import model
maco/base_test.py CHANGED
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
32
32
  # I recommend something like os.path.join(__file__, "../../extractors")
33
33
  # if your extractors are in a folder 'extractors' next to a folder of tests
34
34
  path: str = None
35
+ create_venv: bool=False
35
36
 
36
- def setUp(self) -> None:
37
- if not self.name or not self.path:
37
+ @classmethod
38
+ def setUpClass(cls) -> None:
39
+ if not cls.name or not cls.path:
38
40
  raise Exception("name and path must be set")
39
- self.c = collector.Collector(self.path, include=[self.name])
41
+ cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
42
+ return super().setUpClass()
43
+
44
+ def test_default_metadata(self):
45
+ """Require extractor to be loadable and valid."""
40
46
  self.assertIn(self.name, self.c.extractors)
41
47
  self.assertEqual(len(self.c.extractors), 1)
42
- return super().setUp()
43
48
 
44
49
  def extract(self, stream):
45
50
  """Return results for running extractor over stream, including yara check."""
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
49
54
  resp = self.c.extract(stream, self.name)
50
55
  return resp
51
56
 
52
- def _get_location(self) -> str:
57
+ @classmethod
58
+ def _get_location(cls) -> str:
53
59
  """Return path to child class that implements this class."""
54
60
  # import child module
55
- module = type(self).__module__
61
+ module = cls.__module__
56
62
  i = importlib.import_module(module)
57
63
  # get location to child module
58
64
  return i.__file__
59
65
 
60
- def load_cart(self, filepath: str) -> io.BytesIO:
66
+ @classmethod
67
+ def load_cart(cls, filepath: str) -> io.BytesIO:
61
68
  """Load and unneuter a test file (likely malware) into memory for processing."""
62
69
  # it is nice if we can load files relative to whatever is implementing base_test
63
- dirpath = os.path.split(self._get_location())[0]
70
+ dirpath = os.path.split(cls._get_location())[0]
64
71
  # either filepath is absolute, or should be loaded relative to child of base_test
65
72
  filepath = os.path.join(dirpath, filepath)
66
73
  if not os.path.isfile(filepath):
maco/cli.py CHANGED
@@ -179,7 +179,7 @@ def main():
179
179
  parser.add_argument(
180
180
  "--create_venv",
181
181
  action="store_true",
182
- help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
182
+ help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
183
183
  )
184
184
  args = parser.parse_args()
185
185
  inc = args.include.split(",") if args.include else []
maco/extractor.py CHANGED
@@ -51,14 +51,14 @@ class Extractor:
51
51
  # check yara rules conform to expected structure
52
52
  # we throw away these compiled rules as we need all rules in system compiled together
53
53
  try:
54
- rules = yara.compile(source=self.yara_rule)
54
+ self.yara_compiled = yara.compile(source=self.yara_rule)
55
55
  except yara.SyntaxError as e:
56
56
  raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
57
57
  # need to track which plugin owns the rules
58
- self.yara_rule_names = [x.identifier for x in rules]
59
- if not len(list(rules)):
58
+ self.yara_rule_names = [x.identifier for x in self.yara_compiled]
59
+ if not len(list(self.yara_compiled)):
60
60
  raise InvalidExtractor(f"{name} must define at least one yara rule")
61
- for x in rules:
61
+ for x in self.yara_compiled:
62
62
  if x.is_global:
63
63
  raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
64
64
 
maco/utils.py CHANGED
@@ -4,14 +4,14 @@ import importlib.machinery
4
4
  import importlib.util
5
5
  import inspect
6
6
  import json
7
+ import logging
7
8
  import logging.handlers
9
+ import multiprocessing
8
10
  import os
9
11
  import re
10
12
  import shutil
11
13
  import subprocess
12
14
  import sys
13
- import multiprocessing
14
- import logging
15
15
  import tempfile
16
16
 
17
17
  from maco import yara
@@ -27,8 +27,11 @@ from glob import glob
27
27
  from logging import Logger
28
28
  from pkgutil import walk_packages
29
29
  from types import ModuleType
30
- from typing import Callable, Dict, List, Set, Tuple
30
+ from typing import Callable, Dict, List, Set, Tuple, Union
31
+
32
+ from uv import find_uv_bin
31
33
 
34
+ from maco import model
32
35
  from maco.extractor import Extractor
33
36
 
34
37
  logger = logging.getLogger("maco.lib.utils")
@@ -38,23 +41,10 @@ VENV_DIRECTORY_NAME = ".venv"
38
41
  RELATIVE_FROM_RE = re.compile(r"from (\.+)")
39
42
  RELATIVE_FROM_IMPORT_RE = re.compile(r"from (\.+) import")
40
43
 
41
- try:
42
- # Attempt to use the uv package manager (Recommended)
43
- from uv import find_uv_bin
44
-
45
- UV_BIN = find_uv_bin()
46
-
47
- PIP_CMD = f"{UV_BIN} pip"
48
- VENV_CREATE_CMD = f"{UV_BIN} venv"
49
- PACKAGE_MANAGER = "uv"
50
- except ImportError:
51
- # Otherwise default to pip
52
- from sys import executable
53
-
54
- PIP_CMD = "pip"
55
- VENV_CREATE_CMD = f"{executable} -m venv"
56
- PACKAGE_MANAGER = "pip"
44
+ UV_BIN = find_uv_bin()
57
45
 
46
+ PIP_CMD = f"{UV_BIN} pip"
47
+ VENV_CREATE_CMD = f"{UV_BIN} venv"
58
48
 
59
49
  class Base64Decoder(json.JSONDecoder):
60
50
  def __init__(self, *args, **kwargs):
@@ -210,9 +200,8 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
210
200
  return extractor_dirs, extractor_files
211
201
 
212
202
 
213
- def create_virtual_environments(directories: List[str], python_version: str, logger: Logger):
203
+ def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
214
204
  venvs = []
215
- logger.info("Creating virtual environment(s)..")
216
205
  env = deepcopy(os.environ)
217
206
  stop_directory = os.path.dirname(sorted(directories)[0])
218
207
  # Track directories that we've already visited
@@ -222,14 +211,15 @@ def create_virtual_environments(directories: List[str], python_version: str, log
222
211
  while dir != stop_directory and dir not in visited_dirs:
223
212
  req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
224
213
  if req_files:
225
- venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
226
- env.update({"VIRTUAL_ENV": venv_path})
227
- # Create a virtual environment for the directory
228
- if not os.path.exists(venv_path):
229
- cmd = VENV_CREATE_CMD
230
- if PACKAGE_MANAGER == "uv":
231
- cmd += f" --python {python_version}"
232
- subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
214
+ # create a virtual environment, otherwise directly install into current env
215
+ if create_venv:
216
+ venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
217
+ logger.info(f"Updating virtual environment {venv_path}")
218
+ env.update({"VIRTUAL_ENV": venv_path})
219
+ # Create a virtual environment for the directory
220
+ if not os.path.exists(venv_path):
221
+ cmd = f"{VENV_CREATE_CMD} --python {python_version}"
222
+ subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
233
223
 
234
224
  # Install/Update the packages in the environment
235
225
  install_command = PIP_CMD.split(" ") + ["install", "-U"]
@@ -253,7 +243,10 @@ def create_virtual_environments(directories: List[str], python_version: str, log
253
243
 
254
244
  install_command.extend(pyproject_command)
255
245
 
246
+ # always require maco to be installed
247
+ install_command.append("maco")
256
248
  logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
249
+ # this uses VIRTUAL_ENV to control usage of a virtual environment
257
250
  p = subprocess.run(
258
251
  install_command,
259
252
  cwd=dir,
@@ -264,10 +257,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
264
257
  if b"is being installed using the legacy" in p.stderr:
265
258
  # Ignore these types of errors
266
259
  continue
267
- logger.error(f"Error installing into venv:\n{p.stderr.decode()}")
260
+ logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
268
261
  else:
269
- logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
270
- venvs.append(venv_path)
262
+ logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
263
+ if create_venv:
264
+ venvs.append(venv_path)
271
265
 
272
266
  # Cleanup any build directories that are the product of package installation
273
267
  expected_build_path = os.path.join(dir, "build")
@@ -311,7 +305,7 @@ def register_extractors(
311
305
  ):
312
306
  package_name = os.path.basename(current_directory)
313
307
  parent_directory = os.path.dirname(current_directory)
314
- if package_name in sys.modules:
308
+ if venvs and package_name in sys.modules:
315
309
  # this may happen as part of testing if some part of the extractor code was directly imported
316
310
  logger.warning(f"Looks like {package_name} is already loaded. "
317
311
  "If your maco extractor overlaps an existing package name this could cause problems.")
@@ -402,32 +396,26 @@ def import_extractors(
402
396
  *,
403
397
  root_directory: str,
404
398
  scanner: yara.Rules,
405
- create_venv: bool = False,
406
- python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
399
+ create_venv: bool,
407
400
  logger: Logger,
401
+ python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
408
402
  ):
409
403
  extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
410
404
 
411
405
  logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
412
406
  logger.debug(extractor_files)
413
407
 
414
- venvs = []
415
- if create_venv:
416
- venvs = create_virtual_environments(extractor_dirs, python_version, logger)
417
- else:
418
- # Look for pre-existing virtual environments, if any
419
- logger.info("Checking for pre-existing virtual environment(s)..")
420
- venvs = [
421
- os.path.join(root, VENV_DIRECTORY_NAME)
422
- for root, dirs, _ in os.walk(root_directory)
423
- if VENV_DIRECTORY_NAME in dirs
424
- ]
408
+ # Install packages into the current environment or dynamically created virtual environments
409
+ venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
425
410
 
426
411
  # With the environment prepared, we can now hunt for the extractors and register them
427
412
  logger.info("Registering extractors..")
428
413
  register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
429
414
 
430
415
 
416
+ # holds cached extractors when not running in venv mode
417
+ _loaded_extractors: Dict[str, Extractor] = {}
418
+
431
419
  def run_extractor(
432
420
  sample_path,
433
421
  module_name,
@@ -436,55 +424,69 @@ def run_extractor(
436
424
  venv,
437
425
  venv_script=VENV_SCRIPT,
438
426
  json_decoder=Base64Decoder,
439
- ) -> Dict[str, dict]:
440
- # Write temporary script in the same directory as extractor to resolve relative imports
441
- python_exe = sys.executable
442
- if venv:
443
- # If there is a linked virtual environment, execute within that environment
427
+ ) -> Union[Dict[str, dict], model.ExtractorModel]:
428
+ """Runs the maco extractor against sample either in current process or child process."""
429
+ if not venv:
430
+ key = f"{module_name}_{extractor_class}"
431
+ if key not in _loaded_extractors:
432
+ # dynamic import of extractor
433
+ mod = importlib.import_module(module_name)
434
+ extractor_cls = mod.__getattribute__(extractor_class)
435
+ extractor = extractor_cls()
436
+ else:
437
+ # retrieve cached extractor
438
+ extractor = _loaded_extractors[key]
439
+ if extractor.yara_compiled:
440
+ matches = extractor.yara_compiled.match(sample_path)
441
+ loaded = extractor.run(open(sample_path, 'rb'), matches=matches)
442
+ else:
443
+ # execute extractor in child process with separate virtual environment
444
+ # Write temporary script in the same directory as extractor to resolve relative imports
444
445
  python_exe = os.path.join(venv, "bin", "python")
445
- dirname = os.path.dirname(module_path)
446
- with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
447
- with tempfile.NamedTemporaryFile() as output:
448
- parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
449
- root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
450
-
451
- script.write(
452
- venv_script.format(
453
- parent_package_path=parent_package_path,
454
- module_name=module_name,
455
- module_class=extractor_class,
456
- sample_path=sample_path,
457
- output_path=output.name,
446
+ dirname = os.path.dirname(module_path)
447
+ with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
448
+ with tempfile.NamedTemporaryFile() as output:
449
+ parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
450
+ root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
451
+
452
+ script.write(
453
+ venv_script.format(
454
+ parent_package_path=parent_package_path,
455
+ module_name=module_name,
456
+ module_class=extractor_class,
457
+ sample_path=sample_path,
458
+ output_path=output.name,
459
+ )
458
460
  )
459
- )
460
- script.flush()
461
- cwd = root_directory
462
- custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
463
-
464
- if custom_module.startswith("src."):
465
- # src layout found, which means the actual module content is within 'src' directory
466
- custom_module = custom_module[4:]
467
- cwd = os.path.join(cwd, "src")
468
-
469
- proc = subprocess.run(
470
- [python_exe, "-m", custom_module],
471
- cwd=cwd,
472
- capture_output=True,
473
- )
474
- stderr = proc.stderr.decode()
475
- try:
476
- # Load results and return them
477
- output.seek(0)
478
- loaded = json.load(output, cls=json_decoder)
479
- except Exception as e:
480
- # If there was an error raised during runtime, then propagate
481
- delim = f'File "{module_path}"'
482
- exception = stderr
483
- if delim in exception:
484
- exception = f"{delim}{exception.split(delim, 1)[1]}"
485
- # print extractor logging at error level
486
- logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
487
- raise Exception(exception) from e
488
- # ensure that extractor logging is available
489
- logger.info(f"maco extractor stderr:\n{stderr}")
490
- return loaded
461
+ script.flush()
462
+ cwd = root_directory
463
+ custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
464
+
465
+ if custom_module.startswith("src."):
466
+ # src layout found, which means the actual module content is within 'src' directory
467
+ custom_module = custom_module[4:]
468
+ cwd = os.path.join(cwd, "src")
469
+
470
+ # run the maco extractor in full venv process isolation (slow)
471
+ proc = subprocess.run(
472
+ [python_exe, "-m", custom_module],
473
+ cwd=cwd,
474
+ capture_output=True,
475
+ )
476
+ stderr = proc.stderr.decode()
477
+ try:
478
+ # Load results and return them
479
+ output.seek(0)
480
+ loaded = json.load(output, cls=json_decoder)
481
+ except Exception as e:
482
+ # If there was an error raised during runtime, then propagate
483
+ delim = f'File "{module_path}"'
484
+ exception = stderr
485
+ if delim in exception:
486
+ exception = f"{delim}{exception.split(delim, 1)[1]}"
487
+ # print extractor logging at error level
488
+ logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
489
+ raise Exception(exception) from e
490
+ # ensure that extractor logging is available
491
+ logger.info(f"maco extractor stderr:\n{stderr}")
492
+ return loaded
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maco
3
- Version: 1.2.4
3
+ Version: 1.2.5
4
4
  Author: sl-govau
5
5
  Maintainer: cccs-rs
6
6
  License: MIT License
@@ -1,26 +1,27 @@
1
1
  demo_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  demo_extractors/elfy.py,sha256=AAFr5i1aivPwO4nycyXJEud57EpVNA-5k_2GicWesbY,771
3
- demo_extractors/limit_other.py,sha256=RAFx_0K_WnhUURA5uwXGmjrWODrAuLZFBxqZcWaxf64,944
3
+ demo_extractors/limit_other.py,sha256=8Z7X0cXUyZuK3MhDtObMWmdruRj5hgFdDi_VVGXqRx4,1123
4
4
  demo_extractors/nothing.py,sha256=3aeQJTY-dakmVXmyfmrRM8YCQVT7q3bq880DFH1Ol_Y,607
5
- demo_extractors/shared.py,sha256=Wlvy77SCAR97gxi8uUhGYyjxGmDb-pOSvN8b1rXrVWs,304
5
+ demo_extractors/requirements.txt,sha256=E0tD6xBZldq6sQGTHng6k88lBeASOhmLJcdcjpcqBNE,6
6
+ demo_extractors/shared.py,sha256=2P1cyuRbHDvM9IRt3UZnwdyhxx7OWqNC83xLyV8Y190,305
6
7
  demo_extractors/complex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- demo_extractors/complex/complex.py,sha256=JFKqBGKwkuDSz4zZUJuqhCLUQv6dlCMhBqNj33grBsE,2323
8
+ demo_extractors/complex/complex.py,sha256=tXrzj_zWIXbTOwj7Lezapk-qkrM-lfwcyjd5m-BYzdg,2322
8
9
  demo_extractors/complex/complex_utils.py,sha256=aec8kJsYUrMPo-waihkVLt-0QpiOPkw7dDqfT9MNuHk,123
9
10
  maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- maco/base_test.py,sha256=pqet9ofMwFRTj3JHgPdh9WHwdyp8kxNMi1vJNUzkSNA,2518
11
- maco/cli.py,sha256=1I3U54yPddTxqWclCtZ5Ma5hW6RoVTZMzLSFOjjfM1g,8008
11
+ maco/base_test.py,sha256=EPxCun9Tv91V-lFpaenn14tPyW17TPvXVH4AjE3t6js,2716
12
+ maco/cli.py,sha256=fIeUXOgOxcecmAkl6OAdnjBKqk1gBPv1ryWe50pT60g,8135
12
13
  maco/collector.py,sha256=Vlo7KcJC7TKZFTElv8i_f_hvWEnlWCRzOP1xOc9x7vk,6532
13
- maco/extractor.py,sha256=4ZQd8OfvEQYUIkUS3LzZ5tcioembuLhT9_uRVNKSsyM,2750
14
- maco/utils.py,sha256=vQeJKw4whWTXp1mTd2oEhfvL4nvvgAzWjBnCp2XxWLI,19275
14
+ maco/extractor.py,sha256=uGSGiCQ4jd8jFmfw2T99BGcY5iQJzXHcG_RoTIxClTE,2802
15
+ maco/utils.py,sha256=K41c-H7naaoiEYf0WNfP054IxwvHPujsbmmzgTizuLU,20159
15
16
  maco/yara.py,sha256=vPzCqauVp52ivcTdt8zwrYqDdkLutGlesma9DhKPzHw,2925
16
17
  maco/model/__init__.py,sha256=SJrwdn12wklUFm2KoIgWjX_KgvJxCM7Ca9ntXOneuzc,31
17
18
  maco/model/model.py,sha256=ngen4ViyLdRo_z_TqZBjw2DN0NrRLpuxOy15-6QmtNw,23536
18
19
  model_setup/maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- model_setup/maco/base_test.py,sha256=pqet9ofMwFRTj3JHgPdh9WHwdyp8kxNMi1vJNUzkSNA,2518
20
- model_setup/maco/cli.py,sha256=1I3U54yPddTxqWclCtZ5Ma5hW6RoVTZMzLSFOjjfM1g,8008
20
+ model_setup/maco/base_test.py,sha256=EPxCun9Tv91V-lFpaenn14tPyW17TPvXVH4AjE3t6js,2716
21
+ model_setup/maco/cli.py,sha256=fIeUXOgOxcecmAkl6OAdnjBKqk1gBPv1ryWe50pT60g,8135
21
22
  model_setup/maco/collector.py,sha256=Vlo7KcJC7TKZFTElv8i_f_hvWEnlWCRzOP1xOc9x7vk,6532
22
- model_setup/maco/extractor.py,sha256=4ZQd8OfvEQYUIkUS3LzZ5tcioembuLhT9_uRVNKSsyM,2750
23
- model_setup/maco/utils.py,sha256=vQeJKw4whWTXp1mTd2oEhfvL4nvvgAzWjBnCp2XxWLI,19275
23
+ model_setup/maco/extractor.py,sha256=uGSGiCQ4jd8jFmfw2T99BGcY5iQJzXHcG_RoTIxClTE,2802
24
+ model_setup/maco/utils.py,sha256=K41c-H7naaoiEYf0WNfP054IxwvHPujsbmmzgTizuLU,20159
24
25
  model_setup/maco/yara.py,sha256=vPzCqauVp52ivcTdt8zwrYqDdkLutGlesma9DhKPzHw,2925
25
26
  model_setup/maco/model/__init__.py,sha256=SJrwdn12wklUFm2KoIgWjX_KgvJxCM7Ca9ntXOneuzc,31
26
27
  model_setup/maco/model/model.py,sha256=ngen4ViyLdRo_z_TqZBjw2DN0NrRLpuxOy15-6QmtNw,23536
@@ -35,9 +36,9 @@ tests/extractors/basic_longer.py,sha256=1ClU2QD-Y0TOl_loNFvEqIEpTR5TSVJ6zg9ZmC-E
35
36
  tests/extractors/test_basic.py,sha256=FLKekfSGM69HaiF7Vu_7D7KDXHZko-9hZkMO8_DoyYA,697
36
37
  tests/extractors/bob/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
38
  tests/extractors/bob/bob.py,sha256=Gy5p8KssJX87cwa9vVv8UBODF_ulbUteZXh15frW2hs,247
38
- maco-1.2.4.dist-info/LICENSE.md,sha256=gMSjshPhXvV_F1qxmeNkKdBqGWkd__fEJf4glS504bM,1478
39
- maco-1.2.4.dist-info/METADATA,sha256=EoAEnCfbaXe8eUFAMHj3-C9lQIDR9Ss31NcY4CbEXjI,15610
40
- maco-1.2.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
41
- maco-1.2.4.dist-info/entry_points.txt,sha256=TpcwG1gedIg8Y7a9ZOv8aQpuwEUftCefDrAjzeP-o6U,39
42
- maco-1.2.4.dist-info/top_level.txt,sha256=iMRwuzmrHA3zSwiSeMIl6FWhzRpn_st-I4fAv-kw5_o,49
43
- maco-1.2.4.dist-info/RECORD,,
39
+ maco-1.2.5.dist-info/LICENSE.md,sha256=gMSjshPhXvV_F1qxmeNkKdBqGWkd__fEJf4glS504bM,1478
40
+ maco-1.2.5.dist-info/METADATA,sha256=cJ7x_shBhDgKVjkq_e2d94aj3qiUzi0lt7f3lPO334U,15610
41
+ maco-1.2.5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
42
+ maco-1.2.5.dist-info/entry_points.txt,sha256=TpcwG1gedIg8Y7a9ZOv8aQpuwEUftCefDrAjzeP-o6U,39
43
+ maco-1.2.5.dist-info/top_level.txt,sha256=iMRwuzmrHA3zSwiSeMIl6FWhzRpn_st-I4fAv-kw5_o,49
44
+ maco-1.2.5.dist-info/RECORD,,
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
32
32
  # I recommend something like os.path.join(__file__, "../../extractors")
33
33
  # if your extractors are in a folder 'extractors' next to a folder of tests
34
34
  path: str = None
35
+ create_venv: bool=False
35
36
 
36
- def setUp(self) -> None:
37
- if not self.name or not self.path:
37
+ @classmethod
38
+ def setUpClass(cls) -> None:
39
+ if not cls.name or not cls.path:
38
40
  raise Exception("name and path must be set")
39
- self.c = collector.Collector(self.path, include=[self.name])
41
+ cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
42
+ return super().setUpClass()
43
+
44
+ def test_default_metadata(self):
45
+ """Require extractor to be loadable and valid."""
40
46
  self.assertIn(self.name, self.c.extractors)
41
47
  self.assertEqual(len(self.c.extractors), 1)
42
- return super().setUp()
43
48
 
44
49
  def extract(self, stream):
45
50
  """Return results for running extractor over stream, including yara check."""
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
49
54
  resp = self.c.extract(stream, self.name)
50
55
  return resp
51
56
 
52
- def _get_location(self) -> str:
57
+ @classmethod
58
+ def _get_location(cls) -> str:
53
59
  """Return path to child class that implements this class."""
54
60
  # import child module
55
- module = type(self).__module__
61
+ module = cls.__module__
56
62
  i = importlib.import_module(module)
57
63
  # get location to child module
58
64
  return i.__file__
59
65
 
60
- def load_cart(self, filepath: str) -> io.BytesIO:
66
+ @classmethod
67
+ def load_cart(cls, filepath: str) -> io.BytesIO:
61
68
  """Load and unneuter a test file (likely malware) into memory for processing."""
62
69
  # it is nice if we can load files relative to whatever is implementing base_test
63
- dirpath = os.path.split(self._get_location())[0]
70
+ dirpath = os.path.split(cls._get_location())[0]
64
71
  # either filepath is absolute, or should be loaded relative to child of base_test
65
72
  filepath = os.path.join(dirpath, filepath)
66
73
  if not os.path.isfile(filepath):
model_setup/maco/cli.py CHANGED
@@ -179,7 +179,7 @@ def main():
179
179
  parser.add_argument(
180
180
  "--create_venv",
181
181
  action="store_true",
182
- help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
182
+ help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
183
183
  )
184
184
  args = parser.parse_args()
185
185
  inc = args.include.split(",") if args.include else []
@@ -51,14 +51,14 @@ class Extractor:
51
51
  # check yara rules conform to expected structure
52
52
  # we throw away these compiled rules as we need all rules in system compiled together
53
53
  try:
54
- rules = yara.compile(source=self.yara_rule)
54
+ self.yara_compiled = yara.compile(source=self.yara_rule)
55
55
  except yara.SyntaxError as e:
56
56
  raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
57
57
  # need to track which plugin owns the rules
58
- self.yara_rule_names = [x.identifier for x in rules]
59
- if not len(list(rules)):
58
+ self.yara_rule_names = [x.identifier for x in self.yara_compiled]
59
+ if not len(list(self.yara_compiled)):
60
60
  raise InvalidExtractor(f"{name} must define at least one yara rule")
61
- for x in rules:
61
+ for x in self.yara_compiled:
62
62
  if x.is_global:
63
63
  raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
64
64
 
model_setup/maco/utils.py CHANGED
@@ -4,14 +4,14 @@ import importlib.machinery
4
4
  import importlib.util
5
5
  import inspect
6
6
  import json
7
+ import logging
7
8
  import logging.handlers
9
+ import multiprocessing
8
10
  import os
9
11
  import re
10
12
  import shutil
11
13
  import subprocess
12
14
  import sys
13
- import multiprocessing
14
- import logging
15
15
  import tempfile
16
16
 
17
17
  from maco import yara
@@ -27,8 +27,11 @@ from glob import glob
27
27
  from logging import Logger
28
28
  from pkgutil import walk_packages
29
29
  from types import ModuleType
30
- from typing import Callable, Dict, List, Set, Tuple
30
+ from typing import Callable, Dict, List, Set, Tuple, Union
31
+
32
+ from uv import find_uv_bin
31
33
 
34
+ from maco import model
32
35
  from maco.extractor import Extractor
33
36
 
34
37
  logger = logging.getLogger("maco.lib.utils")
@@ -38,23 +41,10 @@ VENV_DIRECTORY_NAME = ".venv"
38
41
  RELATIVE_FROM_RE = re.compile(r"from (\.+)")
39
42
  RELATIVE_FROM_IMPORT_RE = re.compile(r"from (\.+) import")
40
43
 
41
- try:
42
- # Attempt to use the uv package manager (Recommended)
43
- from uv import find_uv_bin
44
-
45
- UV_BIN = find_uv_bin()
46
-
47
- PIP_CMD = f"{UV_BIN} pip"
48
- VENV_CREATE_CMD = f"{UV_BIN} venv"
49
- PACKAGE_MANAGER = "uv"
50
- except ImportError:
51
- # Otherwise default to pip
52
- from sys import executable
53
-
54
- PIP_CMD = "pip"
55
- VENV_CREATE_CMD = f"{executable} -m venv"
56
- PACKAGE_MANAGER = "pip"
44
+ UV_BIN = find_uv_bin()
57
45
 
46
+ PIP_CMD = f"{UV_BIN} pip"
47
+ VENV_CREATE_CMD = f"{UV_BIN} venv"
58
48
 
59
49
  class Base64Decoder(json.JSONDecoder):
60
50
  def __init__(self, *args, **kwargs):
@@ -210,9 +200,8 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
210
200
  return extractor_dirs, extractor_files
211
201
 
212
202
 
213
- def create_virtual_environments(directories: List[str], python_version: str, logger: Logger):
203
+ def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
214
204
  venvs = []
215
- logger.info("Creating virtual environment(s)..")
216
205
  env = deepcopy(os.environ)
217
206
  stop_directory = os.path.dirname(sorted(directories)[0])
218
207
  # Track directories that we've already visited
@@ -222,14 +211,15 @@ def create_virtual_environments(directories: List[str], python_version: str, log
222
211
  while dir != stop_directory and dir not in visited_dirs:
223
212
  req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
224
213
  if req_files:
225
- venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
226
- env.update({"VIRTUAL_ENV": venv_path})
227
- # Create a virtual environment for the directory
228
- if not os.path.exists(venv_path):
229
- cmd = VENV_CREATE_CMD
230
- if PACKAGE_MANAGER == "uv":
231
- cmd += f" --python {python_version}"
232
- subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
214
+ # create a virtual environment, otherwise directly install into current env
215
+ if create_venv:
216
+ venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
217
+ logger.info(f"Updating virtual environment {venv_path}")
218
+ env.update({"VIRTUAL_ENV": venv_path})
219
+ # Create a virtual environment for the directory
220
+ if not os.path.exists(venv_path):
221
+ cmd = f"{VENV_CREATE_CMD} --python {python_version}"
222
+ subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
233
223
 
234
224
  # Install/Update the packages in the environment
235
225
  install_command = PIP_CMD.split(" ") + ["install", "-U"]
@@ -253,7 +243,10 @@ def create_virtual_environments(directories: List[str], python_version: str, log
253
243
 
254
244
  install_command.extend(pyproject_command)
255
245
 
246
+ # always require maco to be installed
247
+ install_command.append("maco")
256
248
  logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
249
+ # this uses VIRTUAL_ENV to control usage of a virtual environment
257
250
  p = subprocess.run(
258
251
  install_command,
259
252
  cwd=dir,
@@ -264,10 +257,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
264
257
  if b"is being installed using the legacy" in p.stderr:
265
258
  # Ignore these types of errors
266
259
  continue
267
- logger.error(f"Error installing into venv:\n{p.stderr.decode()}")
260
+ logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
268
261
  else:
269
- logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
270
- venvs.append(venv_path)
262
+ logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
263
+ if create_venv:
264
+ venvs.append(venv_path)
271
265
 
272
266
  # Cleanup any build directories that are the product of package installation
273
267
  expected_build_path = os.path.join(dir, "build")
@@ -311,7 +305,7 @@ def register_extractors(
311
305
  ):
312
306
  package_name = os.path.basename(current_directory)
313
307
  parent_directory = os.path.dirname(current_directory)
314
- if package_name in sys.modules:
308
+ if venvs and package_name in sys.modules:
315
309
  # this may happen as part of testing if some part of the extractor code was directly imported
316
310
  logger.warning(f"Looks like {package_name} is already loaded. "
317
311
  "If your maco extractor overlaps an existing package name this could cause problems.")
@@ -402,32 +396,26 @@ def import_extractors(
402
396
  *,
403
397
  root_directory: str,
404
398
  scanner: yara.Rules,
405
- create_venv: bool = False,
406
- python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
399
+ create_venv: bool,
407
400
  logger: Logger,
401
+ python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
408
402
  ):
409
403
  extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
410
404
 
411
405
  logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
412
406
  logger.debug(extractor_files)
413
407
 
414
- venvs = []
415
- if create_venv:
416
- venvs = create_virtual_environments(extractor_dirs, python_version, logger)
417
- else:
418
- # Look for pre-existing virtual environments, if any
419
- logger.info("Checking for pre-existing virtual environment(s)..")
420
- venvs = [
421
- os.path.join(root, VENV_DIRECTORY_NAME)
422
- for root, dirs, _ in os.walk(root_directory)
423
- if VENV_DIRECTORY_NAME in dirs
424
- ]
408
+ # Install packages into the current environment or dynamically created virtual environments
409
+ venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
425
410
 
426
411
  # With the environment prepared, we can now hunt for the extractors and register them
427
412
  logger.info("Registering extractors..")
428
413
  register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
429
414
 
430
415
 
416
+ # holds cached extractors when not running in venv mode
417
+ _loaded_extractors: Dict[str, Extractor] = {}
418
+
431
419
  def run_extractor(
432
420
  sample_path,
433
421
  module_name,
@@ -436,55 +424,69 @@ def run_extractor(
436
424
  venv,
437
425
  venv_script=VENV_SCRIPT,
438
426
  json_decoder=Base64Decoder,
439
- ) -> Dict[str, dict]:
440
- # Write temporary script in the same directory as extractor to resolve relative imports
441
- python_exe = sys.executable
442
- if venv:
443
- # If there is a linked virtual environment, execute within that environment
427
+ ) -> Union[Dict[str, dict], model.ExtractorModel]:
428
+ """Runs the maco extractor against sample either in current process or child process."""
429
+ if not venv:
430
+ key = f"{module_name}_{extractor_class}"
431
+ if key not in _loaded_extractors:
432
+ # dynamic import of extractor
433
+ mod = importlib.import_module(module_name)
434
+ extractor_cls = mod.__getattribute__(extractor_class)
435
+ extractor = extractor_cls()
436
+ else:
437
+ # retrieve cached extractor
438
+ extractor = _loaded_extractors[key]
439
+ if extractor.yara_compiled:
440
+ matches = extractor.yara_compiled.match(sample_path)
441
+ loaded = extractor.run(open(sample_path, 'rb'), matches=matches)
442
+ else:
443
+ # execute extractor in child process with separate virtual environment
444
+ # Write temporary script in the same directory as extractor to resolve relative imports
444
445
  python_exe = os.path.join(venv, "bin", "python")
445
- dirname = os.path.dirname(module_path)
446
- with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
447
- with tempfile.NamedTemporaryFile() as output:
448
- parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
449
- root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
450
-
451
- script.write(
452
- venv_script.format(
453
- parent_package_path=parent_package_path,
454
- module_name=module_name,
455
- module_class=extractor_class,
456
- sample_path=sample_path,
457
- output_path=output.name,
446
+ dirname = os.path.dirname(module_path)
447
+ with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
448
+ with tempfile.NamedTemporaryFile() as output:
449
+ parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
450
+ root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
451
+
452
+ script.write(
453
+ venv_script.format(
454
+ parent_package_path=parent_package_path,
455
+ module_name=module_name,
456
+ module_class=extractor_class,
457
+ sample_path=sample_path,
458
+ output_path=output.name,
459
+ )
458
460
  )
459
- )
460
- script.flush()
461
- cwd = root_directory
462
- custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
463
-
464
- if custom_module.startswith("src."):
465
- # src layout found, which means the actual module content is within 'src' directory
466
- custom_module = custom_module[4:]
467
- cwd = os.path.join(cwd, "src")
468
-
469
- proc = subprocess.run(
470
- [python_exe, "-m", custom_module],
471
- cwd=cwd,
472
- capture_output=True,
473
- )
474
- stderr = proc.stderr.decode()
475
- try:
476
- # Load results and return them
477
- output.seek(0)
478
- loaded = json.load(output, cls=json_decoder)
479
- except Exception as e:
480
- # If there was an error raised during runtime, then propagate
481
- delim = f'File "{module_path}"'
482
- exception = stderr
483
- if delim in exception:
484
- exception = f"{delim}{exception.split(delim, 1)[1]}"
485
- # print extractor logging at error level
486
- logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
487
- raise Exception(exception) from e
488
- # ensure that extractor logging is available
489
- logger.info(f"maco extractor stderr:\n{stderr}")
490
- return loaded
461
+ script.flush()
462
+ cwd = root_directory
463
+ custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
464
+
465
+ if custom_module.startswith("src."):
466
+ # src layout found, which means the actual module content is within 'src' directory
467
+ custom_module = custom_module[4:]
468
+ cwd = os.path.join(cwd, "src")
469
+
470
+ # run the maco extractor in full venv process isolation (slow)
471
+ proc = subprocess.run(
472
+ [python_exe, "-m", custom_module],
473
+ cwd=cwd,
474
+ capture_output=True,
475
+ )
476
+ stderr = proc.stderr.decode()
477
+ try:
478
+ # Load results and return them
479
+ output.seek(0)
480
+ loaded = json.load(output, cls=json_decoder)
481
+ except Exception as e:
482
+ # If there was an error raised during runtime, then propagate
483
+ delim = f'File "{module_path}"'
484
+ exception = stderr
485
+ if delim in exception:
486
+ exception = f"{delim}{exception.split(delim, 1)[1]}"
487
+ # print extractor logging at error level
488
+ logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
489
+ raise Exception(exception) from e
490
+ # ensure that extractor logging is available
491
+ logger.info(f"maco extractor stderr:\n{stderr}")
492
+ return loaded
File without changes