maco 1.2.3__tar.gz → 1.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {maco-1.2.3/maco.egg-info → maco-1.2.5}/PKG-INFO +1 -1
  2. {maco-1.2.3 → maco-1.2.5}/demo_extractors/complex/complex.py +1 -2
  3. {maco-1.2.3 → maco-1.2.5}/demo_extractors/limit_other.py +5 -2
  4. maco-1.2.5/demo_extractors/requirements.txt +1 -0
  5. {maco-1.2.3 → maco-1.2.5}/demo_extractors/shared.py +1 -0
  6. {maco-1.2.3/model_setup → maco-1.2.5}/maco/base_test.py +15 -8
  7. {maco-1.2.3/model_setup → maco-1.2.5}/maco/cli.py +1 -1
  8. {maco-1.2.3/model_setup → maco-1.2.5}/maco/collector.py +19 -6
  9. {maco-1.2.3/model_setup → maco-1.2.5}/maco/extractor.py +4 -4
  10. {maco-1.2.3 → maco-1.2.5}/maco/utils.py +129 -103
  11. {maco-1.2.3 → maco-1.2.5/maco.egg-info}/PKG-INFO +1 -1
  12. {maco-1.2.3 → maco-1.2.5}/maco.egg-info/SOURCES.txt +4 -0
  13. {maco-1.2.3 → maco-1.2.5/model_setup}/maco/base_test.py +15 -8
  14. {maco-1.2.3 → maco-1.2.5/model_setup}/maco/cli.py +1 -1
  15. {maco-1.2.3 → maco-1.2.5/model_setup}/maco/collector.py +19 -6
  16. {maco-1.2.3 → maco-1.2.5/model_setup}/maco/extractor.py +4 -4
  17. {maco-1.2.3 → maco-1.2.5}/model_setup/maco/utils.py +129 -103
  18. maco-1.2.5/tests/benchmark.py +107 -0
  19. maco-1.2.5/tests/data/trigger_complex.txt.cart +0 -0
  20. maco-1.2.5/tests/extractors/bob/__init__.py +0 -0
  21. maco-1.2.5/tests/test_base_test.py +60 -0
  22. {maco-1.2.3 → maco-1.2.5}/tests/test_cli.py +2 -2
  23. {maco-1.2.3 → maco-1.2.5}/tests/test_detection.py +5 -3
  24. {maco-1.2.3 → maco-1.2.5}/tox.ini +1 -1
  25. maco-1.2.3/tests/test_base_test.py +0 -17
  26. {maco-1.2.3 → maco-1.2.5}/.gitignore +0 -0
  27. {maco-1.2.3 → maco-1.2.5}/.vscode/settings.json +0 -0
  28. {maco-1.2.3 → maco-1.2.5}/LICENSE.md +0 -0
  29. {maco-1.2.3 → maco-1.2.5}/README.md +0 -0
  30. {maco-1.2.3/demo_extractors/complex → maco-1.2.5/demo_extractors}/__init__.py +0 -0
  31. {maco-1.2.3/maco → maco-1.2.5/demo_extractors/complex}/__init__.py +0 -0
  32. {maco-1.2.3 → maco-1.2.5}/demo_extractors/complex/complex_utils.py +0 -0
  33. {maco-1.2.3 → maco-1.2.5}/demo_extractors/elfy.py +0 -0
  34. {maco-1.2.3 → maco-1.2.5}/demo_extractors/nothing.py +0 -0
  35. {maco-1.2.3/model_setup → maco-1.2.5}/maco/__init__.py +0 -0
  36. {maco-1.2.3 → maco-1.2.5}/maco/model/__init__.py +0 -0
  37. {maco-1.2.3 → maco-1.2.5}/maco/model/model.py +0 -0
  38. {maco-1.2.3 → maco-1.2.5}/maco/yara.py +0 -0
  39. {maco-1.2.3 → maco-1.2.5}/maco.egg-info/dependency_links.txt +0 -0
  40. {maco-1.2.3 → maco-1.2.5}/maco.egg-info/entry_points.txt +0 -0
  41. {maco-1.2.3 → maco-1.2.5}/maco.egg-info/requires.txt +0 -0
  42. {maco-1.2.3 → maco-1.2.5}/maco.egg-info/top_level.txt +0 -0
  43. {maco-1.2.3 → maco-1.2.5}/model_setup/LICENSE.md +0 -0
  44. {maco-1.2.3 → maco-1.2.5}/model_setup/README.md +0 -0
  45. {maco-1.2.3/tests/extractors → maco-1.2.5/model_setup/maco}/__init__.py +0 -0
  46. {maco-1.2.3 → maco-1.2.5}/model_setup/maco/model/__init__.py +0 -0
  47. {maco-1.2.3 → maco-1.2.5}/model_setup/maco/model/model.py +0 -0
  48. {maco-1.2.3 → maco-1.2.5}/model_setup/maco/yara.py +0 -0
  49. {maco-1.2.3 → maco-1.2.5}/model_setup/pyproject.toml +0 -0
  50. {maco-1.2.3 → maco-1.2.5}/model_setup/setup.py +0 -0
  51. {maco-1.2.3 → maco-1.2.5}/pipelines/publish.yaml +0 -0
  52. {maco-1.2.3 → maco-1.2.5}/pipelines/test.yaml +0 -0
  53. {maco-1.2.3 → maco-1.2.5}/pyproject.toml +0 -0
  54. {maco-1.2.3 → maco-1.2.5}/requirements.txt +0 -0
  55. {maco-1.2.3 → maco-1.2.5}/setup.cfg +0 -0
  56. {maco-1.2.3 → maco-1.2.5}/tests/data/example.txt.cart +0 -0
  57. {maco-1.2.3 → maco-1.2.5}/tests/data/trigger_complex.txt +0 -0
  58. {maco-1.2.3/tests/extractors/bob → maco-1.2.5/tests/extractors}/__init__.py +0 -0
  59. {maco-1.2.3 → maco-1.2.5}/tests/extractors/basic.py +0 -0
  60. {maco-1.2.3 → maco-1.2.5}/tests/extractors/basic_longer.py +0 -0
  61. {maco-1.2.3 → maco-1.2.5}/tests/extractors/bob/bob.py +0 -0
  62. {maco-1.2.3 → maco-1.2.5}/tests/extractors/test_basic.py +0 -0
  63. {maco-1.2.3 → maco-1.2.5}/tests/pytest.ini +0 -0
  64. {maco-1.2.3 → maco-1.2.5}/tests/requirements.txt +0 -0
  65. {maco-1.2.3 → maco-1.2.5}/tests/test_demo_extractors.py +0 -0
  66. {maco-1.2.3 → maco-1.2.5}/tests/test_extractor.py +0 -0
  67. {maco-1.2.3 → maco-1.2.5}/tests/test_helpers.py +0 -0
  68. {maco-1.2.3 → maco-1.2.5}/tests/test_model.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maco
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Author: sl-govau
5
5
  Maintainer: cccs-rs
6
6
  License: MIT License
@@ -1,10 +1,9 @@
1
1
  from io import BytesIO
2
2
  from typing import List, Optional
3
3
 
4
+ from demo_extractors.complex import complex_utils
4
5
  from maco import extractor, model, yara
5
6
 
6
- from complex import complex_utils
7
-
8
7
 
9
8
  class Complex(extractor.Extractor):
10
9
  """This script has multiple yara rules and coverage of the data model."""
@@ -1,10 +1,9 @@
1
1
  from io import BytesIO
2
2
  from typing import Dict, List, Optional
3
3
 
4
+ from demo_extractors import shared
4
5
  from maco import extractor, model, yara
5
6
 
6
- from . import shared
7
-
8
7
 
9
8
  class LimitOther(extractor.Extractor):
10
9
  """An example of how the 'other' dictionary can be limited in a custom way."""
@@ -24,6 +23,10 @@ class LimitOther(extractor.Extractor):
24
23
  """
25
24
 
26
25
  def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
26
+ # import httpx at runtime so we can test that requirements.txt is installed dynamically without breaking
27
+ # the tests that do direct importing
28
+ import httpx
29
+
27
30
  # use a custom model that inherits from ExtractorModel
28
31
  # this model defines what can go in the 'other' dict
29
32
  tmp = shared.MyCustomModel(family="specify_other")
@@ -0,0 +1 @@
1
+ httpx
@@ -1,4 +1,5 @@
1
1
  from typing import Optional
2
+
2
3
  import pydantic
3
4
 
4
5
  from maco import model
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
32
32
  # I recommend something like os.path.join(__file__, "../../extractors")
33
33
  # if your extractors are in a folder 'extractors' next to a folder of tests
34
34
  path: str = None
35
+ create_venv: bool=False
35
36
 
36
- def setUp(self) -> None:
37
- if not self.name or not self.path:
37
+ @classmethod
38
+ def setUpClass(cls) -> None:
39
+ if not cls.name or not cls.path:
38
40
  raise Exception("name and path must be set")
39
- self.c = collector.Collector(self.path, include=[self.name])
41
+ cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
42
+ return super().setUpClass()
43
+
44
+ def test_default_metadata(self):
45
+ """Require extractor to be loadable and valid."""
40
46
  self.assertIn(self.name, self.c.extractors)
41
47
  self.assertEqual(len(self.c.extractors), 1)
42
- return super().setUp()
43
48
 
44
49
  def extract(self, stream):
45
50
  """Return results for running extractor over stream, including yara check."""
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
49
54
  resp = self.c.extract(stream, self.name)
50
55
  return resp
51
56
 
52
- def _get_location(self) -> str:
57
+ @classmethod
58
+ def _get_location(cls) -> str:
53
59
  """Return path to child class that implements this class."""
54
60
  # import child module
55
- module = type(self).__module__
61
+ module = cls.__module__
56
62
  i = importlib.import_module(module)
57
63
  # get location to child module
58
64
  return i.__file__
59
65
 
60
- def load_cart(self, filepath: str) -> io.BytesIO:
66
+ @classmethod
67
+ def load_cart(cls, filepath: str) -> io.BytesIO:
61
68
  """Load and unneuter a test file (likely malware) into memory for processing."""
62
69
  # it is nice if we can load files relative to whatever is implementing base_test
63
- dirpath = os.path.split(self._get_location())[0]
70
+ dirpath = os.path.split(cls._get_location())[0]
64
71
  # either filepath is absolute, or should be loaded relative to child of base_test
65
72
  filepath = os.path.join(dirpath, filepath)
66
73
  if not os.path.isfile(filepath):
@@ -179,7 +179,7 @@ def main():
179
179
  parser.add_argument(
180
180
  "--create_venv",
181
181
  action="store_true",
182
- help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
182
+ help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
183
183
  )
184
184
  args = parser.parse_args()
185
185
  inc = args.include.split(",") if args.include else []
@@ -2,8 +2,9 @@
2
2
 
3
3
  import inspect
4
4
  import logging
5
+ import logging.handlers
5
6
  import os
6
- from multiprocessing import Manager, Process
7
+ from multiprocessing import Manager, Process, Queue
7
8
  from tempfile import NamedTemporaryFile
8
9
  from types import ModuleType
9
10
  from typing import Any, BinaryIO, Dict, List, Union
@@ -86,21 +87,33 @@ class Collector:
86
87
  )
87
88
  namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
88
89
 
90
+ # multiprocess logging is awkward - set up a queue to ensure we can log
91
+ logging_queue = Queue()
92
+ queue_handler = logging.handlers.QueueListener(logging_queue,*logging.getLogger().handlers)
93
+ queue_handler.start()
94
+
89
95
  # Find the extractors within the given directory
90
96
  # Execute within a child process to ensure main process interpreter is kept clean
91
97
  p = Process(
92
- target=utils.import_extractors,
98
+ target=utils.proxy_logging,
93
99
  args=(
94
- path_extractors,
95
- yara.compile(source=utils.MACO_YARA_RULE),
100
+ logging_queue,
101
+ utils.import_extractors,
96
102
  extractor_module_callback,
97
- logger,
98
- create_venv and os.path.isdir(path_extractors),
103
+ ),
104
+ kwargs=dict(
105
+ root_directory=path_extractors,
106
+ scanner=yara.compile(source=utils.MACO_YARA_RULE),
107
+ create_venv=create_venv and os.path.isdir(path_extractors),
99
108
  ),
100
109
  )
101
110
  p.start()
102
111
  p.join()
103
112
 
113
+ # stop multiprocess logging
114
+ queue_handler.stop()
115
+ logging_queue.close()
116
+
104
117
  self.extractors = dict(extractors)
105
118
  if not self.extractors:
106
119
  raise ExtractorLoadError("no extractors were loaded")
@@ -51,14 +51,14 @@ class Extractor:
51
51
  # check yara rules conform to expected structure
52
52
  # we throw away these compiled rules as we need all rules in system compiled together
53
53
  try:
54
- rules = yara.compile(source=self.yara_rule)
54
+ self.yara_compiled = yara.compile(source=self.yara_rule)
55
55
  except yara.SyntaxError as e:
56
56
  raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
57
57
  # need to track which plugin owns the rules
58
- self.yara_rule_names = [x.identifier for x in rules]
59
- if not len(list(rules)):
58
+ self.yara_rule_names = [x.identifier for x in self.yara_compiled]
59
+ if not len(list(self.yara_compiled)):
60
60
  raise InvalidExtractor(f"{name} must define at least one yara rule")
61
- for x in rules:
61
+ for x in self.yara_compiled:
62
62
  if x.is_global:
63
63
  raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
64
64
 
@@ -4,6 +4,9 @@ import importlib.machinery
4
4
  import importlib.util
5
5
  import inspect
6
6
  import json
7
+ import logging
8
+ import logging.handlers
9
+ import multiprocessing
7
10
  import os
8
11
  import re
9
12
  import shutil
@@ -24,32 +27,24 @@ from glob import glob
24
27
  from logging import Logger
25
28
  from pkgutil import walk_packages
26
29
  from types import ModuleType
27
- from typing import Callable, Dict, List, Set, Tuple
30
+ from typing import Callable, Dict, List, Set, Tuple, Union
28
31
 
29
- from maco.extractor import Extractor
30
-
31
- VENV_DIRECTORY_NAME = ".venv"
32
+ from uv import find_uv_bin
32
33
 
33
- RELATIVE_FROM_RE = re.compile("from (\.+)")
34
- RELATIVE_FROM_IMPORT_RE = re.compile("from (\.+) import")
34
+ from maco import model
35
+ from maco.extractor import Extractor
35
36
 
36
- try:
37
- # Attempt to use the uv package manager (Recommended)
38
- from uv import find_uv_bin
37
+ logger = logging.getLogger("maco.lib.utils")
39
38
 
40
- UV_BIN = find_uv_bin()
39
+ VENV_DIRECTORY_NAME = ".venv"
41
40
 
42
- PIP_CMD = f"{UV_BIN} pip"
43
- VENV_CREATE_CMD = f"{UV_BIN} venv"
44
- PACKAGE_MANAGER = "uv"
45
- except ImportError:
46
- # Otherwise default to pip
47
- from sys import executable
41
+ RELATIVE_FROM_RE = re.compile(r"from (\.+)")
42
+ RELATIVE_FROM_IMPORT_RE = re.compile(r"from (\.+) import")
48
43
 
49
- PIP_CMD = "pip"
50
- VENV_CREATE_CMD = f"{executable} -m venv"
51
- PACKAGE_MANAGER = "pip"
44
+ UV_BIN = find_uv_bin()
52
45
 
46
+ PIP_CMD = f"{UV_BIN} pip"
47
+ VENV_CREATE_CMD = f"{UV_BIN} venv"
53
48
 
54
49
  class Base64Decoder(json.JSONDecoder):
55
50
  def __init__(self, *args, **kwargs):
@@ -69,6 +64,7 @@ import importlib
69
64
  import json
70
65
  import os
71
66
  import sys
67
+ import logging
72
68
 
73
69
  try:
74
70
  from maco import yara
@@ -76,6 +72,19 @@ except:
76
72
  import yara
77
73
 
78
74
  from base64 import b64encode
75
+
76
+ # ensure we have a logger to stderr
77
+ import logging
78
+ logger = logging.getLogger()
79
+ logger.setLevel(logging.DEBUG)
80
+ sh = logging.StreamHandler()
81
+ logger.addHandler(sh)
82
+ sh.setLevel(logging.DEBUG)
83
+ formatter = logging.Formatter(
84
+ fmt="%(asctime)s, [%(levelname)s] %(module)s.%(funcName)s: %(message)s", datefmt="%Y-%m-%d (%H:%M:%S)"
85
+ )
86
+ sh.setFormatter(formatter)
87
+
79
88
  parent_package_path = "{parent_package_path}"
80
89
  sys.path.insert(1, parent_package_path)
81
90
  mod = importlib.import_module("{module_name}")
@@ -101,7 +110,7 @@ with open("{output_path}", 'w') as fp:
101
110
  json.dump(result.dict(exclude_defaults=True, exclude_none=True), fp, cls=Base64Encoder)
102
111
  """
103
112
 
104
- MACO_YARA_RULE = """
113
+ MACO_YARA_RULE = r"""
105
114
  rule MACO {
106
115
  meta:
107
116
  desc = "Used to match on Python files that contain MACO extractors"
@@ -191,9 +200,8 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
191
200
  return extractor_dirs, extractor_files
192
201
 
193
202
 
194
- def create_virtual_environments(directories: List[str], python_version: str, logger: Logger):
203
+ def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
195
204
  venvs = []
196
- logger.info("Creating virtual environment(s)..")
197
205
  env = deepcopy(os.environ)
198
206
  stop_directory = os.path.dirname(sorted(directories)[0])
199
207
  # Track directories that we've already visited
@@ -203,14 +211,15 @@ def create_virtual_environments(directories: List[str], python_version: str, log
203
211
  while dir != stop_directory and dir not in visited_dirs:
204
212
  req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
205
213
  if req_files:
206
- venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
207
- env.update({"VIRTUAL_ENV": venv_path})
208
- # Create a virtual environment for the directory
209
- if not os.path.exists(venv_path):
210
- cmd = VENV_CREATE_CMD
211
- if PACKAGE_MANAGER == "uv":
212
- cmd += f" --python {python_version}"
213
- subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
214
+ # create a virtual environment, otherwise directly install into current env
215
+ if create_venv:
216
+ venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
217
+ logger.info(f"Updating virtual environment {venv_path}")
218
+ env.update({"VIRTUAL_ENV": venv_path})
219
+ # Create a virtual environment for the directory
220
+ if not os.path.exists(venv_path):
221
+ cmd = f"{VENV_CREATE_CMD} --python {python_version}"
222
+ subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
214
223
 
215
224
  # Install/Update the packages in the environment
216
225
  install_command = PIP_CMD.split(" ") + ["install", "-U"]
@@ -234,7 +243,10 @@ def create_virtual_environments(directories: List[str], python_version: str, log
234
243
 
235
244
  install_command.extend(pyproject_command)
236
245
 
246
+ # always require maco to be installed
247
+ install_command.append("maco")
237
248
  logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
249
+ # this uses VIRTUAL_ENV to control usage of a virtual environment
238
250
  p = subprocess.run(
239
251
  install_command,
240
252
  cwd=dir,
@@ -245,10 +257,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
245
257
  if b"is being installed using the legacy" in p.stderr:
246
258
  # Ignore these types of errors
247
259
  continue
248
- logger.error(f"Error installing into venv:\n{p.stderr.decode()}")
260
+ logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
249
261
  else:
250
- logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
251
- venvs.append(venv_path)
262
+ logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
263
+ if create_venv:
264
+ venvs.append(venv_path)
252
265
 
253
266
  # Cleanup any build directories that are the product of package installation
254
267
  expected_build_path = os.path.join(dir, "build")
@@ -292,15 +305,10 @@ def register_extractors(
292
305
  ):
293
306
  package_name = os.path.basename(current_directory)
294
307
  parent_directory = os.path.dirname(current_directory)
295
- symlink = None
296
- while package_name in sys.modules:
297
- # Package name conflicts with an existing loaded module, let's deconflict that
298
- package_name = f"_{package_name}"
299
-
300
- # We'll need to create a link back to the original
301
- if package_name not in sys.modules:
302
- symlink = os.path.join(parent_directory, package_name)
303
- os.symlink(current_directory, symlink)
308
+ if venvs and package_name in sys.modules:
309
+ # this may happen as part of testing if some part of the extractor code was directly imported
310
+ logger.warning(f"Looks like {package_name} is already loaded. "
311
+ "If your maco extractor overlaps an existing package name this could cause problems.")
304
312
 
305
313
  try:
306
314
  # Modify the PATH so we can recognize this new package on import
@@ -351,10 +359,6 @@ def register_extractors(
351
359
  # Remove any modules that were loaded to deconflict with later modules loads
352
360
  [sys.modules.pop(k) for k in set(sys.modules.keys()) - default_loaded_modules]
353
361
 
354
- # Cleanup any symlinks
355
- if symlink:
356
- os.remove(symlink)
357
-
358
362
  # If there still exists extractor files we haven't found yet, try searching in the available subdirectories
359
363
  if extractor_files:
360
364
  for dir in os.listdir(current_directory):
@@ -379,13 +383,21 @@ def register_extractors(
379
383
  # We were able to find all the extractor files
380
384
  break
381
385
 
386
+ def proxy_logging(queue: multiprocessing.Queue, callback: Callable[[ModuleType, str], None], *args, **kwargs):
387
+ """Ensures logging is set up correctly for a child process and then executes the callback."""
388
+ logger = logging.getLogger()
389
+ qh = logging.handlers.QueueHandler(queue)
390
+ qh.setLevel(logging.DEBUG)
391
+ logger.addHandler(qh)
392
+ callback(*args, **kwargs, logger=logger)
382
393
 
383
394
  def import_extractors(
395
+ extractor_module_callback: Callable[[ModuleType, str], bool],
396
+ *,
384
397
  root_directory: str,
385
398
  scanner: yara.Rules,
386
- extractor_module_callback: Callable[[ModuleType, str], bool],
399
+ create_venv: bool,
387
400
  logger: Logger,
388
- create_venv: bool = False,
389
401
  python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
390
402
  ):
391
403
  extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
@@ -393,23 +405,17 @@ def import_extractors(
393
405
  logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
394
406
  logger.debug(extractor_files)
395
407
 
396
- venvs = []
397
- if create_venv:
398
- venvs = create_virtual_environments(extractor_dirs, python_version, logger)
399
- else:
400
- # Look for pre-existing virtual environments, if any
401
- logger.info("Checking for pre-existing virtual environment(s)..")
402
- venvs = [
403
- os.path.join(root, VENV_DIRECTORY_NAME)
404
- for root, dirs, _ in os.walk(root_directory)
405
- if VENV_DIRECTORY_NAME in dirs
406
- ]
408
+ # Install packages into the current environment or dynamically created virtual environments
409
+ venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
407
410
 
408
411
  # With the environment prepared, we can now hunt for the extractors and register them
409
412
  logger.info("Registering extractors..")
410
413
  register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
411
414
 
412
415
 
416
+ # holds cached extractors when not running in venv mode
417
+ _loaded_extractors: Dict[str, Extractor] = {}
418
+
413
419
  def run_extractor(
414
420
  sample_path,
415
421
  module_name,
@@ -418,49 +424,69 @@ def run_extractor(
418
424
  venv,
419
425
  venv_script=VENV_SCRIPT,
420
426
  json_decoder=Base64Decoder,
421
- ) -> Dict[str, dict]:
422
- # Write temporary script in the same directory as extractor to resolve relative imports
423
- python_exe = sys.executable
424
- if venv:
425
- # If there is a linked virtual environment, execute within that environment
427
+ ) -> Union[Dict[str, dict], model.ExtractorModel]:
428
+ """Runs the maco extractor against sample either in current process or child process."""
429
+ if not venv:
430
+ key = f"{module_name}_{extractor_class}"
431
+ if key not in _loaded_extractors:
432
+ # dynamic import of extractor
433
+ mod = importlib.import_module(module_name)
434
+ extractor_cls = mod.__getattribute__(extractor_class)
435
+ extractor = extractor_cls()
436
+ else:
437
+ # retrieve cached extractor
438
+ extractor = _loaded_extractors[key]
439
+ if extractor.yara_compiled:
440
+ matches = extractor.yara_compiled.match(sample_path)
441
+ loaded = extractor.run(open(sample_path, 'rb'), matches=matches)
442
+ else:
443
+ # execute extractor in child process with separate virtual environment
444
+ # Write temporary script in the same directory as extractor to resolve relative imports
426
445
  python_exe = os.path.join(venv, "bin", "python")
427
- dirname = os.path.dirname(module_path)
428
- with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
429
- with tempfile.NamedTemporaryFile() as output:
430
- parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
431
- root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
432
-
433
- script.write(
434
- venv_script.format(
435
- parent_package_path=parent_package_path,
436
- module_name=module_name,
437
- module_class=extractor_class,
438
- sample_path=sample_path,
439
- output_path=output.name,
446
+ dirname = os.path.dirname(module_path)
447
+ with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
448
+ with tempfile.NamedTemporaryFile() as output:
449
+ parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
450
+ root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
451
+
452
+ script.write(
453
+ venv_script.format(
454
+ parent_package_path=parent_package_path,
455
+ module_name=module_name,
456
+ module_class=extractor_class,
457
+ sample_path=sample_path,
458
+ output_path=output.name,
459
+ )
440
460
  )
441
- )
442
- script.flush()
443
- cwd = root_directory
444
- custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
445
-
446
- if custom_module.startswith("src."):
447
- # src layout found, which means the actual module content is within 'src' directory
448
- custom_module = custom_module[4:]
449
- cwd = os.path.join(cwd, "src")
450
-
451
- proc = subprocess.run(
452
- [python_exe, "-m", custom_module],
453
- cwd=cwd,
454
- capture_output=True,
455
- )
456
- try:
457
- # Load results and return them
458
- output.seek(0)
459
- return json.load(output, cls=json_decoder)
460
- except Exception:
461
- # If there was an error raised during runtime, then propagate
462
- delim = f'File "{module_path}"'
463
- exception = proc.stderr.decode()
464
- if delim in exception:
465
- exception = f"{delim}{exception.split(delim, 1)[1]}"
466
- raise Exception(exception)
461
+ script.flush()
462
+ cwd = root_directory
463
+ custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
464
+
465
+ if custom_module.startswith("src."):
466
+ # src layout found, which means the actual module content is within 'src' directory
467
+ custom_module = custom_module[4:]
468
+ cwd = os.path.join(cwd, "src")
469
+
470
+ # run the maco extractor in full venv process isolation (slow)
471
+ proc = subprocess.run(
472
+ [python_exe, "-m", custom_module],
473
+ cwd=cwd,
474
+ capture_output=True,
475
+ )
476
+ stderr = proc.stderr.decode()
477
+ try:
478
+ # Load results and return them
479
+ output.seek(0)
480
+ loaded = json.load(output, cls=json_decoder)
481
+ except Exception as e:
482
+ # If there was an error raised during runtime, then propagate
483
+ delim = f'File "{module_path}"'
484
+ exception = stderr
485
+ if delim in exception:
486
+ exception = f"{delim}{exception.split(delim, 1)[1]}"
487
+ # print extractor logging at error level
488
+ logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
489
+ raise Exception(exception) from e
490
+ # ensure that extractor logging is available
491
+ logger.info(f"maco extractor stderr:\n{stderr}")
492
+ return loaded
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maco
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Author: sl-govau
5
5
  Maintainer: cccs-rs
6
6
  License: MIT License
@@ -5,9 +5,11 @@ pyproject.toml
5
5
  requirements.txt
6
6
  tox.ini
7
7
  .vscode/settings.json
8
+ demo_extractors/__init__.py
8
9
  demo_extractors/elfy.py
9
10
  demo_extractors/limit_other.py
10
11
  demo_extractors/nothing.py
12
+ demo_extractors/requirements.txt
11
13
  demo_extractors/shared.py
12
14
  demo_extractors/complex/__init__.py
13
15
  demo_extractors/complex/complex.py
@@ -43,6 +45,7 @@ model_setup/maco/model/__init__.py
43
45
  model_setup/maco/model/model.py
44
46
  pipelines/publish.yaml
45
47
  pipelines/test.yaml
48
+ tests/benchmark.py
46
49
  tests/pytest.ini
47
50
  tests/requirements.txt
48
51
  tests/test_base_test.py
@@ -54,6 +57,7 @@ tests/test_helpers.py
54
57
  tests/test_model.py
55
58
  tests/data/example.txt.cart
56
59
  tests/data/trigger_complex.txt
60
+ tests/data/trigger_complex.txt.cart
57
61
  tests/extractors/__init__.py
58
62
  tests/extractors/basic.py
59
63
  tests/extractors/basic_longer.py
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
32
32
  # I recommend something like os.path.join(__file__, "../../extractors")
33
33
  # if your extractors are in a folder 'extractors' next to a folder of tests
34
34
  path: str = None
35
+ create_venv: bool=False
35
36
 
36
- def setUp(self) -> None:
37
- if not self.name or not self.path:
37
+ @classmethod
38
+ def setUpClass(cls) -> None:
39
+ if not cls.name or not cls.path:
38
40
  raise Exception("name and path must be set")
39
- self.c = collector.Collector(self.path, include=[self.name])
41
+ cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
42
+ return super().setUpClass()
43
+
44
+ def test_default_metadata(self):
45
+ """Require extractor to be loadable and valid."""
40
46
  self.assertIn(self.name, self.c.extractors)
41
47
  self.assertEqual(len(self.c.extractors), 1)
42
- return super().setUp()
43
48
 
44
49
  def extract(self, stream):
45
50
  """Return results for running extractor over stream, including yara check."""
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
49
54
  resp = self.c.extract(stream, self.name)
50
55
  return resp
51
56
 
52
- def _get_location(self) -> str:
57
+ @classmethod
58
+ def _get_location(cls) -> str:
53
59
  """Return path to child class that implements this class."""
54
60
  # import child module
55
- module = type(self).__module__
61
+ module = cls.__module__
56
62
  i = importlib.import_module(module)
57
63
  # get location to child module
58
64
  return i.__file__
59
65
 
60
- def load_cart(self, filepath: str) -> io.BytesIO:
66
+ @classmethod
67
+ def load_cart(cls, filepath: str) -> io.BytesIO:
61
68
  """Load and unneuter a test file (likely malware) into memory for processing."""
62
69
  # it is nice if we can load files relative to whatever is implementing base_test
63
- dirpath = os.path.split(self._get_location())[0]
70
+ dirpath = os.path.split(cls._get_location())[0]
64
71
  # either filepath is absolute, or should be loaded relative to child of base_test
65
72
  filepath = os.path.join(dirpath, filepath)
66
73
  if not os.path.isfile(filepath):
@@ -179,7 +179,7 @@ def main():
179
179
  parser.add_argument(
180
180
  "--create_venv",
181
181
  action="store_true",
182
- help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
182
+ help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
183
183
  )
184
184
  args = parser.parse_args()
185
185
  inc = args.include.split(",") if args.include else []