maco 1.2.3__py3-none-any.whl → 1.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- demo_extractors/__init__.py +0 -0
- demo_extractors/complex/complex.py +1 -2
- demo_extractors/limit_other.py +5 -2
- demo_extractors/requirements.txt +1 -0
- demo_extractors/shared.py +1 -0
- maco/base_test.py +15 -8
- maco/cli.py +1 -1
- maco/collector.py +19 -6
- maco/extractor.py +4 -4
- maco/utils.py +129 -103
- {maco-1.2.3.dist-info → maco-1.2.5.dist-info}/METADATA +1 -1
- {maco-1.2.3.dist-info → maco-1.2.5.dist-info}/RECORD +22 -19
- model_setup/maco/base_test.py +15 -8
- model_setup/maco/cli.py +1 -1
- model_setup/maco/collector.py +19 -6
- model_setup/maco/extractor.py +4 -4
- model_setup/maco/utils.py +129 -103
- tests/data/trigger_complex.txt.cart +0 -0
- {maco-1.2.3.dist-info → maco-1.2.5.dist-info}/LICENSE.md +0 -0
- {maco-1.2.3.dist-info → maco-1.2.5.dist-info}/WHEEL +0 -0
- {maco-1.2.3.dist-info → maco-1.2.5.dist-info}/entry_points.txt +0 -0
- {maco-1.2.3.dist-info → maco-1.2.5.dist-info}/top_level.txt +0 -0
|
File without changes
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
from io import BytesIO
|
|
2
2
|
from typing import List, Optional
|
|
3
3
|
|
|
4
|
+
from demo_extractors.complex import complex_utils
|
|
4
5
|
from maco import extractor, model, yara
|
|
5
6
|
|
|
6
|
-
from complex import complex_utils
|
|
7
|
-
|
|
8
7
|
|
|
9
8
|
class Complex(extractor.Extractor):
|
|
10
9
|
"""This script has multiple yara rules and coverage of the data model."""
|
demo_extractors/limit_other.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
from io import BytesIO
|
|
2
2
|
from typing import Dict, List, Optional
|
|
3
3
|
|
|
4
|
+
from demo_extractors import shared
|
|
4
5
|
from maco import extractor, model, yara
|
|
5
6
|
|
|
6
|
-
from . import shared
|
|
7
|
-
|
|
8
7
|
|
|
9
8
|
class LimitOther(extractor.Extractor):
|
|
10
9
|
"""An example of how the 'other' dictionary can be limited in a custom way."""
|
|
@@ -24,6 +23,10 @@ class LimitOther(extractor.Extractor):
|
|
|
24
23
|
"""
|
|
25
24
|
|
|
26
25
|
def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
|
|
26
|
+
# import httpx at runtime so we can test that requirements.txt is installed dynamically without breaking
|
|
27
|
+
# the tests that do direct importing
|
|
28
|
+
import httpx
|
|
29
|
+
|
|
27
30
|
# use a custom model that inherits from ExtractorModel
|
|
28
31
|
# this model defines what can go in the 'other' dict
|
|
29
32
|
tmp = shared.MyCustomModel(family="specify_other")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
httpx
|
demo_extractors/shared.py
CHANGED
maco/base_test.py
CHANGED
|
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
|
|
|
32
32
|
# I recommend something like os.path.join(__file__, "../../extractors")
|
|
33
33
|
# if your extractors are in a folder 'extractors' next to a folder of tests
|
|
34
34
|
path: str = None
|
|
35
|
+
create_venv: bool=False
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
@classmethod
|
|
38
|
+
def setUpClass(cls) -> None:
|
|
39
|
+
if not cls.name or not cls.path:
|
|
38
40
|
raise Exception("name and path must be set")
|
|
39
|
-
|
|
41
|
+
cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
|
|
42
|
+
return super().setUpClass()
|
|
43
|
+
|
|
44
|
+
def test_default_metadata(self):
|
|
45
|
+
"""Require extractor to be loadable and valid."""
|
|
40
46
|
self.assertIn(self.name, self.c.extractors)
|
|
41
47
|
self.assertEqual(len(self.c.extractors), 1)
|
|
42
|
-
return super().setUp()
|
|
43
48
|
|
|
44
49
|
def extract(self, stream):
|
|
45
50
|
"""Return results for running extractor over stream, including yara check."""
|
|
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
|
|
|
49
54
|
resp = self.c.extract(stream, self.name)
|
|
50
55
|
return resp
|
|
51
56
|
|
|
52
|
-
|
|
57
|
+
@classmethod
|
|
58
|
+
def _get_location(cls) -> str:
|
|
53
59
|
"""Return path to child class that implements this class."""
|
|
54
60
|
# import child module
|
|
55
|
-
module =
|
|
61
|
+
module = cls.__module__
|
|
56
62
|
i = importlib.import_module(module)
|
|
57
63
|
# get location to child module
|
|
58
64
|
return i.__file__
|
|
59
65
|
|
|
60
|
-
|
|
66
|
+
@classmethod
|
|
67
|
+
def load_cart(cls, filepath: str) -> io.BytesIO:
|
|
61
68
|
"""Load and unneuter a test file (likely malware) into memory for processing."""
|
|
62
69
|
# it is nice if we can load files relative to whatever is implementing base_test
|
|
63
|
-
dirpath = os.path.split(
|
|
70
|
+
dirpath = os.path.split(cls._get_location())[0]
|
|
64
71
|
# either filepath is absolute, or should be loaded relative to child of base_test
|
|
65
72
|
filepath = os.path.join(dirpath, filepath)
|
|
66
73
|
if not os.path.isfile(filepath):
|
maco/cli.py
CHANGED
|
@@ -179,7 +179,7 @@ def main():
|
|
|
179
179
|
parser.add_argument(
|
|
180
180
|
"--create_venv",
|
|
181
181
|
action="store_true",
|
|
182
|
-
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
|
|
182
|
+
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
|
|
183
183
|
)
|
|
184
184
|
args = parser.parse_args()
|
|
185
185
|
inc = args.include.split(",") if args.include else []
|
maco/collector.py
CHANGED
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
+
import logging.handlers
|
|
5
6
|
import os
|
|
6
|
-
from multiprocessing import Manager, Process
|
|
7
|
+
from multiprocessing import Manager, Process, Queue
|
|
7
8
|
from tempfile import NamedTemporaryFile
|
|
8
9
|
from types import ModuleType
|
|
9
10
|
from typing import Any, BinaryIO, Dict, List, Union
|
|
@@ -86,21 +87,33 @@ class Collector:
|
|
|
86
87
|
)
|
|
87
88
|
namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
|
|
88
89
|
|
|
90
|
+
# multiprocess logging is awkward - set up a queue to ensure we can log
|
|
91
|
+
logging_queue = Queue()
|
|
92
|
+
queue_handler = logging.handlers.QueueListener(logging_queue,*logging.getLogger().handlers)
|
|
93
|
+
queue_handler.start()
|
|
94
|
+
|
|
89
95
|
# Find the extractors within the given directory
|
|
90
96
|
# Execute within a child process to ensure main process interpreter is kept clean
|
|
91
97
|
p = Process(
|
|
92
|
-
target=utils.
|
|
98
|
+
target=utils.proxy_logging,
|
|
93
99
|
args=(
|
|
94
|
-
|
|
95
|
-
|
|
100
|
+
logging_queue,
|
|
101
|
+
utils.import_extractors,
|
|
96
102
|
extractor_module_callback,
|
|
97
|
-
|
|
98
|
-
|
|
103
|
+
),
|
|
104
|
+
kwargs=dict(
|
|
105
|
+
root_directory=path_extractors,
|
|
106
|
+
scanner=yara.compile(source=utils.MACO_YARA_RULE),
|
|
107
|
+
create_venv=create_venv and os.path.isdir(path_extractors),
|
|
99
108
|
),
|
|
100
109
|
)
|
|
101
110
|
p.start()
|
|
102
111
|
p.join()
|
|
103
112
|
|
|
113
|
+
# stop multiprocess logging
|
|
114
|
+
queue_handler.stop()
|
|
115
|
+
logging_queue.close()
|
|
116
|
+
|
|
104
117
|
self.extractors = dict(extractors)
|
|
105
118
|
if not self.extractors:
|
|
106
119
|
raise ExtractorLoadError("no extractors were loaded")
|
maco/extractor.py
CHANGED
|
@@ -51,14 +51,14 @@ class Extractor:
|
|
|
51
51
|
# check yara rules conform to expected structure
|
|
52
52
|
# we throw away these compiled rules as we need all rules in system compiled together
|
|
53
53
|
try:
|
|
54
|
-
|
|
54
|
+
self.yara_compiled = yara.compile(source=self.yara_rule)
|
|
55
55
|
except yara.SyntaxError as e:
|
|
56
56
|
raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
|
|
57
57
|
# need to track which plugin owns the rules
|
|
58
|
-
self.yara_rule_names = [x.identifier for x in
|
|
59
|
-
if not len(list(
|
|
58
|
+
self.yara_rule_names = [x.identifier for x in self.yara_compiled]
|
|
59
|
+
if not len(list(self.yara_compiled)):
|
|
60
60
|
raise InvalidExtractor(f"{name} must define at least one yara rule")
|
|
61
|
-
for x in
|
|
61
|
+
for x in self.yara_compiled:
|
|
62
62
|
if x.is_global:
|
|
63
63
|
raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
|
|
64
64
|
|
maco/utils.py
CHANGED
|
@@ -4,6 +4,9 @@ import importlib.machinery
|
|
|
4
4
|
import importlib.util
|
|
5
5
|
import inspect
|
|
6
6
|
import json
|
|
7
|
+
import logging
|
|
8
|
+
import logging.handlers
|
|
9
|
+
import multiprocessing
|
|
7
10
|
import os
|
|
8
11
|
import re
|
|
9
12
|
import shutil
|
|
@@ -24,32 +27,24 @@ from glob import glob
|
|
|
24
27
|
from logging import Logger
|
|
25
28
|
from pkgutil import walk_packages
|
|
26
29
|
from types import ModuleType
|
|
27
|
-
from typing import Callable, Dict, List, Set, Tuple
|
|
30
|
+
from typing import Callable, Dict, List, Set, Tuple, Union
|
|
28
31
|
|
|
29
|
-
from
|
|
30
|
-
|
|
31
|
-
VENV_DIRECTORY_NAME = ".venv"
|
|
32
|
+
from uv import find_uv_bin
|
|
32
33
|
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
from maco import model
|
|
35
|
+
from maco.extractor import Extractor
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
# Attempt to use the uv package manager (Recommended)
|
|
38
|
-
from uv import find_uv_bin
|
|
37
|
+
logger = logging.getLogger("maco.lib.utils")
|
|
39
38
|
|
|
40
|
-
|
|
39
|
+
VENV_DIRECTORY_NAME = ".venv"
|
|
41
40
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
PACKAGE_MANAGER = "uv"
|
|
45
|
-
except ImportError:
|
|
46
|
-
# Otherwise default to pip
|
|
47
|
-
from sys import executable
|
|
41
|
+
RELATIVE_FROM_RE = re.compile(r"from (\.+)")
|
|
42
|
+
RELATIVE_FROM_IMPORT_RE = re.compile(r"from (\.+) import")
|
|
48
43
|
|
|
49
|
-
|
|
50
|
-
VENV_CREATE_CMD = f"{executable} -m venv"
|
|
51
|
-
PACKAGE_MANAGER = "pip"
|
|
44
|
+
UV_BIN = find_uv_bin()
|
|
52
45
|
|
|
46
|
+
PIP_CMD = f"{UV_BIN} pip"
|
|
47
|
+
VENV_CREATE_CMD = f"{UV_BIN} venv"
|
|
53
48
|
|
|
54
49
|
class Base64Decoder(json.JSONDecoder):
|
|
55
50
|
def __init__(self, *args, **kwargs):
|
|
@@ -69,6 +64,7 @@ import importlib
|
|
|
69
64
|
import json
|
|
70
65
|
import os
|
|
71
66
|
import sys
|
|
67
|
+
import logging
|
|
72
68
|
|
|
73
69
|
try:
|
|
74
70
|
from maco import yara
|
|
@@ -76,6 +72,19 @@ except:
|
|
|
76
72
|
import yara
|
|
77
73
|
|
|
78
74
|
from base64 import b64encode
|
|
75
|
+
|
|
76
|
+
# ensure we have a logger to stderr
|
|
77
|
+
import logging
|
|
78
|
+
logger = logging.getLogger()
|
|
79
|
+
logger.setLevel(logging.DEBUG)
|
|
80
|
+
sh = logging.StreamHandler()
|
|
81
|
+
logger.addHandler(sh)
|
|
82
|
+
sh.setLevel(logging.DEBUG)
|
|
83
|
+
formatter = logging.Formatter(
|
|
84
|
+
fmt="%(asctime)s, [%(levelname)s] %(module)s.%(funcName)s: %(message)s", datefmt="%Y-%m-%d (%H:%M:%S)"
|
|
85
|
+
)
|
|
86
|
+
sh.setFormatter(formatter)
|
|
87
|
+
|
|
79
88
|
parent_package_path = "{parent_package_path}"
|
|
80
89
|
sys.path.insert(1, parent_package_path)
|
|
81
90
|
mod = importlib.import_module("{module_name}")
|
|
@@ -101,7 +110,7 @@ with open("{output_path}", 'w') as fp:
|
|
|
101
110
|
json.dump(result.dict(exclude_defaults=True, exclude_none=True), fp, cls=Base64Encoder)
|
|
102
111
|
"""
|
|
103
112
|
|
|
104
|
-
MACO_YARA_RULE = """
|
|
113
|
+
MACO_YARA_RULE = r"""
|
|
105
114
|
rule MACO {
|
|
106
115
|
meta:
|
|
107
116
|
desc = "Used to match on Python files that contain MACO extractors"
|
|
@@ -191,9 +200,8 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
|
|
|
191
200
|
return extractor_dirs, extractor_files
|
|
192
201
|
|
|
193
202
|
|
|
194
|
-
def
|
|
203
|
+
def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
|
|
195
204
|
venvs = []
|
|
196
|
-
logger.info("Creating virtual environment(s)..")
|
|
197
205
|
env = deepcopy(os.environ)
|
|
198
206
|
stop_directory = os.path.dirname(sorted(directories)[0])
|
|
199
207
|
# Track directories that we've already visited
|
|
@@ -203,14 +211,15 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
203
211
|
while dir != stop_directory and dir not in visited_dirs:
|
|
204
212
|
req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
|
|
205
213
|
if req_files:
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
+
# create a virtual environment, otherwise directly install into current env
|
|
215
|
+
if create_venv:
|
|
216
|
+
venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
|
|
217
|
+
logger.info(f"Updating virtual environment {venv_path}")
|
|
218
|
+
env.update({"VIRTUAL_ENV": venv_path})
|
|
219
|
+
# Create a virtual environment for the directory
|
|
220
|
+
if not os.path.exists(venv_path):
|
|
221
|
+
cmd = f"{VENV_CREATE_CMD} --python {python_version}"
|
|
222
|
+
subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
|
|
214
223
|
|
|
215
224
|
# Install/Update the packages in the environment
|
|
216
225
|
install_command = PIP_CMD.split(" ") + ["install", "-U"]
|
|
@@ -234,7 +243,10 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
234
243
|
|
|
235
244
|
install_command.extend(pyproject_command)
|
|
236
245
|
|
|
246
|
+
# always require maco to be installed
|
|
247
|
+
install_command.append("maco")
|
|
237
248
|
logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
|
|
249
|
+
# this uses VIRTUAL_ENV to control usage of a virtual environment
|
|
238
250
|
p = subprocess.run(
|
|
239
251
|
install_command,
|
|
240
252
|
cwd=dir,
|
|
@@ -245,10 +257,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
245
257
|
if b"is being installed using the legacy" in p.stderr:
|
|
246
258
|
# Ignore these types of errors
|
|
247
259
|
continue
|
|
248
|
-
logger.error(f"Error installing into venv:\n{p.stderr.decode()}")
|
|
260
|
+
logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
249
261
|
else:
|
|
250
|
-
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
|
|
251
|
-
|
|
262
|
+
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
263
|
+
if create_venv:
|
|
264
|
+
venvs.append(venv_path)
|
|
252
265
|
|
|
253
266
|
# Cleanup any build directories that are the product of package installation
|
|
254
267
|
expected_build_path = os.path.join(dir, "build")
|
|
@@ -292,15 +305,10 @@ def register_extractors(
|
|
|
292
305
|
):
|
|
293
306
|
package_name = os.path.basename(current_directory)
|
|
294
307
|
parent_directory = os.path.dirname(current_directory)
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
# We'll need to create a link back to the original
|
|
301
|
-
if package_name not in sys.modules:
|
|
302
|
-
symlink = os.path.join(parent_directory, package_name)
|
|
303
|
-
os.symlink(current_directory, symlink)
|
|
308
|
+
if venvs and package_name in sys.modules:
|
|
309
|
+
# this may happen as part of testing if some part of the extractor code was directly imported
|
|
310
|
+
logger.warning(f"Looks like {package_name} is already loaded. "
|
|
311
|
+
"If your maco extractor overlaps an existing package name this could cause problems.")
|
|
304
312
|
|
|
305
313
|
try:
|
|
306
314
|
# Modify the PATH so we can recognize this new package on import
|
|
@@ -351,10 +359,6 @@ def register_extractors(
|
|
|
351
359
|
# Remove any modules that were loaded to deconflict with later modules loads
|
|
352
360
|
[sys.modules.pop(k) for k in set(sys.modules.keys()) - default_loaded_modules]
|
|
353
361
|
|
|
354
|
-
# Cleanup any symlinks
|
|
355
|
-
if symlink:
|
|
356
|
-
os.remove(symlink)
|
|
357
|
-
|
|
358
362
|
# If there still exists extractor files we haven't found yet, try searching in the available subdirectories
|
|
359
363
|
if extractor_files:
|
|
360
364
|
for dir in os.listdir(current_directory):
|
|
@@ -379,13 +383,21 @@ def register_extractors(
|
|
|
379
383
|
# We were able to find all the extractor files
|
|
380
384
|
break
|
|
381
385
|
|
|
386
|
+
def proxy_logging(queue: multiprocessing.Queue, callback: Callable[[ModuleType, str], None], *args, **kwargs):
|
|
387
|
+
"""Ensures logging is set up correctly for a child process and then executes the callback."""
|
|
388
|
+
logger = logging.getLogger()
|
|
389
|
+
qh = logging.handlers.QueueHandler(queue)
|
|
390
|
+
qh.setLevel(logging.DEBUG)
|
|
391
|
+
logger.addHandler(qh)
|
|
392
|
+
callback(*args, **kwargs, logger=logger)
|
|
382
393
|
|
|
383
394
|
def import_extractors(
|
|
395
|
+
extractor_module_callback: Callable[[ModuleType, str], bool],
|
|
396
|
+
*,
|
|
384
397
|
root_directory: str,
|
|
385
398
|
scanner: yara.Rules,
|
|
386
|
-
|
|
399
|
+
create_venv: bool,
|
|
387
400
|
logger: Logger,
|
|
388
|
-
create_venv: bool = False,
|
|
389
401
|
python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
|
390
402
|
):
|
|
391
403
|
extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
|
|
@@ -393,23 +405,17 @@ def import_extractors(
|
|
|
393
405
|
logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
|
|
394
406
|
logger.debug(extractor_files)
|
|
395
407
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
venvs = create_virtual_environments(extractor_dirs, python_version, logger)
|
|
399
|
-
else:
|
|
400
|
-
# Look for pre-existing virtual environments, if any
|
|
401
|
-
logger.info("Checking for pre-existing virtual environment(s)..")
|
|
402
|
-
venvs = [
|
|
403
|
-
os.path.join(root, VENV_DIRECTORY_NAME)
|
|
404
|
-
for root, dirs, _ in os.walk(root_directory)
|
|
405
|
-
if VENV_DIRECTORY_NAME in dirs
|
|
406
|
-
]
|
|
408
|
+
# Install packages into the current environment or dynamically created virtual environments
|
|
409
|
+
venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
|
|
407
410
|
|
|
408
411
|
# With the environment prepared, we can now hunt for the extractors and register them
|
|
409
412
|
logger.info("Registering extractors..")
|
|
410
413
|
register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
|
|
411
414
|
|
|
412
415
|
|
|
416
|
+
# holds cached extractors when not running in venv mode
|
|
417
|
+
_loaded_extractors: Dict[str, Extractor] = {}
|
|
418
|
+
|
|
413
419
|
def run_extractor(
|
|
414
420
|
sample_path,
|
|
415
421
|
module_name,
|
|
@@ -418,49 +424,69 @@ def run_extractor(
|
|
|
418
424
|
venv,
|
|
419
425
|
venv_script=VENV_SCRIPT,
|
|
420
426
|
json_decoder=Base64Decoder,
|
|
421
|
-
) -> Dict[str, dict]:
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
427
|
+
) -> Union[Dict[str, dict], model.ExtractorModel]:
|
|
428
|
+
"""Runs the maco extractor against sample either in current process or child process."""
|
|
429
|
+
if not venv:
|
|
430
|
+
key = f"{module_name}_{extractor_class}"
|
|
431
|
+
if key not in _loaded_extractors:
|
|
432
|
+
# dynamic import of extractor
|
|
433
|
+
mod = importlib.import_module(module_name)
|
|
434
|
+
extractor_cls = mod.__getattribute__(extractor_class)
|
|
435
|
+
extractor = extractor_cls()
|
|
436
|
+
else:
|
|
437
|
+
# retrieve cached extractor
|
|
438
|
+
extractor = _loaded_extractors[key]
|
|
439
|
+
if extractor.yara_compiled:
|
|
440
|
+
matches = extractor.yara_compiled.match(sample_path)
|
|
441
|
+
loaded = extractor.run(open(sample_path, 'rb'), matches=matches)
|
|
442
|
+
else:
|
|
443
|
+
# execute extractor in child process with separate virtual environment
|
|
444
|
+
# Write temporary script in the same directory as extractor to resolve relative imports
|
|
426
445
|
python_exe = os.path.join(venv, "bin", "python")
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
446
|
+
dirname = os.path.dirname(module_path)
|
|
447
|
+
with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
|
|
448
|
+
with tempfile.NamedTemporaryFile() as output:
|
|
449
|
+
parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
|
|
450
|
+
root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
|
|
451
|
+
|
|
452
|
+
script.write(
|
|
453
|
+
venv_script.format(
|
|
454
|
+
parent_package_path=parent_package_path,
|
|
455
|
+
module_name=module_name,
|
|
456
|
+
module_class=extractor_class,
|
|
457
|
+
sample_path=sample_path,
|
|
458
|
+
output_path=output.name,
|
|
459
|
+
)
|
|
440
460
|
)
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
461
|
+
script.flush()
|
|
462
|
+
cwd = root_directory
|
|
463
|
+
custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
|
|
464
|
+
|
|
465
|
+
if custom_module.startswith("src."):
|
|
466
|
+
# src layout found, which means the actual module content is within 'src' directory
|
|
467
|
+
custom_module = custom_module[4:]
|
|
468
|
+
cwd = os.path.join(cwd, "src")
|
|
469
|
+
|
|
470
|
+
# run the maco extractor in full venv process isolation (slow)
|
|
471
|
+
proc = subprocess.run(
|
|
472
|
+
[python_exe, "-m", custom_module],
|
|
473
|
+
cwd=cwd,
|
|
474
|
+
capture_output=True,
|
|
475
|
+
)
|
|
476
|
+
stderr = proc.stderr.decode()
|
|
477
|
+
try:
|
|
478
|
+
# Load results and return them
|
|
479
|
+
output.seek(0)
|
|
480
|
+
loaded = json.load(output, cls=json_decoder)
|
|
481
|
+
except Exception as e:
|
|
482
|
+
# If there was an error raised during runtime, then propagate
|
|
483
|
+
delim = f'File "{module_path}"'
|
|
484
|
+
exception = stderr
|
|
485
|
+
if delim in exception:
|
|
486
|
+
exception = f"{delim}{exception.split(delim, 1)[1]}"
|
|
487
|
+
# print extractor logging at error level
|
|
488
|
+
logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
|
|
489
|
+
raise Exception(exception) from e
|
|
490
|
+
# ensure that extractor logging is available
|
|
491
|
+
logger.info(f"maco extractor stderr:\n{stderr}")
|
|
492
|
+
return loaded
|
|
@@ -1,25 +1,27 @@
|
|
|
1
|
+
demo_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1
2
|
demo_extractors/elfy.py,sha256=AAFr5i1aivPwO4nycyXJEud57EpVNA-5k_2GicWesbY,771
|
|
2
|
-
demo_extractors/limit_other.py,sha256=
|
|
3
|
+
demo_extractors/limit_other.py,sha256=8Z7X0cXUyZuK3MhDtObMWmdruRj5hgFdDi_VVGXqRx4,1123
|
|
3
4
|
demo_extractors/nothing.py,sha256=3aeQJTY-dakmVXmyfmrRM8YCQVT7q3bq880DFH1Ol_Y,607
|
|
4
|
-
demo_extractors/
|
|
5
|
+
demo_extractors/requirements.txt,sha256=E0tD6xBZldq6sQGTHng6k88lBeASOhmLJcdcjpcqBNE,6
|
|
6
|
+
demo_extractors/shared.py,sha256=2P1cyuRbHDvM9IRt3UZnwdyhxx7OWqNC83xLyV8Y190,305
|
|
5
7
|
demo_extractors/complex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
demo_extractors/complex/complex.py,sha256=
|
|
8
|
+
demo_extractors/complex/complex.py,sha256=tXrzj_zWIXbTOwj7Lezapk-qkrM-lfwcyjd5m-BYzdg,2322
|
|
7
9
|
demo_extractors/complex/complex_utils.py,sha256=aec8kJsYUrMPo-waihkVLt-0QpiOPkw7dDqfT9MNuHk,123
|
|
8
10
|
maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
maco/base_test.py,sha256=
|
|
10
|
-
maco/cli.py,sha256=
|
|
11
|
-
maco/collector.py,sha256=
|
|
12
|
-
maco/extractor.py,sha256=
|
|
13
|
-
maco/utils.py,sha256=
|
|
11
|
+
maco/base_test.py,sha256=EPxCun9Tv91V-lFpaenn14tPyW17TPvXVH4AjE3t6js,2716
|
|
12
|
+
maco/cli.py,sha256=fIeUXOgOxcecmAkl6OAdnjBKqk1gBPv1ryWe50pT60g,8135
|
|
13
|
+
maco/collector.py,sha256=Vlo7KcJC7TKZFTElv8i_f_hvWEnlWCRzOP1xOc9x7vk,6532
|
|
14
|
+
maco/extractor.py,sha256=uGSGiCQ4jd8jFmfw2T99BGcY5iQJzXHcG_RoTIxClTE,2802
|
|
15
|
+
maco/utils.py,sha256=K41c-H7naaoiEYf0WNfP054IxwvHPujsbmmzgTizuLU,20159
|
|
14
16
|
maco/yara.py,sha256=vPzCqauVp52ivcTdt8zwrYqDdkLutGlesma9DhKPzHw,2925
|
|
15
17
|
maco/model/__init__.py,sha256=SJrwdn12wklUFm2KoIgWjX_KgvJxCM7Ca9ntXOneuzc,31
|
|
16
18
|
maco/model/model.py,sha256=ngen4ViyLdRo_z_TqZBjw2DN0NrRLpuxOy15-6QmtNw,23536
|
|
17
19
|
model_setup/maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
model_setup/maco/base_test.py,sha256=
|
|
19
|
-
model_setup/maco/cli.py,sha256=
|
|
20
|
-
model_setup/maco/collector.py,sha256=
|
|
21
|
-
model_setup/maco/extractor.py,sha256=
|
|
22
|
-
model_setup/maco/utils.py,sha256=
|
|
20
|
+
model_setup/maco/base_test.py,sha256=EPxCun9Tv91V-lFpaenn14tPyW17TPvXVH4AjE3t6js,2716
|
|
21
|
+
model_setup/maco/cli.py,sha256=fIeUXOgOxcecmAkl6OAdnjBKqk1gBPv1ryWe50pT60g,8135
|
|
22
|
+
model_setup/maco/collector.py,sha256=Vlo7KcJC7TKZFTElv8i_f_hvWEnlWCRzOP1xOc9x7vk,6532
|
|
23
|
+
model_setup/maco/extractor.py,sha256=uGSGiCQ4jd8jFmfw2T99BGcY5iQJzXHcG_RoTIxClTE,2802
|
|
24
|
+
model_setup/maco/utils.py,sha256=K41c-H7naaoiEYf0WNfP054IxwvHPujsbmmzgTizuLU,20159
|
|
23
25
|
model_setup/maco/yara.py,sha256=vPzCqauVp52ivcTdt8zwrYqDdkLutGlesma9DhKPzHw,2925
|
|
24
26
|
model_setup/maco/model/__init__.py,sha256=SJrwdn12wklUFm2KoIgWjX_KgvJxCM7Ca9ntXOneuzc,31
|
|
25
27
|
model_setup/maco/model/model.py,sha256=ngen4ViyLdRo_z_TqZBjw2DN0NrRLpuxOy15-6QmtNw,23536
|
|
@@ -27,15 +29,16 @@ pipelines/publish.yaml,sha256=xt3WNU-5kIICJgKIiiE94M3dWjS3uEiun-n4OmIssK8,1471
|
|
|
27
29
|
pipelines/test.yaml,sha256=3KOoo-8SqP_bTAscsz5V3xxnuL91J-62mTjnQD1Btag,1019
|
|
28
30
|
tests/data/example.txt.cart,sha256=j4ZdDnFNVq7lb-Qi4pY4evOXKQPKG-GSg-n-uEqPhV0,289
|
|
29
31
|
tests/data/trigger_complex.txt,sha256=uqnLSrnyDGCmXwuPmZ2s8vdhH0hJs8DxvyaW_tuYY24,64
|
|
32
|
+
tests/data/trigger_complex.txt.cart,sha256=Z7qF1Zi640O45Znkl9ooP2RhSLAEqY0NRf51d-q7utU,345
|
|
30
33
|
tests/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
34
|
tests/extractors/basic.py,sha256=r5eLCL6Ynr14nCBgtbLvUbm0NdrXizyc9c-4xBCNShU,828
|
|
32
35
|
tests/extractors/basic_longer.py,sha256=1ClU2QD-Y0TOl_loNFvEqIEpTR5TSVJ6zg9ZmC-ESJo,860
|
|
33
36
|
tests/extractors/test_basic.py,sha256=FLKekfSGM69HaiF7Vu_7D7KDXHZko-9hZkMO8_DoyYA,697
|
|
34
37
|
tests/extractors/bob/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
38
|
tests/extractors/bob/bob.py,sha256=Gy5p8KssJX87cwa9vVv8UBODF_ulbUteZXh15frW2hs,247
|
|
36
|
-
maco-1.2.
|
|
37
|
-
maco-1.2.
|
|
38
|
-
maco-1.2.
|
|
39
|
-
maco-1.2.
|
|
40
|
-
maco-1.2.
|
|
41
|
-
maco-1.2.
|
|
39
|
+
maco-1.2.5.dist-info/LICENSE.md,sha256=gMSjshPhXvV_F1qxmeNkKdBqGWkd__fEJf4glS504bM,1478
|
|
40
|
+
maco-1.2.5.dist-info/METADATA,sha256=cJ7x_shBhDgKVjkq_e2d94aj3qiUzi0lt7f3lPO334U,15610
|
|
41
|
+
maco-1.2.5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
42
|
+
maco-1.2.5.dist-info/entry_points.txt,sha256=TpcwG1gedIg8Y7a9ZOv8aQpuwEUftCefDrAjzeP-o6U,39
|
|
43
|
+
maco-1.2.5.dist-info/top_level.txt,sha256=iMRwuzmrHA3zSwiSeMIl6FWhzRpn_st-I4fAv-kw5_o,49
|
|
44
|
+
maco-1.2.5.dist-info/RECORD,,
|
model_setup/maco/base_test.py
CHANGED
|
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
|
|
|
32
32
|
# I recommend something like os.path.join(__file__, "../../extractors")
|
|
33
33
|
# if your extractors are in a folder 'extractors' next to a folder of tests
|
|
34
34
|
path: str = None
|
|
35
|
+
create_venv: bool=False
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
@classmethod
|
|
38
|
+
def setUpClass(cls) -> None:
|
|
39
|
+
if not cls.name or not cls.path:
|
|
38
40
|
raise Exception("name and path must be set")
|
|
39
|
-
|
|
41
|
+
cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
|
|
42
|
+
return super().setUpClass()
|
|
43
|
+
|
|
44
|
+
def test_default_metadata(self):
|
|
45
|
+
"""Require extractor to be loadable and valid."""
|
|
40
46
|
self.assertIn(self.name, self.c.extractors)
|
|
41
47
|
self.assertEqual(len(self.c.extractors), 1)
|
|
42
|
-
return super().setUp()
|
|
43
48
|
|
|
44
49
|
def extract(self, stream):
|
|
45
50
|
"""Return results for running extractor over stream, including yara check."""
|
|
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
|
|
|
49
54
|
resp = self.c.extract(stream, self.name)
|
|
50
55
|
return resp
|
|
51
56
|
|
|
52
|
-
|
|
57
|
+
@classmethod
|
|
58
|
+
def _get_location(cls) -> str:
|
|
53
59
|
"""Return path to child class that implements this class."""
|
|
54
60
|
# import child module
|
|
55
|
-
module =
|
|
61
|
+
module = cls.__module__
|
|
56
62
|
i = importlib.import_module(module)
|
|
57
63
|
# get location to child module
|
|
58
64
|
return i.__file__
|
|
59
65
|
|
|
60
|
-
|
|
66
|
+
@classmethod
|
|
67
|
+
def load_cart(cls, filepath: str) -> io.BytesIO:
|
|
61
68
|
"""Load and unneuter a test file (likely malware) into memory for processing."""
|
|
62
69
|
# it is nice if we can load files relative to whatever is implementing base_test
|
|
63
|
-
dirpath = os.path.split(
|
|
70
|
+
dirpath = os.path.split(cls._get_location())[0]
|
|
64
71
|
# either filepath is absolute, or should be loaded relative to child of base_test
|
|
65
72
|
filepath = os.path.join(dirpath, filepath)
|
|
66
73
|
if not os.path.isfile(filepath):
|
model_setup/maco/cli.py
CHANGED
|
@@ -179,7 +179,7 @@ def main():
|
|
|
179
179
|
parser.add_argument(
|
|
180
180
|
"--create_venv",
|
|
181
181
|
action="store_true",
|
|
182
|
-
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
|
|
182
|
+
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
|
|
183
183
|
)
|
|
184
184
|
args = parser.parse_args()
|
|
185
185
|
inc = args.include.split(",") if args.include else []
|
model_setup/maco/collector.py
CHANGED
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
+
import logging.handlers
|
|
5
6
|
import os
|
|
6
|
-
from multiprocessing import Manager, Process
|
|
7
|
+
from multiprocessing import Manager, Process, Queue
|
|
7
8
|
from tempfile import NamedTemporaryFile
|
|
8
9
|
from types import ModuleType
|
|
9
10
|
from typing import Any, BinaryIO, Dict, List, Union
|
|
@@ -86,21 +87,33 @@ class Collector:
|
|
|
86
87
|
)
|
|
87
88
|
namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
|
|
88
89
|
|
|
90
|
+
# multiprocess logging is awkward - set up a queue to ensure we can log
|
|
91
|
+
logging_queue = Queue()
|
|
92
|
+
queue_handler = logging.handlers.QueueListener(logging_queue,*logging.getLogger().handlers)
|
|
93
|
+
queue_handler.start()
|
|
94
|
+
|
|
89
95
|
# Find the extractors within the given directory
|
|
90
96
|
# Execute within a child process to ensure main process interpreter is kept clean
|
|
91
97
|
p = Process(
|
|
92
|
-
target=utils.
|
|
98
|
+
target=utils.proxy_logging,
|
|
93
99
|
args=(
|
|
94
|
-
|
|
95
|
-
|
|
100
|
+
logging_queue,
|
|
101
|
+
utils.import_extractors,
|
|
96
102
|
extractor_module_callback,
|
|
97
|
-
|
|
98
|
-
|
|
103
|
+
),
|
|
104
|
+
kwargs=dict(
|
|
105
|
+
root_directory=path_extractors,
|
|
106
|
+
scanner=yara.compile(source=utils.MACO_YARA_RULE),
|
|
107
|
+
create_venv=create_venv and os.path.isdir(path_extractors),
|
|
99
108
|
),
|
|
100
109
|
)
|
|
101
110
|
p.start()
|
|
102
111
|
p.join()
|
|
103
112
|
|
|
113
|
+
# stop multiprocess logging
|
|
114
|
+
queue_handler.stop()
|
|
115
|
+
logging_queue.close()
|
|
116
|
+
|
|
104
117
|
self.extractors = dict(extractors)
|
|
105
118
|
if not self.extractors:
|
|
106
119
|
raise ExtractorLoadError("no extractors were loaded")
|
model_setup/maco/extractor.py
CHANGED
|
@@ -51,14 +51,14 @@ class Extractor:
|
|
|
51
51
|
# check yara rules conform to expected structure
|
|
52
52
|
# we throw away these compiled rules as we need all rules in system compiled together
|
|
53
53
|
try:
|
|
54
|
-
|
|
54
|
+
self.yara_compiled = yara.compile(source=self.yara_rule)
|
|
55
55
|
except yara.SyntaxError as e:
|
|
56
56
|
raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
|
|
57
57
|
# need to track which plugin owns the rules
|
|
58
|
-
self.yara_rule_names = [x.identifier for x in
|
|
59
|
-
if not len(list(
|
|
58
|
+
self.yara_rule_names = [x.identifier for x in self.yara_compiled]
|
|
59
|
+
if not len(list(self.yara_compiled)):
|
|
60
60
|
raise InvalidExtractor(f"{name} must define at least one yara rule")
|
|
61
|
-
for x in
|
|
61
|
+
for x in self.yara_compiled:
|
|
62
62
|
if x.is_global:
|
|
63
63
|
raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
|
|
64
64
|
|
model_setup/maco/utils.py
CHANGED
|
@@ -4,6 +4,9 @@ import importlib.machinery
|
|
|
4
4
|
import importlib.util
|
|
5
5
|
import inspect
|
|
6
6
|
import json
|
|
7
|
+
import logging
|
|
8
|
+
import logging.handlers
|
|
9
|
+
import multiprocessing
|
|
7
10
|
import os
|
|
8
11
|
import re
|
|
9
12
|
import shutil
|
|
@@ -24,32 +27,24 @@ from glob import glob
|
|
|
24
27
|
from logging import Logger
|
|
25
28
|
from pkgutil import walk_packages
|
|
26
29
|
from types import ModuleType
|
|
27
|
-
from typing import Callable, Dict, List, Set, Tuple
|
|
30
|
+
from typing import Callable, Dict, List, Set, Tuple, Union
|
|
28
31
|
|
|
29
|
-
from
|
|
30
|
-
|
|
31
|
-
VENV_DIRECTORY_NAME = ".venv"
|
|
32
|
+
from uv import find_uv_bin
|
|
32
33
|
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
from maco import model
|
|
35
|
+
from maco.extractor import Extractor
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
# Attempt to use the uv package manager (Recommended)
|
|
38
|
-
from uv import find_uv_bin
|
|
37
|
+
logger = logging.getLogger("maco.lib.utils")
|
|
39
38
|
|
|
40
|
-
|
|
39
|
+
VENV_DIRECTORY_NAME = ".venv"
|
|
41
40
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
PACKAGE_MANAGER = "uv"
|
|
45
|
-
except ImportError:
|
|
46
|
-
# Otherwise default to pip
|
|
47
|
-
from sys import executable
|
|
41
|
+
RELATIVE_FROM_RE = re.compile(r"from (\.+)")
|
|
42
|
+
RELATIVE_FROM_IMPORT_RE = re.compile(r"from (\.+) import")
|
|
48
43
|
|
|
49
|
-
|
|
50
|
-
VENV_CREATE_CMD = f"{executable} -m venv"
|
|
51
|
-
PACKAGE_MANAGER = "pip"
|
|
44
|
+
UV_BIN = find_uv_bin()
|
|
52
45
|
|
|
46
|
+
PIP_CMD = f"{UV_BIN} pip"
|
|
47
|
+
VENV_CREATE_CMD = f"{UV_BIN} venv"
|
|
53
48
|
|
|
54
49
|
class Base64Decoder(json.JSONDecoder):
|
|
55
50
|
def __init__(self, *args, **kwargs):
|
|
@@ -69,6 +64,7 @@ import importlib
|
|
|
69
64
|
import json
|
|
70
65
|
import os
|
|
71
66
|
import sys
|
|
67
|
+
import logging
|
|
72
68
|
|
|
73
69
|
try:
|
|
74
70
|
from maco import yara
|
|
@@ -76,6 +72,19 @@ except:
|
|
|
76
72
|
import yara
|
|
77
73
|
|
|
78
74
|
from base64 import b64encode
|
|
75
|
+
|
|
76
|
+
# ensure we have a logger to stderr
|
|
77
|
+
import logging
|
|
78
|
+
logger = logging.getLogger()
|
|
79
|
+
logger.setLevel(logging.DEBUG)
|
|
80
|
+
sh = logging.StreamHandler()
|
|
81
|
+
logger.addHandler(sh)
|
|
82
|
+
sh.setLevel(logging.DEBUG)
|
|
83
|
+
formatter = logging.Formatter(
|
|
84
|
+
fmt="%(asctime)s, [%(levelname)s] %(module)s.%(funcName)s: %(message)s", datefmt="%Y-%m-%d (%H:%M:%S)"
|
|
85
|
+
)
|
|
86
|
+
sh.setFormatter(formatter)
|
|
87
|
+
|
|
79
88
|
parent_package_path = "{parent_package_path}"
|
|
80
89
|
sys.path.insert(1, parent_package_path)
|
|
81
90
|
mod = importlib.import_module("{module_name}")
|
|
@@ -101,7 +110,7 @@ with open("{output_path}", 'w') as fp:
|
|
|
101
110
|
json.dump(result.dict(exclude_defaults=True, exclude_none=True), fp, cls=Base64Encoder)
|
|
102
111
|
"""
|
|
103
112
|
|
|
104
|
-
MACO_YARA_RULE = """
|
|
113
|
+
MACO_YARA_RULE = r"""
|
|
105
114
|
rule MACO {
|
|
106
115
|
meta:
|
|
107
116
|
desc = "Used to match on Python files that contain MACO extractors"
|
|
@@ -191,9 +200,8 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
|
|
|
191
200
|
return extractor_dirs, extractor_files
|
|
192
201
|
|
|
193
202
|
|
|
194
|
-
def
|
|
203
|
+
def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
|
|
195
204
|
venvs = []
|
|
196
|
-
logger.info("Creating virtual environment(s)..")
|
|
197
205
|
env = deepcopy(os.environ)
|
|
198
206
|
stop_directory = os.path.dirname(sorted(directories)[0])
|
|
199
207
|
# Track directories that we've already visited
|
|
@@ -203,14 +211,15 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
203
211
|
while dir != stop_directory and dir not in visited_dirs:
|
|
204
212
|
req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
|
|
205
213
|
if req_files:
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
+
# create a virtual environment, otherwise directly install into current env
|
|
215
|
+
if create_venv:
|
|
216
|
+
venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
|
|
217
|
+
logger.info(f"Updating virtual environment {venv_path}")
|
|
218
|
+
env.update({"VIRTUAL_ENV": venv_path})
|
|
219
|
+
# Create a virtual environment for the directory
|
|
220
|
+
if not os.path.exists(venv_path):
|
|
221
|
+
cmd = f"{VENV_CREATE_CMD} --python {python_version}"
|
|
222
|
+
subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
|
|
214
223
|
|
|
215
224
|
# Install/Update the packages in the environment
|
|
216
225
|
install_command = PIP_CMD.split(" ") + ["install", "-U"]
|
|
@@ -234,7 +243,10 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
234
243
|
|
|
235
244
|
install_command.extend(pyproject_command)
|
|
236
245
|
|
|
246
|
+
# always require maco to be installed
|
|
247
|
+
install_command.append("maco")
|
|
237
248
|
logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
|
|
249
|
+
# this uses VIRTUAL_ENV to control usage of a virtual environment
|
|
238
250
|
p = subprocess.run(
|
|
239
251
|
install_command,
|
|
240
252
|
cwd=dir,
|
|
@@ -245,10 +257,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
245
257
|
if b"is being installed using the legacy" in p.stderr:
|
|
246
258
|
# Ignore these types of errors
|
|
247
259
|
continue
|
|
248
|
-
logger.error(f"Error installing into venv:\n{p.stderr.decode()}")
|
|
260
|
+
logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
249
261
|
else:
|
|
250
|
-
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
|
|
251
|
-
|
|
262
|
+
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
263
|
+
if create_venv:
|
|
264
|
+
venvs.append(venv_path)
|
|
252
265
|
|
|
253
266
|
# Cleanup any build directories that are the product of package installation
|
|
254
267
|
expected_build_path = os.path.join(dir, "build")
|
|
@@ -292,15 +305,10 @@ def register_extractors(
|
|
|
292
305
|
):
|
|
293
306
|
package_name = os.path.basename(current_directory)
|
|
294
307
|
parent_directory = os.path.dirname(current_directory)
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
# We'll need to create a link back to the original
|
|
301
|
-
if package_name not in sys.modules:
|
|
302
|
-
symlink = os.path.join(parent_directory, package_name)
|
|
303
|
-
os.symlink(current_directory, symlink)
|
|
308
|
+
if venvs and package_name in sys.modules:
|
|
309
|
+
# this may happen as part of testing if some part of the extractor code was directly imported
|
|
310
|
+
logger.warning(f"Looks like {package_name} is already loaded. "
|
|
311
|
+
"If your maco extractor overlaps an existing package name this could cause problems.")
|
|
304
312
|
|
|
305
313
|
try:
|
|
306
314
|
# Modify the PATH so we can recognize this new package on import
|
|
@@ -351,10 +359,6 @@ def register_extractors(
|
|
|
351
359
|
# Remove any modules that were loaded to deconflict with later modules loads
|
|
352
360
|
[sys.modules.pop(k) for k in set(sys.modules.keys()) - default_loaded_modules]
|
|
353
361
|
|
|
354
|
-
# Cleanup any symlinks
|
|
355
|
-
if symlink:
|
|
356
|
-
os.remove(symlink)
|
|
357
|
-
|
|
358
362
|
# If there still exists extractor files we haven't found yet, try searching in the available subdirectories
|
|
359
363
|
if extractor_files:
|
|
360
364
|
for dir in os.listdir(current_directory):
|
|
@@ -379,13 +383,21 @@ def register_extractors(
|
|
|
379
383
|
# We were able to find all the extractor files
|
|
380
384
|
break
|
|
381
385
|
|
|
386
|
+
def proxy_logging(queue: multiprocessing.Queue, callback: Callable[[ModuleType, str], None], *args, **kwargs):
|
|
387
|
+
"""Ensures logging is set up correctly for a child process and then executes the callback."""
|
|
388
|
+
logger = logging.getLogger()
|
|
389
|
+
qh = logging.handlers.QueueHandler(queue)
|
|
390
|
+
qh.setLevel(logging.DEBUG)
|
|
391
|
+
logger.addHandler(qh)
|
|
392
|
+
callback(*args, **kwargs, logger=logger)
|
|
382
393
|
|
|
383
394
|
def import_extractors(
|
|
395
|
+
extractor_module_callback: Callable[[ModuleType, str], bool],
|
|
396
|
+
*,
|
|
384
397
|
root_directory: str,
|
|
385
398
|
scanner: yara.Rules,
|
|
386
|
-
|
|
399
|
+
create_venv: bool,
|
|
387
400
|
logger: Logger,
|
|
388
|
-
create_venv: bool = False,
|
|
389
401
|
python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
|
390
402
|
):
|
|
391
403
|
extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
|
|
@@ -393,23 +405,17 @@ def import_extractors(
|
|
|
393
405
|
logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
|
|
394
406
|
logger.debug(extractor_files)
|
|
395
407
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
venvs = create_virtual_environments(extractor_dirs, python_version, logger)
|
|
399
|
-
else:
|
|
400
|
-
# Look for pre-existing virtual environments, if any
|
|
401
|
-
logger.info("Checking for pre-existing virtual environment(s)..")
|
|
402
|
-
venvs = [
|
|
403
|
-
os.path.join(root, VENV_DIRECTORY_NAME)
|
|
404
|
-
for root, dirs, _ in os.walk(root_directory)
|
|
405
|
-
if VENV_DIRECTORY_NAME in dirs
|
|
406
|
-
]
|
|
408
|
+
# Install packages into the current environment or dynamically created virtual environments
|
|
409
|
+
venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
|
|
407
410
|
|
|
408
411
|
# With the environment prepared, we can now hunt for the extractors and register them
|
|
409
412
|
logger.info("Registering extractors..")
|
|
410
413
|
register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
|
|
411
414
|
|
|
412
415
|
|
|
416
|
+
# holds cached extractors when not running in venv mode
|
|
417
|
+
_loaded_extractors: Dict[str, Extractor] = {}
|
|
418
|
+
|
|
413
419
|
def run_extractor(
|
|
414
420
|
sample_path,
|
|
415
421
|
module_name,
|
|
@@ -418,49 +424,69 @@ def run_extractor(
|
|
|
418
424
|
venv,
|
|
419
425
|
venv_script=VENV_SCRIPT,
|
|
420
426
|
json_decoder=Base64Decoder,
|
|
421
|
-
) -> Dict[str, dict]:
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
427
|
+
) -> Union[Dict[str, dict], model.ExtractorModel]:
|
|
428
|
+
"""Runs the maco extractor against sample either in current process or child process."""
|
|
429
|
+
if not venv:
|
|
430
|
+
key = f"{module_name}_{extractor_class}"
|
|
431
|
+
if key not in _loaded_extractors:
|
|
432
|
+
# dynamic import of extractor
|
|
433
|
+
mod = importlib.import_module(module_name)
|
|
434
|
+
extractor_cls = mod.__getattribute__(extractor_class)
|
|
435
|
+
extractor = extractor_cls()
|
|
436
|
+
else:
|
|
437
|
+
# retrieve cached extractor
|
|
438
|
+
extractor = _loaded_extractors[key]
|
|
439
|
+
if extractor.yara_compiled:
|
|
440
|
+
matches = extractor.yara_compiled.match(sample_path)
|
|
441
|
+
loaded = extractor.run(open(sample_path, 'rb'), matches=matches)
|
|
442
|
+
else:
|
|
443
|
+
# execute extractor in child process with separate virtual environment
|
|
444
|
+
# Write temporary script in the same directory as extractor to resolve relative imports
|
|
426
445
|
python_exe = os.path.join(venv, "bin", "python")
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
446
|
+
dirname = os.path.dirname(module_path)
|
|
447
|
+
with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
|
|
448
|
+
with tempfile.NamedTemporaryFile() as output:
|
|
449
|
+
parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
|
|
450
|
+
root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
|
|
451
|
+
|
|
452
|
+
script.write(
|
|
453
|
+
venv_script.format(
|
|
454
|
+
parent_package_path=parent_package_path,
|
|
455
|
+
module_name=module_name,
|
|
456
|
+
module_class=extractor_class,
|
|
457
|
+
sample_path=sample_path,
|
|
458
|
+
output_path=output.name,
|
|
459
|
+
)
|
|
440
460
|
)
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
461
|
+
script.flush()
|
|
462
|
+
cwd = root_directory
|
|
463
|
+
custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
|
|
464
|
+
|
|
465
|
+
if custom_module.startswith("src."):
|
|
466
|
+
# src layout found, which means the actual module content is within 'src' directory
|
|
467
|
+
custom_module = custom_module[4:]
|
|
468
|
+
cwd = os.path.join(cwd, "src")
|
|
469
|
+
|
|
470
|
+
# run the maco extractor in full venv process isolation (slow)
|
|
471
|
+
proc = subprocess.run(
|
|
472
|
+
[python_exe, "-m", custom_module],
|
|
473
|
+
cwd=cwd,
|
|
474
|
+
capture_output=True,
|
|
475
|
+
)
|
|
476
|
+
stderr = proc.stderr.decode()
|
|
477
|
+
try:
|
|
478
|
+
# Load results and return them
|
|
479
|
+
output.seek(0)
|
|
480
|
+
loaded = json.load(output, cls=json_decoder)
|
|
481
|
+
except Exception as e:
|
|
482
|
+
# If there was an error raised during runtime, then propagate
|
|
483
|
+
delim = f'File "{module_path}"'
|
|
484
|
+
exception = stderr
|
|
485
|
+
if delim in exception:
|
|
486
|
+
exception = f"{delim}{exception.split(delim, 1)[1]}"
|
|
487
|
+
# print extractor logging at error level
|
|
488
|
+
logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
|
|
489
|
+
raise Exception(exception) from e
|
|
490
|
+
# ensure that extractor logging is available
|
|
491
|
+
logger.info(f"maco extractor stderr:\n{stderr}")
|
|
492
|
+
return loaded
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|