maco 1.2.4__tar.gz → 1.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {maco-1.2.4/maco.egg-info → maco-1.2.5}/PKG-INFO +1 -1
- {maco-1.2.4 → maco-1.2.5}/demo_extractors/complex/complex.py +1 -2
- {maco-1.2.4 → maco-1.2.5}/demo_extractors/limit_other.py +5 -2
- maco-1.2.5/demo_extractors/requirements.txt +1 -0
- {maco-1.2.4 → maco-1.2.5}/demo_extractors/shared.py +1 -0
- {maco-1.2.4/model_setup → maco-1.2.5}/maco/base_test.py +15 -8
- {maco-1.2.4/model_setup → maco-1.2.5}/maco/cli.py +1 -1
- {maco-1.2.4/model_setup → maco-1.2.5}/maco/extractor.py +4 -4
- {maco-1.2.4 → maco-1.2.5}/maco/utils.py +98 -96
- {maco-1.2.4 → maco-1.2.5/maco.egg-info}/PKG-INFO +1 -1
- {maco-1.2.4 → maco-1.2.5}/maco.egg-info/SOURCES.txt +2 -0
- {maco-1.2.4 → maco-1.2.5/model_setup}/maco/base_test.py +15 -8
- {maco-1.2.4 → maco-1.2.5/model_setup}/maco/cli.py +1 -1
- {maco-1.2.4 → maco-1.2.5/model_setup}/maco/extractor.py +4 -4
- {maco-1.2.4 → maco-1.2.5}/model_setup/maco/utils.py +98 -96
- maco-1.2.5/tests/benchmark.py +107 -0
- {maco-1.2.4 → maco-1.2.5}/tests/test_base_test.py +15 -0
- {maco-1.2.4 → maco-1.2.5}/tests/test_detection.py +5 -3
- {maco-1.2.4 → maco-1.2.5}/tox.ini +1 -1
- {maco-1.2.4 → maco-1.2.5}/.gitignore +0 -0
- {maco-1.2.4 → maco-1.2.5}/.vscode/settings.json +0 -0
- {maco-1.2.4 → maco-1.2.5}/LICENSE.md +0 -0
- {maco-1.2.4 → maco-1.2.5}/README.md +0 -0
- {maco-1.2.4 → maco-1.2.5}/demo_extractors/__init__.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/demo_extractors/complex/__init__.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/demo_extractors/complex/complex_utils.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/demo_extractors/elfy.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/demo_extractors/nothing.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco/__init__.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco/collector.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco/model/__init__.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco/model/model.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco/yara.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco.egg-info/dependency_links.txt +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco.egg-info/entry_points.txt +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco.egg-info/requires.txt +0 -0
- {maco-1.2.4 → maco-1.2.5}/maco.egg-info/top_level.txt +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/LICENSE.md +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/README.md +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/maco/__init__.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/maco/collector.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/maco/model/__init__.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/maco/model/model.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/maco/yara.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/pyproject.toml +0 -0
- {maco-1.2.4 → maco-1.2.5}/model_setup/setup.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/pipelines/publish.yaml +0 -0
- {maco-1.2.4 → maco-1.2.5}/pipelines/test.yaml +0 -0
- {maco-1.2.4 → maco-1.2.5}/pyproject.toml +0 -0
- {maco-1.2.4 → maco-1.2.5}/requirements.txt +0 -0
- {maco-1.2.4 → maco-1.2.5}/setup.cfg +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/data/example.txt.cart +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/data/trigger_complex.txt +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/data/trigger_complex.txt.cart +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/extractors/__init__.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/extractors/basic.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/extractors/basic_longer.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/extractors/bob/__init__.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/extractors/bob/bob.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/extractors/test_basic.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/pytest.ini +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/requirements.txt +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/test_cli.py +1 -1
- {maco-1.2.4 → maco-1.2.5}/tests/test_demo_extractors.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/test_extractor.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/test_helpers.py +0 -0
- {maco-1.2.4 → maco-1.2.5}/tests/test_model.py +0 -0
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
from io import BytesIO
|
|
2
2
|
from typing import Dict, List, Optional
|
|
3
3
|
|
|
4
|
-
from maco import extractor, model, yara
|
|
5
|
-
|
|
6
4
|
from demo_extractors import shared
|
|
5
|
+
from maco import extractor, model, yara
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class LimitOther(extractor.Extractor):
|
|
@@ -24,6 +23,10 @@ class LimitOther(extractor.Extractor):
|
|
|
24
23
|
"""
|
|
25
24
|
|
|
26
25
|
def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
|
|
26
|
+
# import httpx at runtime so we can test that requirements.txt is installed dynamically without breaking
|
|
27
|
+
# the tests that do direct importing
|
|
28
|
+
import httpx
|
|
29
|
+
|
|
27
30
|
# use a custom model that inherits from ExtractorModel
|
|
28
31
|
# this model defines what can go in the 'other' dict
|
|
29
32
|
tmp = shared.MyCustomModel(family="specify_other")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
httpx
|
|
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
|
|
|
32
32
|
# I recommend something like os.path.join(__file__, "../../extractors")
|
|
33
33
|
# if your extractors are in a folder 'extractors' next to a folder of tests
|
|
34
34
|
path: str = None
|
|
35
|
+
create_venv: bool=False
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
@classmethod
|
|
38
|
+
def setUpClass(cls) -> None:
|
|
39
|
+
if not cls.name or not cls.path:
|
|
38
40
|
raise Exception("name and path must be set")
|
|
39
|
-
|
|
41
|
+
cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
|
|
42
|
+
return super().setUpClass()
|
|
43
|
+
|
|
44
|
+
def test_default_metadata(self):
|
|
45
|
+
"""Require extractor to be loadable and valid."""
|
|
40
46
|
self.assertIn(self.name, self.c.extractors)
|
|
41
47
|
self.assertEqual(len(self.c.extractors), 1)
|
|
42
|
-
return super().setUp()
|
|
43
48
|
|
|
44
49
|
def extract(self, stream):
|
|
45
50
|
"""Return results for running extractor over stream, including yara check."""
|
|
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
|
|
|
49
54
|
resp = self.c.extract(stream, self.name)
|
|
50
55
|
return resp
|
|
51
56
|
|
|
52
|
-
|
|
57
|
+
@classmethod
|
|
58
|
+
def _get_location(cls) -> str:
|
|
53
59
|
"""Return path to child class that implements this class."""
|
|
54
60
|
# import child module
|
|
55
|
-
module =
|
|
61
|
+
module = cls.__module__
|
|
56
62
|
i = importlib.import_module(module)
|
|
57
63
|
# get location to child module
|
|
58
64
|
return i.__file__
|
|
59
65
|
|
|
60
|
-
|
|
66
|
+
@classmethod
|
|
67
|
+
def load_cart(cls, filepath: str) -> io.BytesIO:
|
|
61
68
|
"""Load and unneuter a test file (likely malware) into memory for processing."""
|
|
62
69
|
# it is nice if we can load files relative to whatever is implementing base_test
|
|
63
|
-
dirpath = os.path.split(
|
|
70
|
+
dirpath = os.path.split(cls._get_location())[0]
|
|
64
71
|
# either filepath is absolute, or should be loaded relative to child of base_test
|
|
65
72
|
filepath = os.path.join(dirpath, filepath)
|
|
66
73
|
if not os.path.isfile(filepath):
|
|
@@ -179,7 +179,7 @@ def main():
|
|
|
179
179
|
parser.add_argument(
|
|
180
180
|
"--create_venv",
|
|
181
181
|
action="store_true",
|
|
182
|
-
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
|
|
182
|
+
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
|
|
183
183
|
)
|
|
184
184
|
args = parser.parse_args()
|
|
185
185
|
inc = args.include.split(",") if args.include else []
|
|
@@ -51,14 +51,14 @@ class Extractor:
|
|
|
51
51
|
# check yara rules conform to expected structure
|
|
52
52
|
# we throw away these compiled rules as we need all rules in system compiled together
|
|
53
53
|
try:
|
|
54
|
-
|
|
54
|
+
self.yara_compiled = yara.compile(source=self.yara_rule)
|
|
55
55
|
except yara.SyntaxError as e:
|
|
56
56
|
raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
|
|
57
57
|
# need to track which plugin owns the rules
|
|
58
|
-
self.yara_rule_names = [x.identifier for x in
|
|
59
|
-
if not len(list(
|
|
58
|
+
self.yara_rule_names = [x.identifier for x in self.yara_compiled]
|
|
59
|
+
if not len(list(self.yara_compiled)):
|
|
60
60
|
raise InvalidExtractor(f"{name} must define at least one yara rule")
|
|
61
|
-
for x in
|
|
61
|
+
for x in self.yara_compiled:
|
|
62
62
|
if x.is_global:
|
|
63
63
|
raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
|
|
64
64
|
|
|
@@ -4,14 +4,14 @@ import importlib.machinery
|
|
|
4
4
|
import importlib.util
|
|
5
5
|
import inspect
|
|
6
6
|
import json
|
|
7
|
+
import logging
|
|
7
8
|
import logging.handlers
|
|
9
|
+
import multiprocessing
|
|
8
10
|
import os
|
|
9
11
|
import re
|
|
10
12
|
import shutil
|
|
11
13
|
import subprocess
|
|
12
14
|
import sys
|
|
13
|
-
import multiprocessing
|
|
14
|
-
import logging
|
|
15
15
|
import tempfile
|
|
16
16
|
|
|
17
17
|
from maco import yara
|
|
@@ -27,8 +27,11 @@ from glob import glob
|
|
|
27
27
|
from logging import Logger
|
|
28
28
|
from pkgutil import walk_packages
|
|
29
29
|
from types import ModuleType
|
|
30
|
-
from typing import Callable, Dict, List, Set, Tuple
|
|
30
|
+
from typing import Callable, Dict, List, Set, Tuple, Union
|
|
31
|
+
|
|
32
|
+
from uv import find_uv_bin
|
|
31
33
|
|
|
34
|
+
from maco import model
|
|
32
35
|
from maco.extractor import Extractor
|
|
33
36
|
|
|
34
37
|
logger = logging.getLogger("maco.lib.utils")
|
|
@@ -38,23 +41,10 @@ VENV_DIRECTORY_NAME = ".venv"
|
|
|
38
41
|
RELATIVE_FROM_RE = re.compile(r"from (\.+)")
|
|
39
42
|
RELATIVE_FROM_IMPORT_RE = re.compile(r"from (\.+) import")
|
|
40
43
|
|
|
41
|
-
|
|
42
|
-
# Attempt to use the uv package manager (Recommended)
|
|
43
|
-
from uv import find_uv_bin
|
|
44
|
-
|
|
45
|
-
UV_BIN = find_uv_bin()
|
|
46
|
-
|
|
47
|
-
PIP_CMD = f"{UV_BIN} pip"
|
|
48
|
-
VENV_CREATE_CMD = f"{UV_BIN} venv"
|
|
49
|
-
PACKAGE_MANAGER = "uv"
|
|
50
|
-
except ImportError:
|
|
51
|
-
# Otherwise default to pip
|
|
52
|
-
from sys import executable
|
|
53
|
-
|
|
54
|
-
PIP_CMD = "pip"
|
|
55
|
-
VENV_CREATE_CMD = f"{executable} -m venv"
|
|
56
|
-
PACKAGE_MANAGER = "pip"
|
|
44
|
+
UV_BIN = find_uv_bin()
|
|
57
45
|
|
|
46
|
+
PIP_CMD = f"{UV_BIN} pip"
|
|
47
|
+
VENV_CREATE_CMD = f"{UV_BIN} venv"
|
|
58
48
|
|
|
59
49
|
class Base64Decoder(json.JSONDecoder):
|
|
60
50
|
def __init__(self, *args, **kwargs):
|
|
@@ -210,9 +200,8 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
|
|
|
210
200
|
return extractor_dirs, extractor_files
|
|
211
201
|
|
|
212
202
|
|
|
213
|
-
def
|
|
203
|
+
def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
|
|
214
204
|
venvs = []
|
|
215
|
-
logger.info("Creating virtual environment(s)..")
|
|
216
205
|
env = deepcopy(os.environ)
|
|
217
206
|
stop_directory = os.path.dirname(sorted(directories)[0])
|
|
218
207
|
# Track directories that we've already visited
|
|
@@ -222,14 +211,15 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
222
211
|
while dir != stop_directory and dir not in visited_dirs:
|
|
223
212
|
req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
|
|
224
213
|
if req_files:
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
214
|
+
# create a virtual environment, otherwise directly install into current env
|
|
215
|
+
if create_venv:
|
|
216
|
+
venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
|
|
217
|
+
logger.info(f"Updating virtual environment {venv_path}")
|
|
218
|
+
env.update({"VIRTUAL_ENV": venv_path})
|
|
219
|
+
# Create a virtual environment for the directory
|
|
220
|
+
if not os.path.exists(venv_path):
|
|
221
|
+
cmd = f"{VENV_CREATE_CMD} --python {python_version}"
|
|
222
|
+
subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
|
|
233
223
|
|
|
234
224
|
# Install/Update the packages in the environment
|
|
235
225
|
install_command = PIP_CMD.split(" ") + ["install", "-U"]
|
|
@@ -253,7 +243,10 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
253
243
|
|
|
254
244
|
install_command.extend(pyproject_command)
|
|
255
245
|
|
|
246
|
+
# always require maco to be installed
|
|
247
|
+
install_command.append("maco")
|
|
256
248
|
logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
|
|
249
|
+
# this uses VIRTUAL_ENV to control usage of a virtual environment
|
|
257
250
|
p = subprocess.run(
|
|
258
251
|
install_command,
|
|
259
252
|
cwd=dir,
|
|
@@ -264,10 +257,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
264
257
|
if b"is being installed using the legacy" in p.stderr:
|
|
265
258
|
# Ignore these types of errors
|
|
266
259
|
continue
|
|
267
|
-
logger.error(f"Error installing into venv:\n{p.stderr.decode()}")
|
|
260
|
+
logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
268
261
|
else:
|
|
269
|
-
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
|
|
270
|
-
|
|
262
|
+
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
263
|
+
if create_venv:
|
|
264
|
+
venvs.append(venv_path)
|
|
271
265
|
|
|
272
266
|
# Cleanup any build directories that are the product of package installation
|
|
273
267
|
expected_build_path = os.path.join(dir, "build")
|
|
@@ -311,7 +305,7 @@ def register_extractors(
|
|
|
311
305
|
):
|
|
312
306
|
package_name = os.path.basename(current_directory)
|
|
313
307
|
parent_directory = os.path.dirname(current_directory)
|
|
314
|
-
if package_name in sys.modules:
|
|
308
|
+
if venvs and package_name in sys.modules:
|
|
315
309
|
# this may happen as part of testing if some part of the extractor code was directly imported
|
|
316
310
|
logger.warning(f"Looks like {package_name} is already loaded. "
|
|
317
311
|
"If your maco extractor overlaps an existing package name this could cause problems.")
|
|
@@ -402,32 +396,26 @@ def import_extractors(
|
|
|
402
396
|
*,
|
|
403
397
|
root_directory: str,
|
|
404
398
|
scanner: yara.Rules,
|
|
405
|
-
create_venv: bool
|
|
406
|
-
python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
|
399
|
+
create_venv: bool,
|
|
407
400
|
logger: Logger,
|
|
401
|
+
python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
|
408
402
|
):
|
|
409
403
|
extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
|
|
410
404
|
|
|
411
405
|
logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
|
|
412
406
|
logger.debug(extractor_files)
|
|
413
407
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
venvs = create_virtual_environments(extractor_dirs, python_version, logger)
|
|
417
|
-
else:
|
|
418
|
-
# Look for pre-existing virtual environments, if any
|
|
419
|
-
logger.info("Checking for pre-existing virtual environment(s)..")
|
|
420
|
-
venvs = [
|
|
421
|
-
os.path.join(root, VENV_DIRECTORY_NAME)
|
|
422
|
-
for root, dirs, _ in os.walk(root_directory)
|
|
423
|
-
if VENV_DIRECTORY_NAME in dirs
|
|
424
|
-
]
|
|
408
|
+
# Install packages into the current environment or dynamically created virtual environments
|
|
409
|
+
venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
|
|
425
410
|
|
|
426
411
|
# With the environment prepared, we can now hunt for the extractors and register them
|
|
427
412
|
logger.info("Registering extractors..")
|
|
428
413
|
register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
|
|
429
414
|
|
|
430
415
|
|
|
416
|
+
# holds cached extractors when not running in venv mode
|
|
417
|
+
_loaded_extractors: Dict[str, Extractor] = {}
|
|
418
|
+
|
|
431
419
|
def run_extractor(
|
|
432
420
|
sample_path,
|
|
433
421
|
module_name,
|
|
@@ -436,55 +424,69 @@ def run_extractor(
|
|
|
436
424
|
venv,
|
|
437
425
|
venv_script=VENV_SCRIPT,
|
|
438
426
|
json_decoder=Base64Decoder,
|
|
439
|
-
) -> Dict[str, dict]:
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
427
|
+
) -> Union[Dict[str, dict], model.ExtractorModel]:
|
|
428
|
+
"""Runs the maco extractor against sample either in current process or child process."""
|
|
429
|
+
if not venv:
|
|
430
|
+
key = f"{module_name}_{extractor_class}"
|
|
431
|
+
if key not in _loaded_extractors:
|
|
432
|
+
# dynamic import of extractor
|
|
433
|
+
mod = importlib.import_module(module_name)
|
|
434
|
+
extractor_cls = mod.__getattribute__(extractor_class)
|
|
435
|
+
extractor = extractor_cls()
|
|
436
|
+
else:
|
|
437
|
+
# retrieve cached extractor
|
|
438
|
+
extractor = _loaded_extractors[key]
|
|
439
|
+
if extractor.yara_compiled:
|
|
440
|
+
matches = extractor.yara_compiled.match(sample_path)
|
|
441
|
+
loaded = extractor.run(open(sample_path, 'rb'), matches=matches)
|
|
442
|
+
else:
|
|
443
|
+
# execute extractor in child process with separate virtual environment
|
|
444
|
+
# Write temporary script in the same directory as extractor to resolve relative imports
|
|
444
445
|
python_exe = os.path.join(venv, "bin", "python")
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
446
|
+
dirname = os.path.dirname(module_path)
|
|
447
|
+
with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
|
|
448
|
+
with tempfile.NamedTemporaryFile() as output:
|
|
449
|
+
parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
|
|
450
|
+
root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
|
|
451
|
+
|
|
452
|
+
script.write(
|
|
453
|
+
venv_script.format(
|
|
454
|
+
parent_package_path=parent_package_path,
|
|
455
|
+
module_name=module_name,
|
|
456
|
+
module_class=extractor_class,
|
|
457
|
+
sample_path=sample_path,
|
|
458
|
+
output_path=output.name,
|
|
459
|
+
)
|
|
458
460
|
)
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
461
|
+
script.flush()
|
|
462
|
+
cwd = root_directory
|
|
463
|
+
custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
|
|
464
|
+
|
|
465
|
+
if custom_module.startswith("src."):
|
|
466
|
+
# src layout found, which means the actual module content is within 'src' directory
|
|
467
|
+
custom_module = custom_module[4:]
|
|
468
|
+
cwd = os.path.join(cwd, "src")
|
|
469
|
+
|
|
470
|
+
# run the maco extractor in full venv process isolation (slow)
|
|
471
|
+
proc = subprocess.run(
|
|
472
|
+
[python_exe, "-m", custom_module],
|
|
473
|
+
cwd=cwd,
|
|
474
|
+
capture_output=True,
|
|
475
|
+
)
|
|
476
|
+
stderr = proc.stderr.decode()
|
|
477
|
+
try:
|
|
478
|
+
# Load results and return them
|
|
479
|
+
output.seek(0)
|
|
480
|
+
loaded = json.load(output, cls=json_decoder)
|
|
481
|
+
except Exception as e:
|
|
482
|
+
# If there was an error raised during runtime, then propagate
|
|
483
|
+
delim = f'File "{module_path}"'
|
|
484
|
+
exception = stderr
|
|
485
|
+
if delim in exception:
|
|
486
|
+
exception = f"{delim}{exception.split(delim, 1)[1]}"
|
|
487
|
+
# print extractor logging at error level
|
|
488
|
+
logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
|
|
489
|
+
raise Exception(exception) from e
|
|
490
|
+
# ensure that extractor logging is available
|
|
491
|
+
logger.info(f"maco extractor stderr:\n{stderr}")
|
|
492
|
+
return loaded
|
|
@@ -9,6 +9,7 @@ demo_extractors/__init__.py
|
|
|
9
9
|
demo_extractors/elfy.py
|
|
10
10
|
demo_extractors/limit_other.py
|
|
11
11
|
demo_extractors/nothing.py
|
|
12
|
+
demo_extractors/requirements.txt
|
|
12
13
|
demo_extractors/shared.py
|
|
13
14
|
demo_extractors/complex/__init__.py
|
|
14
15
|
demo_extractors/complex/complex.py
|
|
@@ -44,6 +45,7 @@ model_setup/maco/model/__init__.py
|
|
|
44
45
|
model_setup/maco/model/model.py
|
|
45
46
|
pipelines/publish.yaml
|
|
46
47
|
pipelines/test.yaml
|
|
48
|
+
tests/benchmark.py
|
|
47
49
|
tests/pytest.ini
|
|
48
50
|
tests/requirements.txt
|
|
49
51
|
tests/test_base_test.py
|
|
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
|
|
|
32
32
|
# I recommend something like os.path.join(__file__, "../../extractors")
|
|
33
33
|
# if your extractors are in a folder 'extractors' next to a folder of tests
|
|
34
34
|
path: str = None
|
|
35
|
+
create_venv: bool=False
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
@classmethod
|
|
38
|
+
def setUpClass(cls) -> None:
|
|
39
|
+
if not cls.name or not cls.path:
|
|
38
40
|
raise Exception("name and path must be set")
|
|
39
|
-
|
|
41
|
+
cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
|
|
42
|
+
return super().setUpClass()
|
|
43
|
+
|
|
44
|
+
def test_default_metadata(self):
|
|
45
|
+
"""Require extractor to be loadable and valid."""
|
|
40
46
|
self.assertIn(self.name, self.c.extractors)
|
|
41
47
|
self.assertEqual(len(self.c.extractors), 1)
|
|
42
|
-
return super().setUp()
|
|
43
48
|
|
|
44
49
|
def extract(self, stream):
|
|
45
50
|
"""Return results for running extractor over stream, including yara check."""
|
|
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
|
|
|
49
54
|
resp = self.c.extract(stream, self.name)
|
|
50
55
|
return resp
|
|
51
56
|
|
|
52
|
-
|
|
57
|
+
@classmethod
|
|
58
|
+
def _get_location(cls) -> str:
|
|
53
59
|
"""Return path to child class that implements this class."""
|
|
54
60
|
# import child module
|
|
55
|
-
module =
|
|
61
|
+
module = cls.__module__
|
|
56
62
|
i = importlib.import_module(module)
|
|
57
63
|
# get location to child module
|
|
58
64
|
return i.__file__
|
|
59
65
|
|
|
60
|
-
|
|
66
|
+
@classmethod
|
|
67
|
+
def load_cart(cls, filepath: str) -> io.BytesIO:
|
|
61
68
|
"""Load and unneuter a test file (likely malware) into memory for processing."""
|
|
62
69
|
# it is nice if we can load files relative to whatever is implementing base_test
|
|
63
|
-
dirpath = os.path.split(
|
|
70
|
+
dirpath = os.path.split(cls._get_location())[0]
|
|
64
71
|
# either filepath is absolute, or should be loaded relative to child of base_test
|
|
65
72
|
filepath = os.path.join(dirpath, filepath)
|
|
66
73
|
if not os.path.isfile(filepath):
|
|
@@ -179,7 +179,7 @@ def main():
|
|
|
179
179
|
parser.add_argument(
|
|
180
180
|
"--create_venv",
|
|
181
181
|
action="store_true",
|
|
182
|
-
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
|
|
182
|
+
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
|
|
183
183
|
)
|
|
184
184
|
args = parser.parse_args()
|
|
185
185
|
inc = args.include.split(",") if args.include else []
|
|
@@ -51,14 +51,14 @@ class Extractor:
|
|
|
51
51
|
# check yara rules conform to expected structure
|
|
52
52
|
# we throw away these compiled rules as we need all rules in system compiled together
|
|
53
53
|
try:
|
|
54
|
-
|
|
54
|
+
self.yara_compiled = yara.compile(source=self.yara_rule)
|
|
55
55
|
except yara.SyntaxError as e:
|
|
56
56
|
raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
|
|
57
57
|
# need to track which plugin owns the rules
|
|
58
|
-
self.yara_rule_names = [x.identifier for x in
|
|
59
|
-
if not len(list(
|
|
58
|
+
self.yara_rule_names = [x.identifier for x in self.yara_compiled]
|
|
59
|
+
if not len(list(self.yara_compiled)):
|
|
60
60
|
raise InvalidExtractor(f"{name} must define at least one yara rule")
|
|
61
|
-
for x in
|
|
61
|
+
for x in self.yara_compiled:
|
|
62
62
|
if x.is_global:
|
|
63
63
|
raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
|
|
64
64
|
|
|
@@ -4,14 +4,14 @@ import importlib.machinery
|
|
|
4
4
|
import importlib.util
|
|
5
5
|
import inspect
|
|
6
6
|
import json
|
|
7
|
+
import logging
|
|
7
8
|
import logging.handlers
|
|
9
|
+
import multiprocessing
|
|
8
10
|
import os
|
|
9
11
|
import re
|
|
10
12
|
import shutil
|
|
11
13
|
import subprocess
|
|
12
14
|
import sys
|
|
13
|
-
import multiprocessing
|
|
14
|
-
import logging
|
|
15
15
|
import tempfile
|
|
16
16
|
|
|
17
17
|
from maco import yara
|
|
@@ -27,8 +27,11 @@ from glob import glob
|
|
|
27
27
|
from logging import Logger
|
|
28
28
|
from pkgutil import walk_packages
|
|
29
29
|
from types import ModuleType
|
|
30
|
-
from typing import Callable, Dict, List, Set, Tuple
|
|
30
|
+
from typing import Callable, Dict, List, Set, Tuple, Union
|
|
31
|
+
|
|
32
|
+
from uv import find_uv_bin
|
|
31
33
|
|
|
34
|
+
from maco import model
|
|
32
35
|
from maco.extractor import Extractor
|
|
33
36
|
|
|
34
37
|
logger = logging.getLogger("maco.lib.utils")
|
|
@@ -38,23 +41,10 @@ VENV_DIRECTORY_NAME = ".venv"
|
|
|
38
41
|
RELATIVE_FROM_RE = re.compile(r"from (\.+)")
|
|
39
42
|
RELATIVE_FROM_IMPORT_RE = re.compile(r"from (\.+) import")
|
|
40
43
|
|
|
41
|
-
|
|
42
|
-
# Attempt to use the uv package manager (Recommended)
|
|
43
|
-
from uv import find_uv_bin
|
|
44
|
-
|
|
45
|
-
UV_BIN = find_uv_bin()
|
|
46
|
-
|
|
47
|
-
PIP_CMD = f"{UV_BIN} pip"
|
|
48
|
-
VENV_CREATE_CMD = f"{UV_BIN} venv"
|
|
49
|
-
PACKAGE_MANAGER = "uv"
|
|
50
|
-
except ImportError:
|
|
51
|
-
# Otherwise default to pip
|
|
52
|
-
from sys import executable
|
|
53
|
-
|
|
54
|
-
PIP_CMD = "pip"
|
|
55
|
-
VENV_CREATE_CMD = f"{executable} -m venv"
|
|
56
|
-
PACKAGE_MANAGER = "pip"
|
|
44
|
+
UV_BIN = find_uv_bin()
|
|
57
45
|
|
|
46
|
+
PIP_CMD = f"{UV_BIN} pip"
|
|
47
|
+
VENV_CREATE_CMD = f"{UV_BIN} venv"
|
|
58
48
|
|
|
59
49
|
class Base64Decoder(json.JSONDecoder):
|
|
60
50
|
def __init__(self, *args, **kwargs):
|
|
@@ -210,9 +200,8 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
|
|
|
210
200
|
return extractor_dirs, extractor_files
|
|
211
201
|
|
|
212
202
|
|
|
213
|
-
def
|
|
203
|
+
def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
|
|
214
204
|
venvs = []
|
|
215
|
-
logger.info("Creating virtual environment(s)..")
|
|
216
205
|
env = deepcopy(os.environ)
|
|
217
206
|
stop_directory = os.path.dirname(sorted(directories)[0])
|
|
218
207
|
# Track directories that we've already visited
|
|
@@ -222,14 +211,15 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
222
211
|
while dir != stop_directory and dir not in visited_dirs:
|
|
223
212
|
req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
|
|
224
213
|
if req_files:
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
214
|
+
# create a virtual environment, otherwise directly install into current env
|
|
215
|
+
if create_venv:
|
|
216
|
+
venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
|
|
217
|
+
logger.info(f"Updating virtual environment {venv_path}")
|
|
218
|
+
env.update({"VIRTUAL_ENV": venv_path})
|
|
219
|
+
# Create a virtual environment for the directory
|
|
220
|
+
if not os.path.exists(venv_path):
|
|
221
|
+
cmd = f"{VENV_CREATE_CMD} --python {python_version}"
|
|
222
|
+
subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
|
|
233
223
|
|
|
234
224
|
# Install/Update the packages in the environment
|
|
235
225
|
install_command = PIP_CMD.split(" ") + ["install", "-U"]
|
|
@@ -253,7 +243,10 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
253
243
|
|
|
254
244
|
install_command.extend(pyproject_command)
|
|
255
245
|
|
|
246
|
+
# always require maco to be installed
|
|
247
|
+
install_command.append("maco")
|
|
256
248
|
logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
|
|
249
|
+
# this uses VIRTUAL_ENV to control usage of a virtual environment
|
|
257
250
|
p = subprocess.run(
|
|
258
251
|
install_command,
|
|
259
252
|
cwd=dir,
|
|
@@ -264,10 +257,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
264
257
|
if b"is being installed using the legacy" in p.stderr:
|
|
265
258
|
# Ignore these types of errors
|
|
266
259
|
continue
|
|
267
|
-
logger.error(f"Error installing into venv:\n{p.stderr.decode()}")
|
|
260
|
+
logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
268
261
|
else:
|
|
269
|
-
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
|
|
270
|
-
|
|
262
|
+
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
263
|
+
if create_venv:
|
|
264
|
+
venvs.append(venv_path)
|
|
271
265
|
|
|
272
266
|
# Cleanup any build directories that are the product of package installation
|
|
273
267
|
expected_build_path = os.path.join(dir, "build")
|
|
@@ -311,7 +305,7 @@ def register_extractors(
|
|
|
311
305
|
):
|
|
312
306
|
package_name = os.path.basename(current_directory)
|
|
313
307
|
parent_directory = os.path.dirname(current_directory)
|
|
314
|
-
if package_name in sys.modules:
|
|
308
|
+
if venvs and package_name in sys.modules:
|
|
315
309
|
# this may happen as part of testing if some part of the extractor code was directly imported
|
|
316
310
|
logger.warning(f"Looks like {package_name} is already loaded. "
|
|
317
311
|
"If your maco extractor overlaps an existing package name this could cause problems.")
|
|
@@ -402,32 +396,26 @@ def import_extractors(
|
|
|
402
396
|
*,
|
|
403
397
|
root_directory: str,
|
|
404
398
|
scanner: yara.Rules,
|
|
405
|
-
create_venv: bool
|
|
406
|
-
python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
|
399
|
+
create_venv: bool,
|
|
407
400
|
logger: Logger,
|
|
401
|
+
python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
|
408
402
|
):
|
|
409
403
|
extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
|
|
410
404
|
|
|
411
405
|
logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
|
|
412
406
|
logger.debug(extractor_files)
|
|
413
407
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
venvs = create_virtual_environments(extractor_dirs, python_version, logger)
|
|
417
|
-
else:
|
|
418
|
-
# Look for pre-existing virtual environments, if any
|
|
419
|
-
logger.info("Checking for pre-existing virtual environment(s)..")
|
|
420
|
-
venvs = [
|
|
421
|
-
os.path.join(root, VENV_DIRECTORY_NAME)
|
|
422
|
-
for root, dirs, _ in os.walk(root_directory)
|
|
423
|
-
if VENV_DIRECTORY_NAME in dirs
|
|
424
|
-
]
|
|
408
|
+
# Install packages into the current environment or dynamically created virtual environments
|
|
409
|
+
venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
|
|
425
410
|
|
|
426
411
|
# With the environment prepared, we can now hunt for the extractors and register them
|
|
427
412
|
logger.info("Registering extractors..")
|
|
428
413
|
register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
|
|
429
414
|
|
|
430
415
|
|
|
416
|
+
# holds cached extractors when not running in venv mode
|
|
417
|
+
_loaded_extractors: Dict[str, Extractor] = {}
|
|
418
|
+
|
|
431
419
|
def run_extractor(
|
|
432
420
|
sample_path,
|
|
433
421
|
module_name,
|
|
@@ -436,55 +424,69 @@ def run_extractor(
|
|
|
436
424
|
venv,
|
|
437
425
|
venv_script=VENV_SCRIPT,
|
|
438
426
|
json_decoder=Base64Decoder,
|
|
439
|
-
) -> Dict[str, dict]:
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
427
|
+
) -> Union[Dict[str, dict], model.ExtractorModel]:
|
|
428
|
+
"""Runs the maco extractor against sample either in current process or child process."""
|
|
429
|
+
if not venv:
|
|
430
|
+
key = f"{module_name}_{extractor_class}"
|
|
431
|
+
if key not in _loaded_extractors:
|
|
432
|
+
# dynamic import of extractor
|
|
433
|
+
mod = importlib.import_module(module_name)
|
|
434
|
+
extractor_cls = mod.__getattribute__(extractor_class)
|
|
435
|
+
extractor = extractor_cls()
|
|
436
|
+
else:
|
|
437
|
+
# retrieve cached extractor
|
|
438
|
+
extractor = _loaded_extractors[key]
|
|
439
|
+
if extractor.yara_compiled:
|
|
440
|
+
matches = extractor.yara_compiled.match(sample_path)
|
|
441
|
+
loaded = extractor.run(open(sample_path, 'rb'), matches=matches)
|
|
442
|
+
else:
|
|
443
|
+
# execute extractor in child process with separate virtual environment
|
|
444
|
+
# Write temporary script in the same directory as extractor to resolve relative imports
|
|
444
445
|
python_exe = os.path.join(venv, "bin", "python")
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
446
|
+
dirname = os.path.dirname(module_path)
|
|
447
|
+
with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
|
|
448
|
+
with tempfile.NamedTemporaryFile() as output:
|
|
449
|
+
parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
|
|
450
|
+
root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
|
|
451
|
+
|
|
452
|
+
script.write(
|
|
453
|
+
venv_script.format(
|
|
454
|
+
parent_package_path=parent_package_path,
|
|
455
|
+
module_name=module_name,
|
|
456
|
+
module_class=extractor_class,
|
|
457
|
+
sample_path=sample_path,
|
|
458
|
+
output_path=output.name,
|
|
459
|
+
)
|
|
458
460
|
)
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
461
|
+
script.flush()
|
|
462
|
+
cwd = root_directory
|
|
463
|
+
custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")
|
|
464
|
+
|
|
465
|
+
if custom_module.startswith("src."):
|
|
466
|
+
# src layout found, which means the actual module content is within 'src' directory
|
|
467
|
+
custom_module = custom_module[4:]
|
|
468
|
+
cwd = os.path.join(cwd, "src")
|
|
469
|
+
|
|
470
|
+
# run the maco extractor in full venv process isolation (slow)
|
|
471
|
+
proc = subprocess.run(
|
|
472
|
+
[python_exe, "-m", custom_module],
|
|
473
|
+
cwd=cwd,
|
|
474
|
+
capture_output=True,
|
|
475
|
+
)
|
|
476
|
+
stderr = proc.stderr.decode()
|
|
477
|
+
try:
|
|
478
|
+
# Load results and return them
|
|
479
|
+
output.seek(0)
|
|
480
|
+
loaded = json.load(output, cls=json_decoder)
|
|
481
|
+
except Exception as e:
|
|
482
|
+
# If there was an error raised during runtime, then propagate
|
|
483
|
+
delim = f'File "{module_path}"'
|
|
484
|
+
exception = stderr
|
|
485
|
+
if delim in exception:
|
|
486
|
+
exception = f"{delim}{exception.split(delim, 1)[1]}"
|
|
487
|
+
# print extractor logging at error level
|
|
488
|
+
logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
|
|
489
|
+
raise Exception(exception) from e
|
|
490
|
+
# ensure that extractor logging is available
|
|
491
|
+
logger.info(f"maco extractor stderr:\n{stderr}")
|
|
492
|
+
return loaded
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import timeit
|
|
3
|
+
|
|
4
|
+
from demo_extractors.complex import complex
|
|
5
|
+
from maco import base_test
|
|
6
|
+
|
|
7
|
+
# instance of extractor for synthetic comparison to maco
|
|
8
|
+
instance = complex.Complex()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LocalBaseTest(base_test.BaseTest):
|
|
12
|
+
name = "Complex"
|
|
13
|
+
path = os.path.join(__file__, "../../demo_extractors")
|
|
14
|
+
create_venv = False
|
|
15
|
+
|
|
16
|
+
@classmethod
|
|
17
|
+
def setUpClass(cls) -> None:
|
|
18
|
+
super().setUpClass()
|
|
19
|
+
cls.input_file = cls.load_cart("data/trigger_complex.txt.cart")
|
|
20
|
+
cls.input_file.seek(0)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestComplexSynthetic(LocalBaseTest):
|
|
24
|
+
"""Test extractors work bypassing maco."""
|
|
25
|
+
|
|
26
|
+
def test_extract(self):
|
|
27
|
+
self.input_file.seek(0)
|
|
28
|
+
raw = self.input_file.read()
|
|
29
|
+
self.input_file.seek(0)
|
|
30
|
+
# run yara rules against sample
|
|
31
|
+
matches = instance.yara_compiled.match(data=raw)
|
|
32
|
+
self.assertEqual(len(matches), 2)
|
|
33
|
+
result = instance.run(self.input_file, [])
|
|
34
|
+
self.assertEqual(result.family, "complex")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TestComplexNoVenv(LocalBaseTest):
|
|
38
|
+
"""Test extractors work without full venv isolation."""
|
|
39
|
+
|
|
40
|
+
def test_extract(self):
|
|
41
|
+
self.input_file.seek(0)
|
|
42
|
+
ret = self.extract(self.input_file)
|
|
43
|
+
self.assertEqual(ret["family"], "complex")
|
|
44
|
+
self.assertEqual(ret["version"], "5")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class TestComplexVenv(LocalBaseTest):
|
|
48
|
+
"""Test extractors work when run with virtual environments."""
|
|
49
|
+
|
|
50
|
+
create_venv = True
|
|
51
|
+
|
|
52
|
+
def test_extract(self):
|
|
53
|
+
self.input_file.seek(0)
|
|
54
|
+
ret = self.extract(self.input_file)
|
|
55
|
+
self.assertEqual(ret["family"], "complex")
|
|
56
|
+
self.assertEqual(ret["version"], "5")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def make_synthetic():
|
|
60
|
+
TestComplexSynthetic.setUpClass()
|
|
61
|
+
tc = TestComplexSynthetic()
|
|
62
|
+
tc.setUp()
|
|
63
|
+
return tc
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def make_no_venv():
|
|
67
|
+
TestComplexNoVenv.setUpClass()
|
|
68
|
+
tc = TestComplexNoVenv()
|
|
69
|
+
tc.setUp()
|
|
70
|
+
return tc
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def make_venv():
|
|
74
|
+
TestComplexVenv.setUpClass()
|
|
75
|
+
tc = TestComplexVenv()
|
|
76
|
+
tc.setUp()
|
|
77
|
+
return tc
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
if __name__ == "__main__":
|
|
81
|
+
trials = 1000
|
|
82
|
+
print(f"num trials: {trials}")
|
|
83
|
+
print("results are number of seconds to execute total number of trials")
|
|
84
|
+
print("synthetic comparison (directly import and execute extractor)")
|
|
85
|
+
print(
|
|
86
|
+
timeit.timeit(
|
|
87
|
+
"tc.test_extract()",
|
|
88
|
+
setup="from __main__ import make_synthetic; tc=make_synthetic()",
|
|
89
|
+
number=trials,
|
|
90
|
+
)
|
|
91
|
+
)
|
|
92
|
+
print("maco no venv isolation")
|
|
93
|
+
print(
|
|
94
|
+
timeit.timeit(
|
|
95
|
+
"tc.test_extract()",
|
|
96
|
+
setup="from __main__ import make_no_venv; tc=make_no_venv()",
|
|
97
|
+
number=trials,
|
|
98
|
+
)
|
|
99
|
+
)
|
|
100
|
+
print("maco venv isolation")
|
|
101
|
+
print(
|
|
102
|
+
timeit.timeit(
|
|
103
|
+
"tc.test_extract()",
|
|
104
|
+
setup="from __main__ import make_venv; tc=make_venv()",
|
|
105
|
+
number=trials,
|
|
106
|
+
)
|
|
107
|
+
)
|
|
@@ -19,9 +19,12 @@ class TestLimitOther(base_test.BaseTest):
|
|
|
19
19
|
self.assertEqual(ret["family"], "specify_other")
|
|
20
20
|
self.assertEqual(ret["campaign_id"], ["12345"])
|
|
21
21
|
|
|
22
|
+
|
|
22
23
|
class TestComplex(base_test.BaseTest):
|
|
24
|
+
"""Test that complex extractor can be used in base environment."""
|
|
23
25
|
name = "Complex"
|
|
24
26
|
path = os.path.join(__file__, "../../demo_extractors")
|
|
27
|
+
create_venv = False
|
|
25
28
|
|
|
26
29
|
def test_extract(self):
|
|
27
30
|
"""Tests that we can run an extractor through maco."""
|
|
@@ -43,3 +46,15 @@ class TestComplex(base_test.BaseTest):
|
|
|
43
46
|
data = io.BytesIO(b"my malwarez")
|
|
44
47
|
result = instance.run(data, [])
|
|
45
48
|
self.assertEqual(result.family, "complex")
|
|
49
|
+
|
|
50
|
+
class TestComplexVenv(base_test.BaseTest):
|
|
51
|
+
"""Test that complex extractor can be used in full venv isolation."""
|
|
52
|
+
name = "Complex"
|
|
53
|
+
path = os.path.join(__file__, "../../demo_extractors")
|
|
54
|
+
create_venv = True
|
|
55
|
+
|
|
56
|
+
def test_extract(self):
|
|
57
|
+
"""Tests that we can run an extractor through maco."""
|
|
58
|
+
ret = self.extract(self.load_cart("data/trigger_complex.txt.cart"))
|
|
59
|
+
self.assertEqual(ret["family"], "complex")
|
|
60
|
+
self.assertEqual(ret["version"], "5")
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import pytest
|
|
3
2
|
import sys
|
|
4
3
|
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
5
6
|
from maco.collector import Collector
|
|
6
7
|
|
|
7
8
|
INIT_MODULES = list(sys.modules.keys())
|
|
@@ -105,9 +106,9 @@ def test_public_projects(repository_url: str, extractors: list, python_minor: in
|
|
|
105
106
|
# Ensure that any changes we make doesn't break usage of public projects
|
|
106
107
|
# which can affect downstream systems using like library (ie. Assemblyline)
|
|
107
108
|
import sys
|
|
109
|
+
from tempfile import TemporaryDirectory
|
|
108
110
|
|
|
109
111
|
from git import Repo
|
|
110
|
-
from tempfile import TemporaryDirectory
|
|
111
112
|
|
|
112
113
|
if sys.version_info >= (3, python_minor):
|
|
113
114
|
with TemporaryDirectory() as working_dir:
|
|
@@ -123,8 +124,9 @@ def test_public_projects(repository_url: str, extractors: list, python_minor: in
|
|
|
123
124
|
|
|
124
125
|
|
|
125
126
|
def test_module_confusion():
|
|
126
|
-
from tempfile import TemporaryDirectory
|
|
127
127
|
import shutil
|
|
128
|
+
from tempfile import TemporaryDirectory
|
|
129
|
+
|
|
128
130
|
import git
|
|
129
131
|
|
|
130
132
|
# Directories that have the same name as the Python module, shouldn't cause confusion on loading the right module
|
|
@@ -7,4 +7,4 @@ deps =
|
|
|
7
7
|
-r requirements.txt
|
|
8
8
|
-r tests/requirements.txt
|
|
9
9
|
# run the tests
|
|
10
|
-
commands = python -m pytest -p no:cacheprovider --durations=10 -ra -q -k "not git and not extractors" -vv -W ignore::DeprecationWarning
|
|
10
|
+
commands = python -m pytest tests/ -p no:cacheprovider --durations=10 -ra -q -k "not git and not extractors" -vv -W ignore::DeprecationWarning
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|