maco 1.2.0__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {maco-1.2.0/maco.egg-info → maco-1.2.1}/PKG-INFO +2 -3
- {maco-1.2.0 → maco-1.2.1}/demo_extractors/complex/complex.py +3 -3
- maco-1.2.1/maco/collector.py +137 -0
- {maco-1.2.0 → maco-1.2.1}/maco/utils.py +21 -8
- {maco-1.2.0/model_setup → maco-1.2.1}/maco/yara.py +4 -4
- {maco-1.2.0 → maco-1.2.1/maco.egg-info}/PKG-INFO +2 -3
- {maco-1.2.0 → maco-1.2.1}/maco.egg-info/SOURCES.txt +2 -1
- {maco-1.2.0 → maco-1.2.1}/maco.egg-info/requires.txt +1 -1
- maco-1.2.1/model_setup/maco/collector.py +137 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/maco/utils.py +21 -8
- {maco-1.2.0 → maco-1.2.1/model_setup}/maco/yara.py +4 -4
- {maco-1.2.0 → maco-1.2.1}/pipelines/publish.yaml +7 -7
- maco-1.2.1/pipelines/test.yaml +45 -0
- {maco-1.2.0 → maco-1.2.1}/pyproject.toml +5 -10
- {maco-1.2.0 → maco-1.2.1}/requirements.txt +1 -1
- maco-1.2.1/tests/data/trigger_complex.txt +6 -0
- maco-1.2.1/tests/test_demo_extractors.py +60 -0
- maco-1.2.1/tox.ini +10 -0
- maco-1.2.0/maco/collector.py +0 -131
- maco-1.2.0/model_setup/maco/collector.py +0 -131
- maco-1.2.0/pipelines/test.yaml +0 -41
- maco-1.2.0/setup.py +0 -25
- maco-1.2.0/tox.ini +0 -10
- {maco-1.2.0 → maco-1.2.1}/.gitignore +0 -0
- {maco-1.2.0 → maco-1.2.1}/.vscode/settings.json +0 -0
- {maco-1.2.0 → maco-1.2.1}/LICENSE.md +0 -0
- {maco-1.2.0 → maco-1.2.1}/README.md +0 -0
- {maco-1.2.0 → maco-1.2.1}/demo_extractors/complex/__init__.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/demo_extractors/complex/complex_utils.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/demo_extractors/elfy.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/demo_extractors/limit_other.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/demo_extractors/nothing.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/demo_extractors/shared.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco/__init__.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco/base_test.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco/cli.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco/extractor.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco/model/__init__.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco/model/model.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco.egg-info/dependency_links.txt +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco.egg-info/entry_points.txt +0 -0
- {maco-1.2.0 → maco-1.2.1}/maco.egg-info/top_level.txt +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/LICENSE.md +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/README.md +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/maco/__init__.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/maco/base_test.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/maco/cli.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/maco/extractor.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/maco/model/__init__.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/maco/model/model.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/pyproject.toml +0 -0
- {maco-1.2.0 → maco-1.2.1}/model_setup/setup.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/setup.cfg +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/data/example.txt.cart +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/extractors/__init__.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/extractors/basic.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/extractors/basic_longer.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/extractors/bob/__init__.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/extractors/bob/bob.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/extractors/test_basic.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/pytest.ini +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/requirements.txt +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/test_base_test.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/test_detection.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/test_extractor.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/test_helpers.py +0 -0
- {maco-1.2.0 → maco-1.2.1}/tests/test_model.py +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: maco
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Author: sl-govau
|
|
5
|
-
Author-email:
|
|
6
5
|
Maintainer: cccs-rs
|
|
7
6
|
License: MIT License
|
|
8
7
|
|
|
@@ -35,7 +34,7 @@ Requires-Dist: pydantic>=2.0.0
|
|
|
35
34
|
Requires-Dist: tomli>=1.1.0; python_version < "3.11"
|
|
36
35
|
Requires-Dist: uv
|
|
37
36
|
Requires-Dist: yara-python
|
|
38
|
-
Requires-Dist: yara-x==0.10.0
|
|
37
|
+
Requires-Dist: yara-x==0.11.0
|
|
39
38
|
|
|
40
39
|
# Maco - Malware config extractor framework
|
|
41
40
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from io import BytesIO
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import List, Optional
|
|
3
3
|
|
|
4
4
|
from maco import extractor, model, yara
|
|
5
5
|
|
|
6
|
-
from
|
|
6
|
+
from complex import complex_utils
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class Complex(extractor.Extractor):
|
|
@@ -50,7 +50,7 @@ class Complex(extractor.Extractor):
|
|
|
50
50
|
other = complex_utils.getdata()["result"]
|
|
51
51
|
self.logger.debug("got data from lib")
|
|
52
52
|
# example - accessing yara strings
|
|
53
|
-
strings = {
|
|
53
|
+
strings = sorted({z.plaintext().decode("utf8") for x in matches for y in x.strings for z in y.instances})
|
|
54
54
|
self.logger.debug(f"{strings=}")
|
|
55
55
|
# construct model of results
|
|
56
56
|
tmp = model.ExtractorModel(family=self.family)
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Convenience functions for discovering your extractors."""
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
from multiprocessing import Manager, Process
|
|
7
|
+
from tempfile import NamedTemporaryFile
|
|
8
|
+
from types import ModuleType
|
|
9
|
+
from typing import Any, BinaryIO, Dict, List, Union
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
from maco import extractor, model, utils, yara
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ExtractorLoadError(Exception):
|
|
17
|
+
pass
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger("maco.lib.helpers")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
|
|
24
|
+
"""Enforce types and verify properties, and remove defaults."""
|
|
25
|
+
if not resp:
|
|
26
|
+
return None
|
|
27
|
+
# check the response is valid for its own model
|
|
28
|
+
# this is useful if a restriction on the 'other' dictionary is needed
|
|
29
|
+
resp_model = type(resp)
|
|
30
|
+
if resp_model != model.ExtractorModel and hasattr(resp_model, "model_validate"):
|
|
31
|
+
resp = resp_model.model_validate(resp)
|
|
32
|
+
# check the response is valid according to the ExtractorModel
|
|
33
|
+
resp = model.ExtractorModel.model_validate(resp)
|
|
34
|
+
# coerce sets to correct types
|
|
35
|
+
# otherwise we end up with sets where we expect lists
|
|
36
|
+
resp = model.ExtractorModel(**resp.model_dump())
|
|
37
|
+
# dump model to dict
|
|
38
|
+
return resp.model_dump(exclude_defaults=True)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Collector:
|
|
42
|
+
def __init__(
|
|
43
|
+
self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
|
|
44
|
+
):
|
|
45
|
+
"""Discover and load extractors from file system."""
|
|
46
|
+
path_extractors = os.path.realpath(path_extractors)
|
|
47
|
+
self.path: str = path_extractors
|
|
48
|
+
self.extractors: Dict[str, Dict[str, str]] = {}
|
|
49
|
+
|
|
50
|
+
with Manager() as manager:
|
|
51
|
+
extractors = manager.dict()
|
|
52
|
+
namespaced_rules = manager.dict()
|
|
53
|
+
|
|
54
|
+
def extractor_module_callback(module: ModuleType, venv: str):
|
|
55
|
+
members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
|
|
56
|
+
for member in members:
|
|
57
|
+
name, member = member
|
|
58
|
+
if exclude and name in exclude:
|
|
59
|
+
# Module is part of the exclusion list, skip
|
|
60
|
+
logger.debug(f"exclude excluded '{name}'")
|
|
61
|
+
return
|
|
62
|
+
|
|
63
|
+
if include and name not in include:
|
|
64
|
+
# Module wasn't part of the inclusion list, skip
|
|
65
|
+
logger.debug(f"include excluded '{name}'")
|
|
66
|
+
return
|
|
67
|
+
|
|
68
|
+
# initialise and register
|
|
69
|
+
logger.debug(f"register '{name}'")
|
|
70
|
+
extractors[name] = dict(
|
|
71
|
+
venv=venv,
|
|
72
|
+
module_path=module.__file__,
|
|
73
|
+
module_name=member.__module__,
|
|
74
|
+
extractor_class=member.__name__,
|
|
75
|
+
)
|
|
76
|
+
namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
|
|
77
|
+
|
|
78
|
+
# Find the extractors within the given directory
|
|
79
|
+
# Execute within a child process to ensure main process interpreter is kept clean
|
|
80
|
+
p = Process(
|
|
81
|
+
target=utils.import_extractors,
|
|
82
|
+
args=(
|
|
83
|
+
path_extractors,
|
|
84
|
+
yara.compile(source=utils.MACO_YARA_RULE),
|
|
85
|
+
extractor_module_callback,
|
|
86
|
+
logger,
|
|
87
|
+
create_venv and os.path.isdir(path_extractors),
|
|
88
|
+
),
|
|
89
|
+
)
|
|
90
|
+
p.start()
|
|
91
|
+
p.join()
|
|
92
|
+
|
|
93
|
+
self.extractors = dict(extractors)
|
|
94
|
+
if not self.extractors:
|
|
95
|
+
raise ExtractorLoadError("no extractors were loaded")
|
|
96
|
+
logger.debug(f"found extractors {list(self.extractors.keys())}\n")
|
|
97
|
+
|
|
98
|
+
# compile yara rules gathered from extractors
|
|
99
|
+
self.rules = yara.compile(sources=dict(namespaced_rules))
|
|
100
|
+
|
|
101
|
+
def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
|
|
102
|
+
"""Return extractors that should run based on yara rules."""
|
|
103
|
+
# execute yara rules on file to find extractors we should run
|
|
104
|
+
# yara can't run on a stream so we give it a bytestring
|
|
105
|
+
matches = self.rules.match(data=stream.read())
|
|
106
|
+
stream.seek(0)
|
|
107
|
+
if not matches:
|
|
108
|
+
return
|
|
109
|
+
# get all rules that hit for each extractor
|
|
110
|
+
runs = {}
|
|
111
|
+
for match in matches:
|
|
112
|
+
runs.setdefault(match.namespace, []).append(match)
|
|
113
|
+
|
|
114
|
+
return runs
|
|
115
|
+
|
|
116
|
+
def extract(
|
|
117
|
+
self,
|
|
118
|
+
stream: BinaryIO,
|
|
119
|
+
matches: List[yara.Match],
|
|
120
|
+
extractor_name: str,
|
|
121
|
+
) -> Dict[str, Any]:
|
|
122
|
+
"""Run extractor with stream and verify output matches the model."""
|
|
123
|
+
extractor = self.extractors[extractor_name]
|
|
124
|
+
try:
|
|
125
|
+
# Run extractor on a copy of the sample
|
|
126
|
+
with NamedTemporaryFile() as sample_path:
|
|
127
|
+
sample_path.write(stream.read())
|
|
128
|
+
sample_path.flush()
|
|
129
|
+
# enforce types and verify properties, and remove defaults
|
|
130
|
+
return _verify_response(utils.run_extractor(sample_path.name, **extractor))
|
|
131
|
+
except Exception:
|
|
132
|
+
# caller can deal with the exception
|
|
133
|
+
raise
|
|
134
|
+
finally:
|
|
135
|
+
# make sure to reset where we are in the file
|
|
136
|
+
# otherwise follow on extractors are going to read 0 bytes
|
|
137
|
+
stream.seek(0)
|
|
@@ -6,9 +6,11 @@ import inspect
|
|
|
6
6
|
import json
|
|
7
7
|
import os
|
|
8
8
|
import re
|
|
9
|
+
import shutil
|
|
9
10
|
import subprocess
|
|
10
11
|
import sys
|
|
11
12
|
import tempfile
|
|
13
|
+
|
|
12
14
|
from maco import yara
|
|
13
15
|
|
|
14
16
|
if sys.version_info >= (3, 11):
|
|
@@ -21,8 +23,8 @@ from copy import deepcopy
|
|
|
21
23
|
from glob import glob
|
|
22
24
|
from logging import Logger
|
|
23
25
|
from pkgutil import walk_packages
|
|
24
|
-
from typing import Callable, Dict, Tuple, List, Set
|
|
25
26
|
from types import ModuleType
|
|
27
|
+
from typing import Callable, Dict, List, Set, Tuple
|
|
26
28
|
|
|
27
29
|
from maco.extractor import Extractor
|
|
28
30
|
|
|
@@ -67,7 +69,11 @@ import importlib
|
|
|
67
69
|
import json
|
|
68
70
|
import os
|
|
69
71
|
import sys
|
|
70
|
-
|
|
72
|
+
|
|
73
|
+
try:
|
|
74
|
+
from maco import yara
|
|
75
|
+
except:
|
|
76
|
+
import yara
|
|
71
77
|
|
|
72
78
|
from base64 import b64encode
|
|
73
79
|
parent_package_path = "{parent_package_path}"
|
|
@@ -244,6 +250,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
244
250
|
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
|
|
245
251
|
venvs.append(venv_path)
|
|
246
252
|
|
|
253
|
+
# Cleanup any build directories that are the product of package installation
|
|
254
|
+
expected_build_path = os.path.join(dir, "build")
|
|
255
|
+
if os.path.exists(expected_build_path):
|
|
256
|
+
shutil.rmtree(expected_build_path)
|
|
257
|
+
|
|
247
258
|
# Add directories to our visited list and check the parent of this directory on the next loop
|
|
248
259
|
visited_dirs.append(dir)
|
|
249
260
|
dir = os.path.dirname(dir)
|
|
@@ -399,21 +410,23 @@ def import_extractors(
|
|
|
399
410
|
register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
|
|
400
411
|
|
|
401
412
|
|
|
402
|
-
def run_in_venv(
|
|
413
|
+
def run_extractor(
|
|
403
414
|
sample_path,
|
|
404
|
-
|
|
415
|
+
module_name,
|
|
416
|
+
extractor_class,
|
|
405
417
|
module_path,
|
|
406
418
|
venv,
|
|
407
419
|
venv_script=VENV_SCRIPT,
|
|
408
420
|
json_decoder=Base64Decoder,
|
|
409
421
|
) -> Dict[str, dict]:
|
|
410
422
|
# Write temporary script in the same directory as extractor to resolve relative imports
|
|
411
|
-
python_exe =
|
|
423
|
+
python_exe = sys.executable
|
|
424
|
+
if venv:
|
|
425
|
+
# If there is a linked virtual environment, execute within that environment
|
|
426
|
+
python_exe = os.path.join(venv, "bin", "python")
|
|
412
427
|
dirname = os.path.dirname(module_path)
|
|
413
428
|
with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
|
|
414
429
|
with tempfile.NamedTemporaryFile() as output:
|
|
415
|
-
module_name = module.__module__
|
|
416
|
-
module_class = module.__name__
|
|
417
430
|
parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
|
|
418
431
|
root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
|
|
419
432
|
|
|
@@ -421,7 +434,7 @@ def run_in_venv(
|
|
|
421
434
|
venv_script.format(
|
|
422
435
|
parent_package_path=parent_package_path,
|
|
423
436
|
module_name=module_name,
|
|
424
|
-
module_class=module_class,
|
|
437
|
+
module_class=extractor_class,
|
|
425
438
|
sample_path=sample_path,
|
|
426
439
|
output_path=output.name,
|
|
427
440
|
)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import re
|
|
2
|
-
import yara
|
|
3
|
-
import yara_x
|
|
4
|
-
|
|
5
2
|
from collections import namedtuple
|
|
6
3
|
from itertools import cycle
|
|
7
4
|
from typing import Dict
|
|
8
5
|
|
|
6
|
+
import yara
|
|
7
|
+
import yara_x
|
|
8
|
+
|
|
9
9
|
RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
|
|
10
10
|
|
|
11
11
|
|
|
@@ -42,7 +42,7 @@ class Match:
|
|
|
42
42
|
def __init__(self, rule: yara_x.Rule, file_content: bytes):
|
|
43
43
|
self.rule = rule.identifier
|
|
44
44
|
self.namespace = rule.namespace
|
|
45
|
-
self.tags = rule.tags
|
|
45
|
+
self.tags = list(rule.tags) or []
|
|
46
46
|
self.meta = dict()
|
|
47
47
|
# Ensure metadata doesn't get overwritten
|
|
48
48
|
for k, v in rule.metadata:
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: maco
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Author: sl-govau
|
|
5
|
-
Author-email:
|
|
6
5
|
Maintainer: cccs-rs
|
|
7
6
|
License: MIT License
|
|
8
7
|
|
|
@@ -35,7 +34,7 @@ Requires-Dist: pydantic>=2.0.0
|
|
|
35
34
|
Requires-Dist: tomli>=1.1.0; python_version < "3.11"
|
|
36
35
|
Requires-Dist: uv
|
|
37
36
|
Requires-Dist: yara-python
|
|
38
|
-
Requires-Dist: yara-x==0.10.0
|
|
37
|
+
Requires-Dist: yara-x==0.11.0
|
|
39
38
|
|
|
40
39
|
# Maco - Malware config extractor framework
|
|
41
40
|
|
|
@@ -3,7 +3,6 @@ LICENSE.md
|
|
|
3
3
|
README.md
|
|
4
4
|
pyproject.toml
|
|
5
5
|
requirements.txt
|
|
6
|
-
setup.py
|
|
7
6
|
tox.ini
|
|
8
7
|
.vscode/settings.json
|
|
9
8
|
demo_extractors/elfy.py
|
|
@@ -47,11 +46,13 @@ pipelines/test.yaml
|
|
|
47
46
|
tests/pytest.ini
|
|
48
47
|
tests/requirements.txt
|
|
49
48
|
tests/test_base_test.py
|
|
49
|
+
tests/test_demo_extractors.py
|
|
50
50
|
tests/test_detection.py
|
|
51
51
|
tests/test_extractor.py
|
|
52
52
|
tests/test_helpers.py
|
|
53
53
|
tests/test_model.py
|
|
54
54
|
tests/data/example.txt.cart
|
|
55
|
+
tests/data/trigger_complex.txt
|
|
55
56
|
tests/extractors/__init__.py
|
|
56
57
|
tests/extractors/basic.py
|
|
57
58
|
tests/extractors/basic_longer.py
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Convenience functions for discovering your extractors."""
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
from multiprocessing import Manager, Process
|
|
7
|
+
from tempfile import NamedTemporaryFile
|
|
8
|
+
from types import ModuleType
|
|
9
|
+
from typing import Any, BinaryIO, Dict, List, Union
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
from maco import extractor, model, utils, yara
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ExtractorLoadError(Exception):
|
|
17
|
+
pass
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger("maco.lib.helpers")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
|
|
24
|
+
"""Enforce types and verify properties, and remove defaults."""
|
|
25
|
+
if not resp:
|
|
26
|
+
return None
|
|
27
|
+
# check the response is valid for its own model
|
|
28
|
+
# this is useful if a restriction on the 'other' dictionary is needed
|
|
29
|
+
resp_model = type(resp)
|
|
30
|
+
if resp_model != model.ExtractorModel and hasattr(resp_model, "model_validate"):
|
|
31
|
+
resp = resp_model.model_validate(resp)
|
|
32
|
+
# check the response is valid according to the ExtractorModel
|
|
33
|
+
resp = model.ExtractorModel.model_validate(resp)
|
|
34
|
+
# coerce sets to correct types
|
|
35
|
+
# otherwise we end up with sets where we expect lists
|
|
36
|
+
resp = model.ExtractorModel(**resp.model_dump())
|
|
37
|
+
# dump model to dict
|
|
38
|
+
return resp.model_dump(exclude_defaults=True)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Collector:
|
|
42
|
+
def __init__(
|
|
43
|
+
self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
|
|
44
|
+
):
|
|
45
|
+
"""Discover and load extractors from file system."""
|
|
46
|
+
path_extractors = os.path.realpath(path_extractors)
|
|
47
|
+
self.path: str = path_extractors
|
|
48
|
+
self.extractors: Dict[str, Dict[str, str]] = {}
|
|
49
|
+
|
|
50
|
+
with Manager() as manager:
|
|
51
|
+
extractors = manager.dict()
|
|
52
|
+
namespaced_rules = manager.dict()
|
|
53
|
+
|
|
54
|
+
def extractor_module_callback(module: ModuleType, venv: str):
|
|
55
|
+
members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
|
|
56
|
+
for member in members:
|
|
57
|
+
name, member = member
|
|
58
|
+
if exclude and name in exclude:
|
|
59
|
+
# Module is part of the exclusion list, skip
|
|
60
|
+
logger.debug(f"exclude excluded '{name}'")
|
|
61
|
+
return
|
|
62
|
+
|
|
63
|
+
if include and name not in include:
|
|
64
|
+
# Module wasn't part of the inclusion list, skip
|
|
65
|
+
logger.debug(f"include excluded '{name}'")
|
|
66
|
+
return
|
|
67
|
+
|
|
68
|
+
# initialise and register
|
|
69
|
+
logger.debug(f"register '{name}'")
|
|
70
|
+
extractors[name] = dict(
|
|
71
|
+
venv=venv,
|
|
72
|
+
module_path=module.__file__,
|
|
73
|
+
module_name=member.__module__,
|
|
74
|
+
extractor_class=member.__name__,
|
|
75
|
+
)
|
|
76
|
+
namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
|
|
77
|
+
|
|
78
|
+
# Find the extractors within the given directory
|
|
79
|
+
# Execute within a child process to ensure main process interpreter is kept clean
|
|
80
|
+
p = Process(
|
|
81
|
+
target=utils.import_extractors,
|
|
82
|
+
args=(
|
|
83
|
+
path_extractors,
|
|
84
|
+
yara.compile(source=utils.MACO_YARA_RULE),
|
|
85
|
+
extractor_module_callback,
|
|
86
|
+
logger,
|
|
87
|
+
create_venv and os.path.isdir(path_extractors),
|
|
88
|
+
),
|
|
89
|
+
)
|
|
90
|
+
p.start()
|
|
91
|
+
p.join()
|
|
92
|
+
|
|
93
|
+
self.extractors = dict(extractors)
|
|
94
|
+
if not self.extractors:
|
|
95
|
+
raise ExtractorLoadError("no extractors were loaded")
|
|
96
|
+
logger.debug(f"found extractors {list(self.extractors.keys())}\n")
|
|
97
|
+
|
|
98
|
+
# compile yara rules gathered from extractors
|
|
99
|
+
self.rules = yara.compile(sources=dict(namespaced_rules))
|
|
100
|
+
|
|
101
|
+
def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
|
|
102
|
+
"""Return extractors that should run based on yara rules."""
|
|
103
|
+
# execute yara rules on file to find extractors we should run
|
|
104
|
+
# yara can't run on a stream so we give it a bytestring
|
|
105
|
+
matches = self.rules.match(data=stream.read())
|
|
106
|
+
stream.seek(0)
|
|
107
|
+
if not matches:
|
|
108
|
+
return
|
|
109
|
+
# get all rules that hit for each extractor
|
|
110
|
+
runs = {}
|
|
111
|
+
for match in matches:
|
|
112
|
+
runs.setdefault(match.namespace, []).append(match)
|
|
113
|
+
|
|
114
|
+
return runs
|
|
115
|
+
|
|
116
|
+
def extract(
|
|
117
|
+
self,
|
|
118
|
+
stream: BinaryIO,
|
|
119
|
+
matches: List[yara.Match],
|
|
120
|
+
extractor_name: str,
|
|
121
|
+
) -> Dict[str, Any]:
|
|
122
|
+
"""Run extractor with stream and verify output matches the model."""
|
|
123
|
+
extractor = self.extractors[extractor_name]
|
|
124
|
+
try:
|
|
125
|
+
# Run extractor on a copy of the sample
|
|
126
|
+
with NamedTemporaryFile() as sample_path:
|
|
127
|
+
sample_path.write(stream.read())
|
|
128
|
+
sample_path.flush()
|
|
129
|
+
# enforce types and verify properties, and remove defaults
|
|
130
|
+
return _verify_response(utils.run_extractor(sample_path.name, **extractor))
|
|
131
|
+
except Exception:
|
|
132
|
+
# caller can deal with the exception
|
|
133
|
+
raise
|
|
134
|
+
finally:
|
|
135
|
+
# make sure to reset where we are in the file
|
|
136
|
+
# otherwise follow on extractors are going to read 0 bytes
|
|
137
|
+
stream.seek(0)
|
|
@@ -6,9 +6,11 @@ import inspect
|
|
|
6
6
|
import json
|
|
7
7
|
import os
|
|
8
8
|
import re
|
|
9
|
+
import shutil
|
|
9
10
|
import subprocess
|
|
10
11
|
import sys
|
|
11
12
|
import tempfile
|
|
13
|
+
|
|
12
14
|
from maco import yara
|
|
13
15
|
|
|
14
16
|
if sys.version_info >= (3, 11):
|
|
@@ -21,8 +23,8 @@ from copy import deepcopy
|
|
|
21
23
|
from glob import glob
|
|
22
24
|
from logging import Logger
|
|
23
25
|
from pkgutil import walk_packages
|
|
24
|
-
from typing import Callable, Dict, Tuple, List, Set
|
|
25
26
|
from types import ModuleType
|
|
27
|
+
from typing import Callable, Dict, List, Set, Tuple
|
|
26
28
|
|
|
27
29
|
from maco.extractor import Extractor
|
|
28
30
|
|
|
@@ -67,7 +69,11 @@ import importlib
|
|
|
67
69
|
import json
|
|
68
70
|
import os
|
|
69
71
|
import sys
|
|
70
|
-
|
|
72
|
+
|
|
73
|
+
try:
|
|
74
|
+
from maco import yara
|
|
75
|
+
except:
|
|
76
|
+
import yara
|
|
71
77
|
|
|
72
78
|
from base64 import b64encode
|
|
73
79
|
parent_package_path = "{parent_package_path}"
|
|
@@ -244,6 +250,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
|
|
|
244
250
|
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
|
|
245
251
|
venvs.append(venv_path)
|
|
246
252
|
|
|
253
|
+
# Cleanup any build directories that are the product of package installation
|
|
254
|
+
expected_build_path = os.path.join(dir, "build")
|
|
255
|
+
if os.path.exists(expected_build_path):
|
|
256
|
+
shutil.rmtree(expected_build_path)
|
|
257
|
+
|
|
247
258
|
# Add directories to our visited list and check the parent of this directory on the next loop
|
|
248
259
|
visited_dirs.append(dir)
|
|
249
260
|
dir = os.path.dirname(dir)
|
|
@@ -399,21 +410,23 @@ def import_extractors(
|
|
|
399
410
|
register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
|
|
400
411
|
|
|
401
412
|
|
|
402
|
-
def run_in_venv(
|
|
413
|
+
def run_extractor(
|
|
403
414
|
sample_path,
|
|
404
|
-
|
|
415
|
+
module_name,
|
|
416
|
+
extractor_class,
|
|
405
417
|
module_path,
|
|
406
418
|
venv,
|
|
407
419
|
venv_script=VENV_SCRIPT,
|
|
408
420
|
json_decoder=Base64Decoder,
|
|
409
421
|
) -> Dict[str, dict]:
|
|
410
422
|
# Write temporary script in the same directory as extractor to resolve relative imports
|
|
411
|
-
python_exe =
|
|
423
|
+
python_exe = sys.executable
|
|
424
|
+
if venv:
|
|
425
|
+
# If there is a linked virtual environment, execute within that environment
|
|
426
|
+
python_exe = os.path.join(venv, "bin", "python")
|
|
412
427
|
dirname = os.path.dirname(module_path)
|
|
413
428
|
with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
|
|
414
429
|
with tempfile.NamedTemporaryFile() as output:
|
|
415
|
-
module_name = module.__module__
|
|
416
|
-
module_class = module.__name__
|
|
417
430
|
parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
|
|
418
431
|
root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
|
|
419
432
|
|
|
@@ -421,7 +434,7 @@ def run_in_venv(
|
|
|
421
434
|
venv_script.format(
|
|
422
435
|
parent_package_path=parent_package_path,
|
|
423
436
|
module_name=module_name,
|
|
424
|
-
module_class=module_class,
|
|
437
|
+
module_class=extractor_class,
|
|
425
438
|
sample_path=sample_path,
|
|
426
439
|
output_path=output.name,
|
|
427
440
|
)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import re
|
|
2
|
-
import yara
|
|
3
|
-
import yara_x
|
|
4
|
-
|
|
5
2
|
from collections import namedtuple
|
|
6
3
|
from itertools import cycle
|
|
7
4
|
from typing import Dict
|
|
8
5
|
|
|
6
|
+
import yara
|
|
7
|
+
import yara_x
|
|
8
|
+
|
|
9
9
|
RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
|
|
10
10
|
|
|
11
11
|
|
|
@@ -42,7 +42,7 @@ class Match:
|
|
|
42
42
|
def __init__(self, rule: yara_x.Rule, file_content: bytes):
|
|
43
43
|
self.rule = rule.identifier
|
|
44
44
|
self.namespace = rule.namespace
|
|
45
|
-
self.tags = rule.tags
|
|
45
|
+
self.tags = list(rule.tags) or []
|
|
46
46
|
self.meta = dict()
|
|
47
47
|
# Ensure metadata doesn't get overwritten
|
|
48
48
|
for k, v in rule.metadata:
|
|
@@ -9,7 +9,7 @@ trigger:
|
|
|
9
9
|
pr: none
|
|
10
10
|
|
|
11
11
|
pool:
|
|
12
|
-
vmImage: "ubuntu-
|
|
12
|
+
vmImage: "ubuntu-22.04"
|
|
13
13
|
|
|
14
14
|
jobs:
|
|
15
15
|
- job: test
|
|
@@ -20,12 +20,12 @@ jobs:
|
|
|
20
20
|
python.version: '3.8'
|
|
21
21
|
Python39:
|
|
22
22
|
python.version: '3.9'
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
23
|
+
Python310:
|
|
24
|
+
python.version: '3.10'
|
|
25
|
+
Python311:
|
|
26
|
+
python.version: '3.11'
|
|
27
|
+
Python312:
|
|
28
|
+
python.version: '3.12'
|
|
29
29
|
steps:
|
|
30
30
|
- task: UsePythonVersion@0
|
|
31
31
|
displayName: 'Use Python $(python.version)'
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: tests
|
|
2
|
+
|
|
3
|
+
trigger: ["*"]
|
|
4
|
+
pr: ["*"]
|
|
5
|
+
|
|
6
|
+
pool:
|
|
7
|
+
vmImage: "ubuntu-22.04"
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
- job: run_test
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
Python3_8:
|
|
14
|
+
python.version: "3.8"
|
|
15
|
+
Python3_9:
|
|
16
|
+
python.version: "3.9"
|
|
17
|
+
Python3_10:
|
|
18
|
+
python.version: "3.10"
|
|
19
|
+
Python3_11:
|
|
20
|
+
python.version: "3.11"
|
|
21
|
+
Python3_12:
|
|
22
|
+
python.version: "3.12"
|
|
23
|
+
timeoutInMinutes: 10
|
|
24
|
+
|
|
25
|
+
steps:
|
|
26
|
+
- task: UsePythonVersion@0
|
|
27
|
+
displayName: Set python version
|
|
28
|
+
inputs:
|
|
29
|
+
versionSpec: "$(python.version)"
|
|
30
|
+
|
|
31
|
+
- script: |
|
|
32
|
+
runtests=true
|
|
33
|
+
if [ ! -d "$(pwd)/tests" ]; then
|
|
34
|
+
echo "No tests found"
|
|
35
|
+
runtest=false
|
|
36
|
+
else
|
|
37
|
+
python -m pip install -U tox
|
|
38
|
+
fi
|
|
39
|
+
echo "##vso[task.setvariable variable=runtests;]$runtests"
|
|
40
|
+
displayName: Install tox
|
|
41
|
+
|
|
42
|
+
- script: |
|
|
43
|
+
python -m tox -e py
|
|
44
|
+
displayName: "Run tests"
|
|
45
|
+
condition: and(succeeded(), eq(variables.runtests, true))
|
|
@@ -4,19 +4,10 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "maco"
|
|
7
|
-
dynamic = ["version"]
|
|
8
|
-
dependencies = [
|
|
9
|
-
"cart",
|
|
10
|
-
"pydantic>=2.0.0",
|
|
11
|
-
'tomli >= 1.1.0 ; python_version < "3.11"',
|
|
12
|
-
"uv",
|
|
13
|
-
"yara-python",
|
|
14
|
-
"yara-x==0.10.0",
|
|
15
|
-
]
|
|
7
|
+
dynamic = ["version", "readme", "dependencies"]
|
|
16
8
|
requires-python = ">=3.8"
|
|
17
9
|
authors = [{ name = "sl-govau" }]
|
|
18
10
|
maintainers = [{ name = "cccs-rs" }]
|
|
19
|
-
readme = "README.md"
|
|
20
11
|
license = { file = "LICENSE.md" }
|
|
21
12
|
|
|
22
13
|
classifiers = [
|
|
@@ -43,6 +34,10 @@ Issues = "https://github.com/CybercentreCanada/Maco/issues"
|
|
|
43
34
|
|
|
44
35
|
[tool.setuptools_scm]
|
|
45
36
|
|
|
37
|
+
[tool.setuptools.dynamic]
|
|
38
|
+
readme = { file = ["README.md"], content-type = "text/markdown" }
|
|
39
|
+
dependencies = { file = ["requirements.txt"] }
|
|
40
|
+
|
|
46
41
|
[tool.setuptools.packages.find]
|
|
47
42
|
where = ["."]
|
|
48
43
|
exclude = ["test", "tests", "extractors", "model_setup"]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import unittest
|
|
3
|
+
|
|
4
|
+
from maco import cli
|
|
5
|
+
from maco.collector import Collector
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestDemoExtractors(unittest.TestCase):
|
|
9
|
+
def test_complex(self):
|
|
10
|
+
path_file = os.path.normpath(
|
|
11
|
+
os.path.join(__file__, "../data/trigger_complex.txt")
|
|
12
|
+
)
|
|
13
|
+
collector = Collector(os.path.join(__file__, "../../demo_extractors"))
|
|
14
|
+
self.assertEqual(
|
|
15
|
+
set(collector.extractors.keys()),
|
|
16
|
+
{
|
|
17
|
+
"Elfy",
|
|
18
|
+
"Nothing",
|
|
19
|
+
"Complex",
|
|
20
|
+
"LimitOther",
|
|
21
|
+
},
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
with open(path_file, "rb") as stream:
|
|
25
|
+
ret = cli.process_file(
|
|
26
|
+
collector,
|
|
27
|
+
path_file,
|
|
28
|
+
stream,
|
|
29
|
+
pretty=True,
|
|
30
|
+
force=False,
|
|
31
|
+
include_base64=False,
|
|
32
|
+
)
|
|
33
|
+
self.assertEqual(
|
|
34
|
+
ret,
|
|
35
|
+
{
|
|
36
|
+
"Complex": {
|
|
37
|
+
"family": "complex",
|
|
38
|
+
"version": "5",
|
|
39
|
+
"decoded_strings": sorted(["Paradise", "Complex"]),
|
|
40
|
+
"binaries": [
|
|
41
|
+
{
|
|
42
|
+
"datatype": "payload",
|
|
43
|
+
"encryption": {"algorithm": "something"},
|
|
44
|
+
"sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
|
|
45
|
+
"size": 9,
|
|
46
|
+
"hex_sample": "736F6D652064617461",
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"http": [
|
|
50
|
+
{
|
|
51
|
+
"protocol": "https",
|
|
52
|
+
"hostname": "blarg5.com",
|
|
53
|
+
"path": "/malz/64",
|
|
54
|
+
"usage": "c2",
|
|
55
|
+
}
|
|
56
|
+
],
|
|
57
|
+
"encryption": [{"algorithm": "sha256"}],
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
)
|
maco-1.2.1/tox.ini
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
[tox]
|
|
2
|
+
envlist = py38,py39,py310,py311,py312
|
|
3
|
+
[testenv]
|
|
4
|
+
# install testing framework
|
|
5
|
+
deps =
|
|
6
|
+
pytest
|
|
7
|
+
-r requirements.txt
|
|
8
|
+
-r tests/requirements.txt
|
|
9
|
+
# run the tests
|
|
10
|
+
commands = python -m pytest -p no:cacheprovider --durations=10 -ra -q -k "not git and not extractors" -vv -W ignore::DeprecationWarning
|
maco-1.2.0/maco/collector.py
DELETED
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
"""Convenience functions for discovering your extractors."""
|
|
2
|
-
|
|
3
|
-
import inspect
|
|
4
|
-
import logging
|
|
5
|
-
import os
|
|
6
|
-
|
|
7
|
-
from tempfile import NamedTemporaryFile
|
|
8
|
-
from typing import Any, BinaryIO, Dict, List
|
|
9
|
-
from types import ModuleType
|
|
10
|
-
|
|
11
|
-
from maco import yara
|
|
12
|
-
from pydantic import BaseModel
|
|
13
|
-
|
|
14
|
-
from maco import extractor, model, utils
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class ExtractorLoadError(Exception):
|
|
18
|
-
pass
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
logger = logging.getLogger("maco.lib.helpers")
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def _verify_response(resp: BaseModel) -> Dict:
|
|
25
|
-
"""Enforce types and verify properties, and remove defaults."""
|
|
26
|
-
# check the response is valid for its own model
|
|
27
|
-
# this is useful if a restriction on the 'other' dictionary is needed
|
|
28
|
-
resp_model = type(resp)
|
|
29
|
-
if resp_model != model.ExtractorModel:
|
|
30
|
-
resp = resp_model.model_validate(resp)
|
|
31
|
-
# check the response is valid according to the ExtractorModel
|
|
32
|
-
resp = model.ExtractorModel.model_validate(resp)
|
|
33
|
-
# coerce sets to correct types
|
|
34
|
-
# otherwise we end up with sets where we expect lists
|
|
35
|
-
resp = model.ExtractorModel(**resp.model_dump())
|
|
36
|
-
# dump model to dict
|
|
37
|
-
return resp.model_dump(exclude_defaults=True)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class Collector:
|
|
41
|
-
def __init__(
|
|
42
|
-
self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
|
|
43
|
-
):
|
|
44
|
-
"""Discover and load extractors from file system."""
|
|
45
|
-
path_extractors = os.path.realpath(path_extractors)
|
|
46
|
-
self.path = path_extractors
|
|
47
|
-
self.extractors = {}
|
|
48
|
-
namespaced_rules = {}
|
|
49
|
-
|
|
50
|
-
def extractor_module_callback(module: ModuleType, venv: str):
|
|
51
|
-
members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
|
|
52
|
-
for member in members:
|
|
53
|
-
name, member = member
|
|
54
|
-
if exclude and name in exclude:
|
|
55
|
-
# Module is part of the exclusion list, skip
|
|
56
|
-
logger.debug(f"exclude excluded '{name}'")
|
|
57
|
-
return
|
|
58
|
-
|
|
59
|
-
if include and name not in include:
|
|
60
|
-
# Module wasn't part of the inclusion list, skip
|
|
61
|
-
logger.debug(f"include excluded '{name}'")
|
|
62
|
-
return
|
|
63
|
-
|
|
64
|
-
# initialise and register
|
|
65
|
-
logger.debug(f"register '{name}'")
|
|
66
|
-
self.extractors[name] = dict(module=member, venv=venv, module_path=module.__file__)
|
|
67
|
-
namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
|
|
68
|
-
|
|
69
|
-
# Find the extractors within the given directory
|
|
70
|
-
utils.import_extractors(
|
|
71
|
-
path_extractors,
|
|
72
|
-
yara.compile(source=utils.MACO_YARA_RULE),
|
|
73
|
-
extractor_module_callback,
|
|
74
|
-
logger,
|
|
75
|
-
create_venv and os.path.isdir(path_extractors),
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
if not self.extractors:
|
|
79
|
-
raise ExtractorLoadError("no extractors were loaded")
|
|
80
|
-
logger.debug(f"found extractors {list(self.extractors.keys())}\n")
|
|
81
|
-
|
|
82
|
-
# compile yara rules gathered from extractors
|
|
83
|
-
self.rules = yara.compile(sources=namespaced_rules)
|
|
84
|
-
|
|
85
|
-
def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
|
|
86
|
-
"""Return extractors that should run based on yara rules."""
|
|
87
|
-
# execute yara rules on file to find extractors we should run
|
|
88
|
-
# yara can't run on a stream so we give it a bytestring
|
|
89
|
-
matches = self.rules.match(data=stream.read())
|
|
90
|
-
stream.seek(0)
|
|
91
|
-
if not matches:
|
|
92
|
-
return
|
|
93
|
-
# get all rules that hit for each extractor
|
|
94
|
-
runs = {}
|
|
95
|
-
for match in matches:
|
|
96
|
-
runs.setdefault(match.namespace, []).append(match)
|
|
97
|
-
|
|
98
|
-
return runs
|
|
99
|
-
|
|
100
|
-
def extract(
|
|
101
|
-
self,
|
|
102
|
-
stream: BinaryIO,
|
|
103
|
-
matches: List[yara.Match],
|
|
104
|
-
extractor_name: str,
|
|
105
|
-
) -> Dict[str, Any]:
|
|
106
|
-
"""Run extractor with stream and verify output matches the model."""
|
|
107
|
-
extractor = self.extractors[extractor_name]
|
|
108
|
-
resp = None
|
|
109
|
-
try:
|
|
110
|
-
if extractor["venv"]:
|
|
111
|
-
# Run extractor within a virtual environment
|
|
112
|
-
with NamedTemporaryFile() as sample_path:
|
|
113
|
-
sample_path.write(stream.read())
|
|
114
|
-
sample_path.flush()
|
|
115
|
-
return utils.run_in_venv(sample_path.name, **extractor)
|
|
116
|
-
else:
|
|
117
|
-
# Run extractor within on host environment
|
|
118
|
-
resp = extractor["module"]().run(stream, matches)
|
|
119
|
-
except Exception:
|
|
120
|
-
# caller can deal with the exception
|
|
121
|
-
raise
|
|
122
|
-
finally:
|
|
123
|
-
# make sure to reset where we are in the file
|
|
124
|
-
# otherwise follow on extractors are going to read 0 bytes
|
|
125
|
-
stream.seek(0)
|
|
126
|
-
|
|
127
|
-
# enforce types and verify properties, and remove defaults
|
|
128
|
-
if resp is not None:
|
|
129
|
-
resp = _verify_response(resp)
|
|
130
|
-
|
|
131
|
-
return resp
|
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
"""Convenience functions for discovering your extractors."""
|
|
2
|
-
|
|
3
|
-
import inspect
|
|
4
|
-
import logging
|
|
5
|
-
import os
|
|
6
|
-
|
|
7
|
-
from tempfile import NamedTemporaryFile
|
|
8
|
-
from typing import Any, BinaryIO, Dict, List
|
|
9
|
-
from types import ModuleType
|
|
10
|
-
|
|
11
|
-
from maco import yara
|
|
12
|
-
from pydantic import BaseModel
|
|
13
|
-
|
|
14
|
-
from maco import extractor, model, utils
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class ExtractorLoadError(Exception):
|
|
18
|
-
pass
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
logger = logging.getLogger("maco.lib.helpers")
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def _verify_response(resp: BaseModel) -> Dict:
|
|
25
|
-
"""Enforce types and verify properties, and remove defaults."""
|
|
26
|
-
# check the response is valid for its own model
|
|
27
|
-
# this is useful if a restriction on the 'other' dictionary is needed
|
|
28
|
-
resp_model = type(resp)
|
|
29
|
-
if resp_model != model.ExtractorModel:
|
|
30
|
-
resp = resp_model.model_validate(resp)
|
|
31
|
-
# check the response is valid according to the ExtractorModel
|
|
32
|
-
resp = model.ExtractorModel.model_validate(resp)
|
|
33
|
-
# coerce sets to correct types
|
|
34
|
-
# otherwise we end up with sets where we expect lists
|
|
35
|
-
resp = model.ExtractorModel(**resp.model_dump())
|
|
36
|
-
# dump model to dict
|
|
37
|
-
return resp.model_dump(exclude_defaults=True)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class Collector:
|
|
41
|
-
def __init__(
|
|
42
|
-
self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
|
|
43
|
-
):
|
|
44
|
-
"""Discover and load extractors from file system."""
|
|
45
|
-
path_extractors = os.path.realpath(path_extractors)
|
|
46
|
-
self.path = path_extractors
|
|
47
|
-
self.extractors = {}
|
|
48
|
-
namespaced_rules = {}
|
|
49
|
-
|
|
50
|
-
def extractor_module_callback(module: ModuleType, venv: str):
|
|
51
|
-
members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
|
|
52
|
-
for member in members:
|
|
53
|
-
name, member = member
|
|
54
|
-
if exclude and name in exclude:
|
|
55
|
-
# Module is part of the exclusion list, skip
|
|
56
|
-
logger.debug(f"exclude excluded '{name}'")
|
|
57
|
-
return
|
|
58
|
-
|
|
59
|
-
if include and name not in include:
|
|
60
|
-
# Module wasn't part of the inclusion list, skip
|
|
61
|
-
logger.debug(f"include excluded '{name}'")
|
|
62
|
-
return
|
|
63
|
-
|
|
64
|
-
# initialise and register
|
|
65
|
-
logger.debug(f"register '{name}'")
|
|
66
|
-
self.extractors[name] = dict(module=member, venv=venv, module_path=module.__file__)
|
|
67
|
-
namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
|
|
68
|
-
|
|
69
|
-
# Find the extractors within the given directory
|
|
70
|
-
utils.import_extractors(
|
|
71
|
-
path_extractors,
|
|
72
|
-
yara.compile(source=utils.MACO_YARA_RULE),
|
|
73
|
-
extractor_module_callback,
|
|
74
|
-
logger,
|
|
75
|
-
create_venv and os.path.isdir(path_extractors),
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
if not self.extractors:
|
|
79
|
-
raise ExtractorLoadError("no extractors were loaded")
|
|
80
|
-
logger.debug(f"found extractors {list(self.extractors.keys())}\n")
|
|
81
|
-
|
|
82
|
-
# compile yara rules gathered from extractors
|
|
83
|
-
self.rules = yara.compile(sources=namespaced_rules)
|
|
84
|
-
|
|
85
|
-
def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
|
|
86
|
-
"""Return extractors that should run based on yara rules."""
|
|
87
|
-
# execute yara rules on file to find extractors we should run
|
|
88
|
-
# yara can't run on a stream so we give it a bytestring
|
|
89
|
-
matches = self.rules.match(data=stream.read())
|
|
90
|
-
stream.seek(0)
|
|
91
|
-
if not matches:
|
|
92
|
-
return
|
|
93
|
-
# get all rules that hit for each extractor
|
|
94
|
-
runs = {}
|
|
95
|
-
for match in matches:
|
|
96
|
-
runs.setdefault(match.namespace, []).append(match)
|
|
97
|
-
|
|
98
|
-
return runs
|
|
99
|
-
|
|
100
|
-
def extract(
|
|
101
|
-
self,
|
|
102
|
-
stream: BinaryIO,
|
|
103
|
-
matches: List[yara.Match],
|
|
104
|
-
extractor_name: str,
|
|
105
|
-
) -> Dict[str, Any]:
|
|
106
|
-
"""Run extractor with stream and verify output matches the model."""
|
|
107
|
-
extractor = self.extractors[extractor_name]
|
|
108
|
-
resp = None
|
|
109
|
-
try:
|
|
110
|
-
if extractor["venv"]:
|
|
111
|
-
# Run extractor within a virtual environment
|
|
112
|
-
with NamedTemporaryFile() as sample_path:
|
|
113
|
-
sample_path.write(stream.read())
|
|
114
|
-
sample_path.flush()
|
|
115
|
-
return utils.run_in_venv(sample_path.name, **extractor)
|
|
116
|
-
else:
|
|
117
|
-
# Run extractor within on host environment
|
|
118
|
-
resp = extractor["module"]().run(stream, matches)
|
|
119
|
-
except Exception:
|
|
120
|
-
# caller can deal with the exception
|
|
121
|
-
raise
|
|
122
|
-
finally:
|
|
123
|
-
# make sure to reset where we are in the file
|
|
124
|
-
# otherwise follow on extractors are going to read 0 bytes
|
|
125
|
-
stream.seek(0)
|
|
126
|
-
|
|
127
|
-
# enforce types and verify properties, and remove defaults
|
|
128
|
-
if resp is not None:
|
|
129
|
-
resp = _verify_response(resp)
|
|
130
|
-
|
|
131
|
-
return resp
|
maco-1.2.0/pipelines/test.yaml
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
name: tests
|
|
2
|
-
|
|
3
|
-
trigger: ["*"]
|
|
4
|
-
pr: ["*"]
|
|
5
|
-
|
|
6
|
-
pool:
|
|
7
|
-
vmImage: "ubuntu-20.04"
|
|
8
|
-
|
|
9
|
-
jobs:
|
|
10
|
-
- job: run_test
|
|
11
|
-
strategy:
|
|
12
|
-
matrix:
|
|
13
|
-
Python3_8:
|
|
14
|
-
python.version: "3.8"
|
|
15
|
-
Python3_9:
|
|
16
|
-
python.version: "3.9"
|
|
17
|
-
Python3_10:
|
|
18
|
-
python.version: "3.10"
|
|
19
|
-
Python3_11:
|
|
20
|
-
python.version: "3.11"
|
|
21
|
-
Python3_12:
|
|
22
|
-
python.version: "3.12"
|
|
23
|
-
timeoutInMinutes: 10
|
|
24
|
-
|
|
25
|
-
steps:
|
|
26
|
-
- task: UsePythonVersion@0
|
|
27
|
-
displayName: Set python version
|
|
28
|
-
inputs:
|
|
29
|
-
versionSpec: "$(python.version)"
|
|
30
|
-
- script: |
|
|
31
|
-
[ ! -d "$(pwd)/tests" ] && echo "No tests found" && exit
|
|
32
|
-
[ -f $(pwd)/requirements.txt ] && sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir -r $(pwd)/requirements.txt
|
|
33
|
-
[ -f $(pwd)/tests/requirements.txt ] && sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir -r $(pwd)/tests/requirements.txt
|
|
34
|
-
sudo rm -rf /tmp/* /var/lib/apt/lists/* ~/.cache/pip
|
|
35
|
-
|
|
36
|
-
displayName: Setup environment
|
|
37
|
-
- script: |
|
|
38
|
-
[ ! -d "$(pwd)/tests" ] && echo "No tests found" && exit
|
|
39
|
-
export REPO_NAME=${BUILD_REPOSITORY_NAME##*/}
|
|
40
|
-
python -m pytest -p no:cacheprovider --durations=10 -rsx -vv -W ignore::DeprecationWarning
|
|
41
|
-
displayName: Test
|
maco-1.2.0/setup.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Setup script."""
|
|
3
|
-
|
|
4
|
-
from setuptools import find_packages, setup
|
|
5
|
-
|
|
6
|
-
setup(
|
|
7
|
-
python_requires=">=3.8",
|
|
8
|
-
use_scm_version=True,
|
|
9
|
-
setup_requires=["setuptools_scm"],
|
|
10
|
-
packages=find_packages(".", exclude=["test", "tests", "extractors"]),
|
|
11
|
-
include_package_data=True,
|
|
12
|
-
install_requires=[
|
|
13
|
-
r.strip() for r in open("requirements.txt", "r") if not r.startswith("#")
|
|
14
|
-
],
|
|
15
|
-
name="maco",
|
|
16
|
-
description="",
|
|
17
|
-
author="",
|
|
18
|
-
author_email="",
|
|
19
|
-
classifiers=[],
|
|
20
|
-
entry_points={
|
|
21
|
-
"console_scripts": [
|
|
22
|
-
"maco = maco.cli:main",
|
|
23
|
-
],
|
|
24
|
-
},
|
|
25
|
-
)
|
maco-1.2.0/tox.ini
DELETED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|