maco 1.2.17__py3-none-any.whl → 1.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extractor_setup/maco/__init__.py +0 -0
- extractor_setup/maco/base_test.py +98 -0
- extractor_setup/maco/cli.py +275 -0
- extractor_setup/maco/collector.py +220 -0
- extractor_setup/maco/exceptions.py +33 -0
- extractor_setup/maco/extractor.py +70 -0
- extractor_setup/maco/model/__init__.py +1 -0
- extractor_setup/maco/model/model.py +606 -0
- extractor_setup/maco/utils.py +587 -0
- extractor_setup/maco/yara.py +129 -0
- maco/utils.py +2 -2
- {maco-1.2.17.dist-info → maco-1.2.18.dist-info}/METADATA +2 -1
- {maco-1.2.17.dist-info → maco-1.2.18.dist-info}/RECORD +19 -9
- {maco-1.2.17.dist-info → maco-1.2.18.dist-info}/top_level.txt +1 -0
- model_setup/maco/utils.py +2 -2
- pipelines/publish.yaml +30 -24
- {maco-1.2.17.dist-info → maco-1.2.18.dist-info}/WHEEL +0 -0
- {maco-1.2.17.dist-info → maco-1.2.18.dist-info}/entry_points.txt +0 -0
- {maco-1.2.17.dist-info → maco-1.2.18.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,587 @@
|
|
|
1
|
+
"""Common utilities shared between the MACO collector and configextractor-py."""
|
|
2
|
+
|
|
3
|
+
import importlib
|
|
4
|
+
import inspect
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import logging.handlers
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import shutil
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
import tempfile
|
|
14
|
+
from importlib.machinery import SourceFileLoader
|
|
15
|
+
|
|
16
|
+
from multiprocess import Process, Queue
|
|
17
|
+
|
|
18
|
+
from maco import yara
|
|
19
|
+
|
|
20
|
+
if sys.version_info >= (3, 11):
|
|
21
|
+
import tomllib
|
|
22
|
+
else:
|
|
23
|
+
import tomli as tomllib
|
|
24
|
+
|
|
25
|
+
from base64 import b64decode
|
|
26
|
+
from copy import deepcopy
|
|
27
|
+
from glob import glob
|
|
28
|
+
from logging import Logger
|
|
29
|
+
from types import ModuleType
|
|
30
|
+
from typing import Callable, Dict, List, Tuple, Union
|
|
31
|
+
|
|
32
|
+
from uv import find_uv_bin
|
|
33
|
+
|
|
34
|
+
from maco import model
|
|
35
|
+
from maco.exceptions import AnalysisAbortedException
|
|
36
|
+
from maco.extractor import Extractor
|
|
37
|
+
|
|
38
|
+
# Module-level logger, used by helpers that are not handed an explicit logger.
logger = logging.getLogger("maco.lib.utils")

# Name of the per-directory virtual environment folder that is created and looked up.
VENV_DIRECTORY_NAME = ".venv"

# Byte patterns that capture the leading dots of a relative import
# ("from ..pkg" and "from .. import" respectively).
RELATIVE_FROM_RE = re.compile(rb"from (\.+)")
RELATIVE_FROM_IMPORT_RE = re.compile(rb"from (\.+) import")

# Presumably the path to the uv executable, as reported by the uv package; verify against uv.
UV_BIN = find_uv_bin()

# Command prefixes built on top of UV_BIN.
# NOTE(review): these are space-joined strings, so splitting them on " " is unsafe
# when UV_BIN itself contains a space.
PIP_CMD = f"{UV_BIN} pip"
VENV_CREATE_CMD = f"{UV_BIN} venv"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Base64Decoder(json.JSONDecoder):
    """JSON decoder that restores base64-encoded binary data to ``bytes``.

    JSON objects of the form ``{"__class__": "bytes", "data": "<base64>"}`` are
    replaced by the decoded ``bytes`` value. Every other object is returned
    unchanged.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the decoder with the bytes-restoring object hook installed."""
        super().__init__(*args, object_hook=self.object_hook, **kwargs)

    def object_hook(self, obj):
        """Hook to decode base64 encoded binary data.

        Args:
            obj (dict): JSON object that has just been decoded

        Returns:
            (Union[bytes, dict]): Decoded bytes for a tagged bytes object, otherwise ``obj`` itself

        Raises:
            KeyError: If the object is tagged as bytes but carries no "data" entry
        """
        if obj.get("__class__") == "bytes":
            return b64decode(obj["data"])
        return obj
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Template of the script that runs an extractor inside its own virtual environment.
# It is rendered with str.format(), so it must not contain literal braces other than the
# placeholders: parent_package_path, module_name, module_class, sample_path, output_path.
VENV_SCRIPT = """
import importlib
import json
import logging
import os
import sys

try:
    # Respect cases where the extractor is tied to certain version of yara-python for processing
    import yara
except Exception:
    # Otherwise fallback to MACO's interface for yara-python==4.5.x
    from maco import yara

from base64 import b64encode

# ensure we have a logger to stderr
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
sh = logging.StreamHandler()
logger.addHandler(sh)
sh.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    fmt="%(asctime)s, [%(levelname)s] %(module)s.%(funcName)s: %(message)s", datefmt="%Y-%m-%d (%H:%M:%S)"
)
sh.setFormatter(formatter)

parent_package_path = "{parent_package_path}"
sys.path.insert(1, parent_package_path)
mod = importlib.import_module("{module_name}")

class Base64Encoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, bytes):
            return dict(__class__="bytes", data=b64encode(o).decode())
        return json.JSONEncoder.default(self, o)

matches = []
if mod.{module_class}.yara_rule:
    matches = yara.compile(source=mod.{module_class}.yara_rule).match("{sample_path}")
# Close the sample handle as soon as the extractor has finished with it
with open("{sample_path}", 'rb') as sample:
    result = mod.{module_class}().run(sample, matches=matches)

with open("{output_path}", 'w') as fp:
    if not result:
        json.dump(dict(), fp)
    else:
        try:
            json.dump(result.model_dump(exclude_defaults=True, exclude_none=True), fp, cls=Base64Encoder)
        except AttributeError:
            # venv likely has an older version of Pydantic < 2 installed
            json.dump(result.dict(exclude_defaults=True, exclude_none=True), fp, cls=Base64Encoder)
"""
|
|
120
|
+
|
|
121
|
+
MACO_YARA_RULE = r"""
|
|
122
|
+
rule MACO {
|
|
123
|
+
meta:
|
|
124
|
+
desc = "Used to match on Python files that contain MACO extractors"
|
|
125
|
+
strings:
|
|
126
|
+
$from = "from maco"
|
|
127
|
+
$import = "import maco"
|
|
128
|
+
$extractor = "Extractor"
|
|
129
|
+
$class = /class \w+\(([a-zA-Z.]+)?Extractor\)\:/
|
|
130
|
+
condition:
|
|
131
|
+
($from or $import) and $extractor and $class
|
|
132
|
+
}
|
|
133
|
+
"""
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def maco_extractor_validation(module: ModuleType) -> bool:
    """Validation function for extractors.

    Args:
        module (ModuleType): Candidate object to check; only classes can pass validation

    Returns:
        (bool): True if extractor belongs to MACO, False otherwise.
    """
    if not inspect.isclass(module):
        return False
    # 'author' has to be implemented otherwise will raise an exception according to MACO.
    # Coerce to bool so the declared return type holds instead of leaking the author value.
    return bool(getattr(module, "author", None))
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def maco_extract_rules(module: Extractor) -> str:
    """Return the YARA rule text declared on an extractor.

    Args:
        module (Extractor): Extractor whose ``yara_rule`` attribute is read

    Returns:
        (str): YARA rules
    """
    rules = module.yara_rule
    return rules
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger) -> Tuple[List[str], List[str]]:
    """Looks for extractors using YARA rules.

    The directory tree below ``root_directory`` is walked recursively. The virtual
    environment directory, non-Python files, ``setup.py`` and any node whose name contains
    "test" or "deprecated" are skipped. Python files located inside a package have their
    relative imports rewritten to absolute ones (the file is only rewritten on disk when
    its content changed) before being matched against ``scanner``.

    Args:
        root_directory (str): Root directory containing extractors
        scanner (yara.Rules): Scanner to look for extractors using YARA rules
        logger (Logger): Logger to use

    Returns:
        Tuple[List[str], List[str]]: Extractor directories (a set that always contains
            ``root_directory``) and the real paths of the extractor files
    """
    extractor_dirs = {root_directory}
    extractor_files = []

    def absolutize_imports(data: bytes, curr_dir: str, package: str) -> bytes:
        """Rewrite relative imports in ``data`` as absolute imports rooted at ``package``."""
        # Path components ordered from the innermost directory outwards
        split = curr_dir.split("/")[::-1]

        def absolute_path(dots: bytes) -> str:
            depth = len(dots)
            return ".".join(split[depth - 1 : split.index(package) + 1][::-1])

        def replace_from_import(match):
            abspath = absolute_path(match.group(1))
            if not abspath:
                # The import reaches above the package; leave it untouched rather than break it
                return match.group(0)
            return f"from {abspath} import".encode()

        def replace_from(match):
            abspath = absolute_path(match.group(1))
            if not abspath:
                return match.group(0)
            return f"from {abspath}.".encode()

        # Substituting per match guarantees that exactly the matched import is rewritten.
        # "from . import x" has to be handled before the more general "from .x" pattern.
        data = RELATIVE_FROM_IMPORT_RE.sub(replace_from_import, data)
        return RELATIVE_FROM_RE.sub(replace_from, data)

    def scan_and_repair(directory, package=None):
        nodes = os.listdir(directory)

        if "__init__.py" in nodes and not package and "-" not in os.path.basename(directory):
            # Perhaps we've found the outermost package?
            package = os.path.basename(directory)

        for node in nodes:
            path = os.path.join(directory, node)
            is_file = os.path.isfile(path)
            if node == VENV_DIRECTORY_NAME:
                # Ignore looking for extractors within packages
                continue
            if is_file and not node.endswith(".py"):
                # Ignore scanning non-Python files
                continue
            if node == "setup.py" or "test" in node or "deprecated" in node:
                # Ignore setup files, test files and deprecated files
                continue

            if is_file:
                # Scan Python file for potential extractors
                if package:
                    # Inspect the contents and look for any relative import issues
                    with open(path, "rb") as f:
                        data = f.read()

                    # Replace any relative importing with absolute
                    repaired = absolutize_imports(data, os.path.dirname(path), package)

                    # only write extractor files if imports were changed
                    if repaired != data:
                        with open(path, "wb") as f:
                            f.write(repaired)

                if scanner.match(path):
                    # Add directory to list of hits for venv creation
                    extractor_dirs.add(directory)
                    extractor_files.append(os.path.realpath(path))
            elif os.path.isdir(path):
                # Anything that is neither a file nor a directory (e.g. a dangling symlink) is skipped
                scan_and_repair(path, package)

    # Search for extractors using YARA rules
    logger.info("Searching for prospective extractors based on YARA rules..")
    scan_and_repair(root_directory)

    return extractor_dirs, extractor_files
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
|
|
236
|
+
venvs = []
|
|
237
|
+
env = deepcopy(os.environ)
|
|
238
|
+
stop_directory = os.path.dirname(sorted(directories)[0])
|
|
239
|
+
# Track directories that we've already visited
|
|
240
|
+
visited_dirs = []
|
|
241
|
+
for dir in directories:
|
|
242
|
+
# Recurse backwards through the directory structure to look for package requirements
|
|
243
|
+
while dir != stop_directory and dir not in visited_dirs:
|
|
244
|
+
req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(dir))))
|
|
245
|
+
if req_files:
|
|
246
|
+
# create a virtual environment, otherwise directly install into current env
|
|
247
|
+
if create_venv:
|
|
248
|
+
venv_path = os.path.join(dir, VENV_DIRECTORY_NAME)
|
|
249
|
+
logger.info(f"Updating virtual environment {venv_path}")
|
|
250
|
+
env.update({"VIRTUAL_ENV": venv_path})
|
|
251
|
+
# Create a virtual environment for the directory
|
|
252
|
+
if not os.path.exists(venv_path):
|
|
253
|
+
cmd = f"{VENV_CREATE_CMD} --python {python_version}"
|
|
254
|
+
subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)
|
|
255
|
+
|
|
256
|
+
# Install/Update the packages in the environment
|
|
257
|
+
install_command = PIP_CMD.split(" ") + ["install"]
|
|
258
|
+
# When running locally, only install packages to required spec.
|
|
259
|
+
# This prevents issues during maco development and building extractors against local libraries.
|
|
260
|
+
if create_venv:
|
|
261
|
+
# when running in custom virtual environment, always upgrade packages.
|
|
262
|
+
install_command.extend(["--upgrade", "--no-cache"])
|
|
263
|
+
|
|
264
|
+
# Update the pip install command depending on where the dependencies are coming from
|
|
265
|
+
if "requirements.txt" in req_files:
|
|
266
|
+
# Perform a pip install using the requirements flag
|
|
267
|
+
install_command.extend(["--requirements", "requirements.txt"])
|
|
268
|
+
elif "pyproject.toml" in req_files:
|
|
269
|
+
# Assume we're dealing with a project directory
|
|
270
|
+
pyproject_command = ["--editable", "."]
|
|
271
|
+
|
|
272
|
+
# Check to see if there are optional dependencies required
|
|
273
|
+
with open(os.path.join(dir, "pyproject.toml"), "rb") as f:
|
|
274
|
+
parsed_toml_project = tomllib.load(f).get("project", {})
|
|
275
|
+
for dep_name, dependencies in parsed_toml_project.get("optional-dependencies", {}).items():
|
|
276
|
+
# Look for the dependency that hints at use of MACO for the extractors
|
|
277
|
+
if "maco" in " ".join(dependencies):
|
|
278
|
+
pyproject_command = [f".[{dep_name}]"]
|
|
279
|
+
break
|
|
280
|
+
|
|
281
|
+
install_command.extend(pyproject_command)
|
|
282
|
+
|
|
283
|
+
# Always require maco-extractor to be installed
|
|
284
|
+
install_command.append("maco-extractor")
|
|
285
|
+
logger.debug(f"Install command: {' '.join(install_command)} [{dir}]")
|
|
286
|
+
# this uses VIRTUAL_ENV to control usage of a virtual environment
|
|
287
|
+
p = subprocess.run(
|
|
288
|
+
install_command,
|
|
289
|
+
cwd=dir,
|
|
290
|
+
capture_output=True,
|
|
291
|
+
env=env,
|
|
292
|
+
)
|
|
293
|
+
if p.returncode != 0:
|
|
294
|
+
if b"is being installed using the legacy" in p.stderr:
|
|
295
|
+
# Ignore these types of errors
|
|
296
|
+
continue
|
|
297
|
+
logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
298
|
+
else:
|
|
299
|
+
logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
|
|
300
|
+
if create_venv:
|
|
301
|
+
venvs.append(venv_path)
|
|
302
|
+
|
|
303
|
+
# Cleanup any build directories that are the product of package installation
|
|
304
|
+
expected_build_path = os.path.join(dir, "build")
|
|
305
|
+
if os.path.exists(expected_build_path):
|
|
306
|
+
shutil.rmtree(expected_build_path)
|
|
307
|
+
|
|
308
|
+
# Add directories to our visited list and check the parent of this directory on the next loop
|
|
309
|
+
visited_dirs.append(dir)
|
|
310
|
+
dir = os.path.dirname(dir)
|
|
311
|
+
return venvs
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def find_and_insert_venv(path: str, venvs: List[str]) -> Tuple[str, str]:
    """Finds the closest virtual environment to the extractor and inserts it into the PATH.

    Args:
        path (str): Path of extractor
        venvs (List[str]): List of virtual environments

    Returns:
        (Tuple[str, str]): Virtual environment and site-packages path that's closest to the extractor.
            ``(None, None)`` when no virtual environment sits above the extractor; the
            site-packages entry is ``None`` when the environment has no site-packages directory.
    """
    # Reverse lexical order visits nested (longer) paths before their ancestors
    for candidate in sorted(venvs, reverse=True):
        if path.startswith(f"{os.path.dirname(candidate)}/"):
            # Found the virtual environment that's the closest to extractor
            venv = candidate
            break
    else:
        # Nothing matched: do not fall back to an unrelated environment
        return None, None

    site_package = None
    # Insert the venv's site-packages into the PATH temporarily to load the module
    for site_package in glob(os.path.join(venv, "lib/python*/site-packages")):
        if site_package not in sys.path:
            sys.path.insert(2, site_package)
            break

    return venv, site_package
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def register_extractor_module(
    extractor_source_file: str,
    module_name: str,
    venvs: List[str],
    extractor_module_callback: Callable[[ModuleType, str], None],
    logger: Logger,
):
    """Register the extractor module in isolation.

    The module is loaded straight from its source file and handed to the callback
    together with the virtual environment found for it. Any site-packages entry that
    was added to ``sys.path`` for the load is removed again afterwards.

    Args:
        extractor_source_file (str): Path to source file of extractor
        module_name (str): The name of the module relative to the package directory
        venvs (List[str]): List of virtual environments
        extractor_module_callback (Callable[[ModuleType, str], None]): Callback used to register extractors
        logger (Logger): Logger to use

    """
    # Bound before the try block so the cleanup below can never hit an unbound name
    # and mask the exception that actually went wrong.
    venv = site_packages = None
    try:
        logger.info(f"Inspecting '{extractor_source_file}' for extractors..")
        venv, site_packages = find_and_insert_venv(extractor_source_file, venvs)
        loader = SourceFileLoader(
            module_name,
            extractor_source_file,
        )
        # NOTE(review): Loader.load_module() is deprecated in importlib; kept to preserve
        # the current loading semantics.
        extractor_module_callback(loader.load_module(), venv)
    finally:
        # Cleanup virtual environment that was loaded into PATH
        if venv and site_packages in sys.path:
            sys.path.remove(site_packages)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def register_extractors(
    current_directory: str,
    venvs: List[str],
    extractor_files: List[str],
    extractor_module_callback: Callable[[ModuleType, str], None],
    logger: Logger,
):
    """Register extractors with in the current directory.

    Each extractor file is registered in its own child process. ``current_directory``
    and its parent are placed on ``sys.path`` for the duration of the registration.

    Args:
        current_directory (str): Current directory to register extractors found
        venvs (List[str]): List of virtual environments
        extractor_files (List[str]): List of extractor files found
        extractor_module_callback (Callable[[ModuleType, str], None]): Callback used to register extractors
        logger (Logger): Logger to use
    """
    package_name = os.path.basename(current_directory)
    parent_directory = os.path.dirname(current_directory)
    if venvs and package_name in sys.modules:
        # this may happen as part of testing if some part of the extractor code was directly imported
        logger.warning(
            f"Looks like {package_name} is already loaded. "
            "If your maco extractor overlaps an existing package name this could cause problems."
        )

    try:
        # Modify the PATH so we can recognize this new package on import
        sys.path.insert(1, current_directory)
        sys.path.insert(1, parent_directory)

        # Load the potential extractors directly from the source file
        registration_processes = []
        for extractor_source_file in extractor_files:
            module_name = extractor_source_file.replace(f"{parent_directory}/", "").replace("/", ".")[:-3]
            p = Process(
                target=register_extractor_module,
                args=(extractor_source_file, module_name, venvs, extractor_module_callback, logger),
            )
            p.start()
            registration_processes.append((extractor_source_file, p))

        for extractor_source_file, p in registration_processes:
            p.join()
            if p.exitcode:
                # Surface child failures instead of dropping them silently
                logger.error(f"Registration of '{extractor_source_file}' failed with exit code {p.exitcode}")

    finally:
        # Cleanup changes made to PATH; guarded so a missing entry cannot mask a real error
        for entry in (parent_directory, current_directory):
            if entry in sys.path:
                sys.path.remove(entry)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def proxy_logging(queue: Queue, callback: Callable[[ModuleType, str], None], *args, **kwargs):
    """Attach a queue-backed handler to the root logger, then invoke the callback.

    Args:
        queue (Queue): Queue that receives the log records emitted through the root logger
        callback (Callable[[ModuleType, str], None]): Callable executed once logging is wired up;
            it receives ``*args``, ``**kwargs`` and the root logger as ``logger``
    """
    root_logger = logging.getLogger()
    queue_handler = logging.handlers.QueueHandler(queue)
    queue_handler.setLevel(logging.DEBUG)
    root_logger.addHandler(queue_handler)
    callback(*args, logger=root_logger, **kwargs)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def import_extractors(
    extractor_module_callback: Callable[[ModuleType, str], bool],
    *,
    root_directory: str,
    scanner: yara.Rules,
    create_venv: bool,
    logger: Logger,
    python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
    skip_install: bool = False,
):
    """Discover, prepare and register the extractors below a directory.

    The directory is scanned for extractor files first. Dependencies are then installed
    (or, when installation is skipped, existing virtual environments are collected) and
    finally every extractor file is registered through the callback.

    Args:
        extractor_module_callback (Callable[[ModuleType, str], bool]): Callback used to register extractors
        root_directory (str): Root directory to look for extractors
        scanner (yara.Rules): Scanner to look for extractors that match YARA rule
        create_venv (bool): Create/Use virtual environments
        logger (Logger): Logger to use
        python_version (str): Version of python to use when creating virtual environments
        skip_install (bool): Skip installation of Python dependencies for extractors
    """
    extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)

    logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
    logger.debug(extractor_files)

    if skip_install:
        # Nothing gets installed: reuse whatever virtual environments already exist
        logger.info("Checking for pre-existing virtual environment(s)..")
        venvs = []
        for parent, subdirectories, _ in os.walk(root_directory):
            if VENV_DIRECTORY_NAME in subdirectories:
                venvs.append(os.path.join(parent, VENV_DIRECTORY_NAME))
    else:
        # Install into the current environment or into dynamically created virtual environments
        venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)

    # The environment is ready, so the extractors can be hunted down and registered
    logger.info("Registering extractors..")
    register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
# holds cached extractors when not running in venv mode
# Keyed by "<module_name>_<extractor_class>"; each value is an instantiated extractor.
_loaded_extractors: Dict[str, Extractor] = {}
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def run_extractor(
    sample_path,
    module_name,
    extractor_class,
    module_path,
    venv,
    venv_script=VENV_SCRIPT,
    json_decoder=Base64Decoder,
) -> Union[Dict[str, dict], model.ExtractorModel]:
    """Runs the maco extractor against sample either in current process or child process.

    Without a virtual environment the extractor is imported (and cached) in the current
    process. With one, a temporary script rendered from ``venv_script`` is executed by the
    environment's interpreter and its JSON output is loaded with ``json_decoder``.

    Args:
        sample_path (str): Path to sample
        module_name (str): Name of extractor module
        extractor_class (str): Name of extractor class in module
        module_path (str): Path to Python module containing extractor
        venv (str): Path to virtual environment associated to extractor
        venv_script (str): Script to run extractor in a virtual environment
        json_decoder (Base64Decoder): Decoder used for JSON

    Raises:
        AnalysisAbortedException: Raised when extractor voluntarily terminates execution
        Exception: Raised when extractor raises an exception

    Returns:
        Union[Dict[str, dict], model.ExtractorModel]: Results from extractor
    """
    if not venv:
        key = f"{module_name}_{extractor_class}"
        if key not in _loaded_extractors:
            # dynamic import of extractor
            # Add the correct directory to the PATH before attempting to load the extractor
            import_path = module_path[: -4 - len(module_name)]
            sys.path.insert(1, import_path)
            try:
                mod = importlib.import_module(module_name)
                extractor = getattr(mod, extractor_class)()

                # Add to cache
                _loaded_extractors[key] = extractor
            finally:
                # Remove by value: the import itself may have shifted entries in sys.path
                if import_path in sys.path:
                    sys.path.remove(import_path)
        else:
            # retrieve cached extractor
            extractor = _loaded_extractors[key]

        # Default to no matches so extractors without a compiled YARA rule still run
        matches = []
        if extractor.yara_compiled:
            matches = extractor.yara_compiled.match(sample_path)
        with open(sample_path, "rb") as sample:
            return extractor.run(sample, matches=matches)

    # execute extractor in child process with separate virtual environment
    # Write temporary script in the same directory as extractor to resolve relative imports
    python_exe = os.path.join(venv, "bin", "python")
    dirname = os.path.dirname(module_path)
    with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
        with tempfile.NamedTemporaryFile() as output:
            parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
            root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]

            # NOTE(review): the paths are substituted verbatim into quoted string literals of
            # the script, so a path containing quotes or backslashes would break it.
            script.write(
                venv_script.format(
                    parent_package_path=parent_package_path,
                    module_name=module_name,
                    module_class=extractor_class,
                    sample_path=sample_path,
                    output_path=output.name,
                )
            )
            script.flush()
            cwd = root_directory
            custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")

            if custom_module.startswith("src."):
                # src layout found, which means the actual module content is within 'src' directory
                custom_module = custom_module[4:]
                cwd = os.path.join(cwd, "src")

            # run the maco extractor in full venv process isolation (slow)
            proc = subprocess.run(
                [python_exe, "-m", custom_module],
                cwd=cwd,
                capture_output=True,
            )
            # Undecodable bytes must not hide the extractor's actual output
            stderr = proc.stderr.decode(errors="replace")
            try:
                # Load results and return them
                output.seek(0)
                loaded = json.load(output, cls=json_decoder)
            except Exception as e:
                # If there was an error raised during runtime, then propagate
                delim = f'File "{module_path}"'
                exception = stderr
                if delim in exception:
                    exception = f"{delim}{exception.split(delim, 1)[1]}"
                if "maco.exceptions.AnalysisAbortedException" in exception:
                    # Extractor voluntarily terminated, re-raise exception to be handled by collector
                    raise AnalysisAbortedException(
                        exception.split("maco.exceptions.AnalysisAbortedException: ")[-1]
                    )
                else:
                    # print extractor logging at error level
                    logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
                    raise Exception(exception) from e
            # ensure that extractor logging is available
            logger.info(f"maco extractor stderr:\n{stderr}")
    return loaded
|