maco 1.2.14__py3-none-any.whl → 1.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- demo_extractors/complex/complex.py +12 -0
- demo_extractors/complex/complex_utils.py +11 -2
- demo_extractors/elfy.py +12 -0
- demo_extractors/limit_other.py +15 -0
- demo_extractors/nothing.py +11 -3
- demo_extractors/shared.py +6 -0
- demo_extractors/terminator.py +12 -1
- maco/base_test.py +24 -7
- maco/cli.py +19 -5
- maco/collector.py +25 -9
- maco/exceptions.py +31 -1
- maco/extractor.py +7 -8
- maco/model/model.py +34 -0
- maco/utils.py +139 -83
- maco/yara.py +47 -5
- {maco-1.2.14.dist-info → maco-1.2.16.dist-info}/METADATA +3 -3
- maco-1.2.16.dist-info/RECORD +49 -0
- {maco-1.2.14.dist-info → maco-1.2.16.dist-info}/WHEEL +1 -1
- model_setup/maco/base_test.py +24 -7
- model_setup/maco/cli.py +19 -5
- model_setup/maco/collector.py +25 -9
- model_setup/maco/exceptions.py +31 -1
- model_setup/maco/extractor.py +7 -8
- model_setup/maco/model/model.py +34 -0
- model_setup/maco/utils.py +139 -83
- model_setup/maco/yara.py +47 -5
- tests/extractors/basic.py +10 -2
- tests/extractors/basic_longer.py +9 -2
- tests/extractors/bob/bob.py +2 -0
- tests/extractors/import_rewriting/__init__.py +0 -0
- tests/extractors/import_rewriting/importer.py +10341 -0
- tests/extractors/test_basic.py +4 -0
- maco-1.2.14.dist-info/RECORD +0 -47
- {maco-1.2.14.dist-info → maco-1.2.16.dist-info}/entry_points.txt +0 -0
- {maco-1.2.14.dist-info → maco-1.2.16.dist-info/licenses}/LICENSE.md +0 -0
- {maco-1.2.14.dist-info → maco-1.2.16.dist-info}/top_level.txt +0 -0
model_setup/maco/collector.py
CHANGED
|
@@ -13,18 +13,20 @@ from multiprocess import Manager, Process, Queue
|
|
|
13
13
|
from pydantic import BaseModel
|
|
14
14
|
|
|
15
15
|
from maco import extractor, model, utils, yara
|
|
16
|
-
from maco.exceptions import AnalysisAbortedException
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class ExtractorLoadError(Exception):
|
|
20
|
-
pass
|
|
21
|
-
|
|
16
|
+
from maco.exceptions import AnalysisAbortedException, ExtractorLoadError
|
|
22
17
|
|
|
23
18
|
logger = logging.getLogger("maco.lib.helpers")
|
|
24
19
|
|
|
25
20
|
|
|
26
21
|
def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
|
|
27
|
-
"""Enforce types and verify properties, and remove defaults.
|
|
22
|
+
"""Enforce types and verify properties, and remove defaults.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
resp (Union[BaseModel, dict]): results from extractor
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
(Dict): results from extractor after verification
|
|
29
|
+
"""
|
|
28
30
|
if not resp:
|
|
29
31
|
return None
|
|
30
32
|
# check the response is valid for its own model
|
|
@@ -62,6 +64,8 @@ class ExtractorRegistration(TypedDict):
|
|
|
62
64
|
|
|
63
65
|
|
|
64
66
|
class Collector:
|
|
67
|
+
"""Discover and load extractors from file system."""
|
|
68
|
+
|
|
65
69
|
def __init__(
|
|
66
70
|
self,
|
|
67
71
|
path_extractors: str,
|
|
@@ -70,7 +74,11 @@ class Collector:
|
|
|
70
74
|
create_venv: bool = False,
|
|
71
75
|
skip_install: bool = False,
|
|
72
76
|
):
|
|
73
|
-
"""Discover and load extractors from file system.
|
|
77
|
+
"""Discover and load extractors from file system.
|
|
78
|
+
|
|
79
|
+
Raises:
|
|
80
|
+
ExtractorLoadError: when no extractors are found
|
|
81
|
+
"""
|
|
74
82
|
# maco requires the extractor to be imported directly, so ensure they are available on the path
|
|
75
83
|
full_path_extractors = os.path.abspath(path_extractors)
|
|
76
84
|
full_path_above_extractors = os.path.dirname(full_path_extractors)
|
|
@@ -175,7 +183,15 @@ class Collector:
|
|
|
175
183
|
stream: BinaryIO,
|
|
176
184
|
extractor_name: str,
|
|
177
185
|
) -> Dict[str, Any]:
|
|
178
|
-
"""Run extractor with stream and verify output matches the model.
|
|
186
|
+
"""Run extractor with stream and verify output matches the model.
|
|
187
|
+
|
|
188
|
+
Args:
|
|
189
|
+
stream (BinaryIO): Binary stream to analyze
|
|
190
|
+
extractor_name (str): Name of extractor to analyze stream
|
|
191
|
+
|
|
192
|
+
Returns:
|
|
193
|
+
(Dict[str, Any]): Results from extractor
|
|
194
|
+
"""
|
|
179
195
|
extractor = self.extractors[extractor_name]
|
|
180
196
|
try:
|
|
181
197
|
# Run extractor on a copy of the sample
|
model_setup/maco/exceptions.py
CHANGED
|
@@ -1,3 +1,33 @@
|
|
|
1
|
+
"""Exception classes for extractors."""
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
# Can be raised by extractors to abort analysis of a sample
|
|
2
5
|
# ie. Can abort if preliminary checks at start of run indicate the file shouldn't be analyzed by extractor
|
|
3
|
-
class AnalysisAbortedException(Exception):
|
|
6
|
+
class AnalysisAbortedException(Exception):
|
|
7
|
+
"""Raised when extractors voluntarily abort analysis of a sample."""
|
|
8
|
+
|
|
9
|
+
pass
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ExtractorLoadError(Exception):
|
|
13
|
+
"""Raised when extractors cannot be loaded."""
|
|
14
|
+
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class InvalidExtractor(ValueError):
|
|
19
|
+
"""Raised when an extractor is invalid."""
|
|
20
|
+
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class NoHitException(Exception):
|
|
25
|
+
"""Raised when the YARA rule of an extractor doesn't hit."""
|
|
26
|
+
|
|
27
|
+
pass
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SyntaxError(Exception):
|
|
31
|
+
"""Raised when there's a syntax error in the YARA rule."""
|
|
32
|
+
|
|
33
|
+
pass
|
model_setup/maco/extractor.py
CHANGED
|
@@ -4,14 +4,8 @@ import logging
|
|
|
4
4
|
import textwrap
|
|
5
5
|
from typing import BinaryIO, List, Optional, Union
|
|
6
6
|
|
|
7
|
-
from maco import yara
|
|
8
|
-
|
|
9
|
-
from . import model
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class InvalidExtractor(ValueError):
|
|
13
|
-
pass
|
|
14
|
-
|
|
7
|
+
from maco import model, yara
|
|
8
|
+
from maco.exceptions import InvalidExtractor
|
|
15
9
|
|
|
16
10
|
DEFAULT_YARA_RULE = """
|
|
17
11
|
rule {name}
|
|
@@ -37,6 +31,11 @@ class Extractor:
|
|
|
37
31
|
logger: logging.Logger = None # logger for use when debugging
|
|
38
32
|
|
|
39
33
|
def __init__(self) -> None:
|
|
34
|
+
"""Initialise the extractor.
|
|
35
|
+
|
|
36
|
+
Raises:
|
|
37
|
+
InvalidExtractor: When the extractor is invalid.
|
|
38
|
+
"""
|
|
40
39
|
self.name = name = type(self).__name__
|
|
41
40
|
self.logger = logging.getLogger(f"maco.extractor.{name}")
|
|
42
41
|
self.logger.debug(f"initialise '{name}'")
|
model_setup/maco/model/model.py
CHANGED
|
@@ -29,6 +29,8 @@ class Encryption(ForbidModel):
|
|
|
29
29
|
"""Encryption usage."""
|
|
30
30
|
|
|
31
31
|
class UsageEnum(str, Enum):
|
|
32
|
+
"""Purpose of the encryption."""
|
|
33
|
+
|
|
32
34
|
config = "config"
|
|
33
35
|
communication = "communication"
|
|
34
36
|
binary = "binary"
|
|
@@ -52,6 +54,8 @@ class Encryption(ForbidModel):
|
|
|
52
54
|
|
|
53
55
|
|
|
54
56
|
class CategoryEnum(str, Enum):
|
|
57
|
+
"""Category of the malware."""
|
|
58
|
+
|
|
55
59
|
# Software that shows you extra promotions that you cannot control as you use your PC.
|
|
56
60
|
# You wouldn't see the extra ads if you didn't have adware installed.
|
|
57
61
|
adware = "adware"
|
|
@@ -274,6 +278,8 @@ class ExtractorModel(ForbidModel):
|
|
|
274
278
|
"""Binary data extracted by decoder."""
|
|
275
279
|
|
|
276
280
|
class TypeEnum(str, Enum):
|
|
281
|
+
"""Type of binary data."""
|
|
282
|
+
|
|
277
283
|
payload = "payload" # contained within the original file
|
|
278
284
|
config = "config" # sometimes malware uses json/formatted text for config
|
|
279
285
|
other = "other"
|
|
@@ -289,6 +295,8 @@ class ExtractorModel(ForbidModel):
|
|
|
289
295
|
# convenience for ret.encryption.append(ret.Encryption(*properties))
|
|
290
296
|
# Define as class as only way to allow for this to be accessed and not have pydantic try to parse it.
|
|
291
297
|
class Encryption(Encryption):
|
|
298
|
+
"""Encryption usage."""
|
|
299
|
+
|
|
292
300
|
pass
|
|
293
301
|
|
|
294
302
|
encryption: Union[List[Encryption], Encryption, None] = None # encryption information for the binary
|
|
@@ -383,6 +391,18 @@ class ExtractorModel(ForbidModel):
|
|
|
383
391
|
|
|
384
392
|
proxy: List[Proxy] = []
|
|
385
393
|
|
|
394
|
+
class ICMP(ForbidModel):
|
|
395
|
+
"""Usage of ICMP."""
|
|
396
|
+
|
|
397
|
+
type: Optional[int] = None
|
|
398
|
+
code: Optional[int] = None
|
|
399
|
+
header: Optional[str] = None # Some malware uses non-standard header fields
|
|
400
|
+
hostname: Optional[str] = None
|
|
401
|
+
|
|
402
|
+
usage: Optional[ConnUsageEnum] = None
|
|
403
|
+
|
|
404
|
+
icmp: List[ICMP] = []
|
|
405
|
+
|
|
386
406
|
#
|
|
387
407
|
# inter process communication (IPC)
|
|
388
408
|
#
|
|
@@ -436,6 +456,8 @@ class ExtractorModel(ForbidModel):
|
|
|
436
456
|
"""Direct usage of DNS."""
|
|
437
457
|
|
|
438
458
|
class RecordTypeEnum(str, Enum):
|
|
459
|
+
"""DNS record types."""
|
|
460
|
+
|
|
439
461
|
A = "A"
|
|
440
462
|
AAAA = "AAAA"
|
|
441
463
|
AFSDB = "AFSDB"
|
|
@@ -512,6 +534,8 @@ class ExtractorModel(ForbidModel):
|
|
|
512
534
|
# convenience for ret.encryption.append(ret.Encryption(*properties))
|
|
513
535
|
# Define as class as only way to allow for this to be accessed and not have pydantic try to parse it.
|
|
514
536
|
class Encryption(Encryption):
|
|
537
|
+
"""Encryption usage."""
|
|
538
|
+
|
|
515
539
|
pass
|
|
516
540
|
|
|
517
541
|
encryption: List[Encryption] = []
|
|
@@ -530,6 +554,8 @@ class ExtractorModel(ForbidModel):
|
|
|
530
554
|
"""Cryptocoin usage (ransomware/miner)."""
|
|
531
555
|
|
|
532
556
|
class UsageEnum(str, Enum):
|
|
557
|
+
"""Cryptocoin usage."""
|
|
558
|
+
|
|
533
559
|
ransomware = "ransomware" # request money to unlock
|
|
534
560
|
miner = "miner" # use gpu/cpu to mint coins
|
|
535
561
|
other = "other"
|
|
@@ -543,7 +569,11 @@ class ExtractorModel(ForbidModel):
|
|
|
543
569
|
cryptocurrency: List[Cryptocurrency] = []
|
|
544
570
|
|
|
545
571
|
class Path(ForbidModel):
|
|
572
|
+
"""Path used by malware."""
|
|
573
|
+
|
|
546
574
|
class UsageEnum(str, Enum):
|
|
575
|
+
"""Purpose of the path."""
|
|
576
|
+
|
|
547
577
|
c2 = "c2" # file/folder issues commands to malware
|
|
548
578
|
config = "config" # config is loaded from this path
|
|
549
579
|
install = "install" # install directory/filename for malware
|
|
@@ -559,7 +589,11 @@ class ExtractorModel(ForbidModel):
|
|
|
559
589
|
paths: List[Path] = [] # files/directories used by malware
|
|
560
590
|
|
|
561
591
|
class Registry(ForbidModel):
|
|
592
|
+
"""Registry usage by malware."""
|
|
593
|
+
|
|
562
594
|
class UsageEnum(str, Enum):
|
|
595
|
+
"""Registry usage."""
|
|
596
|
+
|
|
563
597
|
persistence = "persistence" # stay alive
|
|
564
598
|
store_data = "store_data" # generated encryption keys or config
|
|
565
599
|
store_payload = "store_payload" # malware hidden in registry key
|
model_setup/maco/utils.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
"""Common utilities shared between the MACO collector and configextractor-py."""
|
|
2
|
+
|
|
2
3
|
import importlib
|
|
3
|
-
import importlib.machinery
|
|
4
|
-
import importlib.util
|
|
5
4
|
import inspect
|
|
6
5
|
import json
|
|
7
6
|
import logging
|
|
@@ -12,8 +11,9 @@ import shutil
|
|
|
12
11
|
import subprocess
|
|
13
12
|
import sys
|
|
14
13
|
import tempfile
|
|
14
|
+
from importlib.machinery import SourceFileLoader
|
|
15
15
|
|
|
16
|
-
from multiprocess import Queue
|
|
16
|
+
from multiprocess import Process, Queue
|
|
17
17
|
|
|
18
18
|
from maco import yara
|
|
19
19
|
|
|
@@ -26,15 +26,14 @@ from base64 import b64decode
|
|
|
26
26
|
from copy import deepcopy
|
|
27
27
|
from glob import glob
|
|
28
28
|
from logging import Logger
|
|
29
|
-
from pkgutil import walk_packages
|
|
30
29
|
from types import ModuleType
|
|
31
|
-
from typing import Callable, Dict, List,
|
|
30
|
+
from typing import Callable, Dict, List, Tuple, Union
|
|
32
31
|
|
|
33
32
|
from uv import find_uv_bin
|
|
34
33
|
|
|
35
34
|
from maco import model
|
|
36
|
-
from maco.extractor import Extractor
|
|
37
35
|
from maco.exceptions import AnalysisAbortedException
|
|
36
|
+
from maco.extractor import Extractor
|
|
38
37
|
|
|
39
38
|
logger = logging.getLogger("maco.lib.utils")
|
|
40
39
|
|
|
@@ -50,10 +49,14 @@ VENV_CREATE_CMD = f"{UV_BIN} venv"
|
|
|
50
49
|
|
|
51
50
|
|
|
52
51
|
class Base64Decoder(json.JSONDecoder):
|
|
52
|
+
"""JSON decoder that also decodes base64-encoded binary data."""
|
|
53
|
+
|
|
53
54
|
def __init__(self, *args, **kwargs):
|
|
55
|
+
"""Initialize the decoder."""
|
|
54
56
|
json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs)
|
|
55
57
|
|
|
56
58
|
def object_hook(self, obj):
|
|
59
|
+
"""Hook to decode base64 encoded binary data.""" # noqa: DOC201
|
|
57
60
|
if "__class__" not in obj:
|
|
58
61
|
return obj
|
|
59
62
|
type = obj["__class__"]
|
|
@@ -131,17 +134,38 @@ rule MACO {
|
|
|
131
134
|
|
|
132
135
|
|
|
133
136
|
def maco_extractor_validation(module: ModuleType) -> bool:
|
|
137
|
+
"""Validation function for extractors.
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
(bool): True if extractor belongs to MACO, False otherwise.
|
|
141
|
+
"""
|
|
134
142
|
if inspect.isclass(module):
|
|
135
143
|
# 'author' has to be implemented otherwise will raise an exception according to MACO
|
|
136
144
|
return hasattr(module, "author") and module.author
|
|
137
145
|
return False
|
|
138
146
|
|
|
139
147
|
|
|
140
|
-
def maco_extract_rules(module: Extractor) ->
|
|
148
|
+
def maco_extract_rules(module: Extractor) -> str:
|
|
149
|
+
"""Extracts YARA rules from extractor.
|
|
150
|
+
|
|
151
|
+
Returns:
|
|
152
|
+
(str): YARA rules
|
|
153
|
+
"""
|
|
141
154
|
return module.yara_rule
|
|
142
155
|
|
|
143
156
|
|
|
144
157
|
def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger) -> Tuple[List[str], List[str]]:
|
|
158
|
+
"""Looks for extractors using YARA rules.
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
root_directory (str): Root directory containing extractors
|
|
162
|
+
scanner (yara.Rules): Scanner to look for extractors using YARA rules
|
|
163
|
+
logger (Logger): Logger to use
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
Tuple[List[str], List[str]]: Returns a list of extractor directories and extractor files
|
|
167
|
+
|
|
168
|
+
"""
|
|
145
169
|
extractor_dirs = set([root_directory])
|
|
146
170
|
extractor_files = []
|
|
147
171
|
|
|
@@ -177,17 +201,22 @@ def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger
|
|
|
177
201
|
with open(path, "rb") as f:
|
|
178
202
|
data = f.read()
|
|
179
203
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
204
|
+
# Replace any relative importing with absolute
|
|
205
|
+
changed_imports = False
|
|
206
|
+
curr_dir = os.path.dirname(path)
|
|
207
|
+
split = curr_dir.split("/")[::-1]
|
|
208
|
+
for pattern in [RELATIVE_FROM_IMPORT_RE, RELATIVE_FROM_RE]:
|
|
209
|
+
for match in pattern.findall(data):
|
|
210
|
+
depth = match.count(b".")
|
|
211
|
+
abspath = ".".join(split[depth - 1 : split.index(package) + 1][::-1])
|
|
212
|
+
abspath += "." if pattern == RELATIVE_FROM_RE else ""
|
|
213
|
+
data = data.replace(f"from {match.decode()}".encode(), f"from {abspath}".encode(), 1)
|
|
214
|
+
changed_imports = True
|
|
215
|
+
|
|
216
|
+
# only write extractor files if imports were changed
|
|
217
|
+
if changed_imports:
|
|
218
|
+
with open(path, "wb") as f:
|
|
219
|
+
f.write(data)
|
|
191
220
|
|
|
192
221
|
if scanner.match(path):
|
|
193
222
|
# Add directory to list of hits for venv creation
|
|
@@ -282,7 +311,16 @@ def _install_required_packages(create_venv: bool, directories: List[str], python
|
|
|
282
311
|
return venvs
|
|
283
312
|
|
|
284
313
|
|
|
285
|
-
def find_and_insert_venv(path: str, venvs: List[str]):
|
|
314
|
+
def find_and_insert_venv(path: str, venvs: List[str]) -> Tuple[str, str]:
|
|
315
|
+
"""Finds the closest virtual environment to the extractor and inserts it into the PATH.
|
|
316
|
+
|
|
317
|
+
Args:
|
|
318
|
+
path (str): Path of extractor
|
|
319
|
+
venvs (List[str]): List of virtual environments
|
|
320
|
+
|
|
321
|
+
Returns:
|
|
322
|
+
(Tuple[str, str]): Virtual environment and site-packages path that's closest to the extractor
|
|
323
|
+
"""
|
|
286
324
|
venv = None
|
|
287
325
|
for venv in sorted(venvs, reverse=True):
|
|
288
326
|
venv_parent = os.path.dirname(venv)
|
|
@@ -303,14 +341,53 @@ def find_and_insert_venv(path: str, venvs: List[str]):
|
|
|
303
341
|
return venv, site_package
|
|
304
342
|
|
|
305
343
|
|
|
344
|
+
def register_extractor_module(
    extractor_source_file: str,
    module_name: str,
    venvs: List[str],
    extractor_module_callback: Callable[[ModuleType, str], None],
    logger: Logger,
):
    """Register the extractor module in isolation.

    Loads the module straight from its source file and hands it, together with
    the virtual environment selected for it, to the registration callback.

    Args:
        extractor_source_file (str): Path to source file of extractor
        module_name (str): The name of the module relative to the package directory
        venvs (List[str]): List of virtual environments
        extractor_module_callback (Callable[[ModuleType, str], None]): Callback used to register extractors
        logger (Logger): Logger to use

    """
    # Bind these before the try block: if anything raises before the venv lookup
    # completes, the cleanup below must not fail on unbound names and mask the
    # original exception with an UnboundLocalError.
    venv = site_packages = None
    try:
        logger.info(f"Inspecting '{extractor_source_file}' for extractors..")
        venv, site_packages = find_and_insert_venv(extractor_source_file, venvs)
        loader = SourceFileLoader(
            module_name,
            extractor_source_file,
        )
        extractor_module_callback(loader.load_module(), venv)
    finally:
        # Cleanup virtual environment that was loaded into PATH
        if venv and site_packages in sys.path:
            sys.path.remove(site_packages)
|
|
373
|
+
|
|
374
|
+
|
|
306
375
|
def register_extractors(
|
|
307
376
|
current_directory: str,
|
|
308
377
|
venvs: List[str],
|
|
309
378
|
extractor_files: List[str],
|
|
310
379
|
extractor_module_callback: Callable[[ModuleType, str], None],
|
|
311
380
|
logger: Logger,
|
|
312
|
-
default_loaded_modules: Set[str] = set(sys.modules.keys()),
|
|
313
381
|
):
|
|
382
|
+
"""Register extractors within the current directory.
|
|
383
|
+
|
|
384
|
+
Args:
|
|
385
|
+
current_directory (str): Current directory to register extractors found
|
|
386
|
+
venvs (List[str]): List of virtual environments
|
|
387
|
+
extractor_files (List[str]): List of extractor files found
|
|
388
|
+
extractor_module_callback (Callable[[ModuleType, str], None]): Callback used to register extractors
|
|
389
|
+
logger (Logger): Logger to use
|
|
390
|
+
"""
|
|
314
391
|
package_name = os.path.basename(current_directory)
|
|
315
392
|
parent_directory = os.path.dirname(current_directory)
|
|
316
393
|
if venvs and package_name in sys.modules:
|
|
@@ -325,74 +402,25 @@ def register_extractors(
|
|
|
325
402
|
sys.path.insert(1, current_directory)
|
|
326
403
|
sys.path.insert(1, parent_directory)
|
|
327
404
|
|
|
328
|
-
#
|
|
329
|
-
|
|
330
|
-
|
|
405
|
+
# Load the potential extractors directly from the source file
|
|
406
|
+
registration_processes = []
|
|
407
|
+
for extractor_source_file in extractor_files:
|
|
408
|
+
module_name = extractor_source_file.replace(f"{parent_directory}/", "").replace("/", ".")[:-3]
|
|
409
|
+
p = Process(
|
|
410
|
+
target=register_extractor_module,
|
|
411
|
+
args=(extractor_source_file, module_name, venvs, extractor_module_callback, logger),
|
|
412
|
+
)
|
|
413
|
+
p.start()
|
|
414
|
+
registration_processes.append(p)
|
|
331
415
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
if ispkg:
|
|
335
|
-
# Skip packages
|
|
336
|
-
continue
|
|
416
|
+
for p in registration_processes:
|
|
417
|
+
p.join()
|
|
337
418
|
|
|
338
|
-
module_path = os.path.realpath(os.path.join(module_path.path, module_name.rsplit(".", 1)[1]) + ".py")
|
|
339
|
-
if module_path in extractor_files:
|
|
340
|
-
# Cross this extractor off the list of extractors to find
|
|
341
|
-
logger.debug(f"Inspecting '{module_name}' for extractors..")
|
|
342
|
-
extractor_files.remove(module_path)
|
|
343
|
-
try:
|
|
344
|
-
# This is an extractor we've been looking for, load the module and invoke callback
|
|
345
|
-
venv, site_packages = find_and_insert_venv(module_path, venvs)
|
|
346
|
-
module = importlib.import_module(module_name)
|
|
347
|
-
module.__file__ = os.path.realpath(module.__file__)
|
|
348
|
-
|
|
349
|
-
# Patch the original directory information into the module
|
|
350
|
-
original_package_name = os.path.basename(current_directory)
|
|
351
|
-
module.__name__ = module.__name__.replace(package_name, original_package_name)
|
|
352
|
-
module.__package__ = module.__package__.replace(package_name, original_package_name)
|
|
353
|
-
extractor_module_callback(module, venv)
|
|
354
|
-
finally:
|
|
355
|
-
# Cleanup virtual environment that was loaded into PATH
|
|
356
|
-
if venv and site_packages in sys.path:
|
|
357
|
-
sys.path.remove(site_packages)
|
|
358
|
-
|
|
359
|
-
if not extractor_files:
|
|
360
|
-
return
|
|
361
419
|
finally:
|
|
362
420
|
# Cleanup changes made to PATH
|
|
363
421
|
sys.path.remove(parent_directory)
|
|
364
422
|
sys.path.remove(current_directory)
|
|
365
423
|
|
|
366
|
-
if package_venv and package_site_packages in sys.path:
|
|
367
|
-
sys.path.remove(package_site_packages)
|
|
368
|
-
|
|
369
|
-
# Remove any modules that were loaded to deconflict with later modules loads
|
|
370
|
-
[sys.modules.pop(k) for k in set(sys.modules.keys()) - default_loaded_modules]
|
|
371
|
-
|
|
372
|
-
# If there still exists extractor files we haven't found yet, try searching in the available subdirectories
|
|
373
|
-
if extractor_files:
|
|
374
|
-
for dir in os.listdir(current_directory):
|
|
375
|
-
path = os.path.join(current_directory, dir)
|
|
376
|
-
if dir == "__pycache__":
|
|
377
|
-
# Ignore the cache created
|
|
378
|
-
continue
|
|
379
|
-
elif dir.endswith(".egg-info"):
|
|
380
|
-
# Ignore these directories
|
|
381
|
-
continue
|
|
382
|
-
elif dir.startswith("."):
|
|
383
|
-
# Ignore hidden directories
|
|
384
|
-
continue
|
|
385
|
-
|
|
386
|
-
if os.path.isdir(path):
|
|
387
|
-
# Check subdirectory to find the rest of the detected extractors
|
|
388
|
-
register_extractors(
|
|
389
|
-
path, venvs, extractor_files, extractor_module_callback, logger, default_loaded_modules
|
|
390
|
-
)
|
|
391
|
-
|
|
392
|
-
if not extractor_files:
|
|
393
|
-
# We were able to find all the extractor files
|
|
394
|
-
break
|
|
395
|
-
|
|
396
424
|
|
|
397
425
|
def proxy_logging(queue: Queue, callback: Callable[[ModuleType, str], None], *args, **kwargs):
|
|
398
426
|
"""Ensures logging is set up correctly for a child process and then executes the callback."""
|
|
@@ -413,6 +441,17 @@ def import_extractors(
|
|
|
413
441
|
python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
|
414
442
|
skip_install: bool = False,
|
|
415
443
|
):
|
|
444
|
+
"""Import extractors in a given directory.
|
|
445
|
+
|
|
446
|
+
Args:
|
|
447
|
+
extractor_module_callback (Callable[[ModuleType, str], bool]): Callback used to register extractors
|
|
448
|
+
root_directory (str): Root directory to look for extractors
|
|
449
|
+
scanner (yara.Rules): Scanner to look for extractors that match YARA rule
|
|
450
|
+
create_venv (bool): Create/Use virtual environments
|
|
451
|
+
logger (Logger): Logger to use
|
|
452
|
+
python_version (str): Version of python to use when creating virtual environments
|
|
453
|
+
skip_install (bool): Skip installation of Python dependencies for extractors
|
|
454
|
+
"""
|
|
416
455
|
extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
|
|
417
456
|
|
|
418
457
|
logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
|
|
@@ -448,7 +487,24 @@ def run_extractor(
|
|
|
448
487
|
venv_script=VENV_SCRIPT,
|
|
449
488
|
json_decoder=Base64Decoder,
|
|
450
489
|
) -> Union[Dict[str, dict], model.ExtractorModel]:
|
|
451
|
-
"""Runs the maco extractor against sample either in current process or child process.
|
|
490
|
+
"""Runs the maco extractor against sample either in current process or child process.
|
|
491
|
+
|
|
492
|
+
Args:
|
|
493
|
+
sample_path (str): Path to sample
|
|
494
|
+
module_name (str): Name of extractor module
|
|
495
|
+
extractor_class (str): Name of extractor class in module
|
|
496
|
+
module_path (str): Path to Python module containing extractor
|
|
497
|
+
venv (str): Path to virtual environment associated to extractor
|
|
498
|
+
venv_script (str): Script to run extractor in a virtual environment
|
|
499
|
+
json_decoder (Base64Decoder): Decoder used for JSON
|
|
500
|
+
|
|
501
|
+
Raises:
|
|
502
|
+
AnalysisAbortedException: Raised when extractor voluntarily terminates execution
|
|
503
|
+
Exception: Raised when extractor raises an exception
|
|
504
|
+
|
|
505
|
+
Returns:
|
|
506
|
+
Union[Dict[str, dict], model.ExtractorModel]: Results from extractor
|
|
507
|
+
"""
|
|
452
508
|
if not venv:
|
|
453
509
|
key = f"{module_name}_{extractor_class}"
|
|
454
510
|
if key not in _loaded_extractors:
|
model_setup/maco/yara.py
CHANGED
|
@@ -1,25 +1,34 @@
|
|
|
1
|
+
"""yara-python facade that uses yara-x."""
|
|
2
|
+
|
|
1
3
|
import re
|
|
2
4
|
from collections import namedtuple
|
|
3
5
|
from itertools import cycle
|
|
4
|
-
from typing import Dict
|
|
6
|
+
from typing import Dict, List
|
|
5
7
|
|
|
6
8
|
import yara_x
|
|
7
9
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
+
from maco.exceptions import SyntaxError
|
|
10
11
|
|
|
11
|
-
|
|
12
|
+
# Raw string so that `\w` reaches the regex engine as written, not as an invalid string escape.
RULE_ID_RE = re.compile(r"(\w+)? ?rule (\w+)")
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
# Create interfaces that resembles yara-python (but is running yara-x under the hood)
|
|
15
16
|
class StringMatchInstance:
|
|
17
|
+
"""Instance of a string match."""
|
|
18
|
+
|
|
16
19
|
def __init__(self, match: yara_x.Match, file_content: bytes):
|
|
20
|
+
"""Initializes StringMatchInstance."""
|
|
17
21
|
self.matched_data = file_content[match.offset : match.offset + match.length]
|
|
18
22
|
self.matched_length = match.length
|
|
19
23
|
self.offset = match.offset
|
|
20
24
|
self.xor_key = match.xor_key
|
|
21
25
|
|
|
22
26
|
def plaintext(self) -> bytes:
|
|
27
|
+
"""Plaintext of the matched data.
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
(bytes): Plaintext of the matched cipher text
|
|
31
|
+
"""
|
|
23
32
|
if not self.xor_key:
|
|
24
33
|
# No need to XOR the matched data
|
|
25
34
|
return self.matched_data
|
|
@@ -28,17 +37,28 @@ class StringMatchInstance:
|
|
|
28
37
|
|
|
29
38
|
|
|
30
39
|
class StringMatch:
|
|
40
|
+
"""String match."""
|
|
41
|
+
|
|
31
42
|
def __init__(self, pattern: yara_x.Pattern, file_content: bytes):
|
|
43
|
+
"""Initializes StringMatch."""
|
|
32
44
|
self.identifier = pattern.identifier
|
|
33
45
|
self.instances = [StringMatchInstance(match, file_content) for match in pattern.matches]
|
|
34
46
|
self._is_xor = any([match.xor_key for match in pattern.matches])
|
|
35
47
|
|
|
36
48
|
def is_xor(self):
|
|
49
|
+
"""Checks if string match is xor'd.
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
(bool): True if match is xor'd
|
|
53
|
+
"""
|
|
37
54
|
return self._is_xor
|
|
38
55
|
|
|
39
56
|
|
|
40
57
|
class Match:
|
|
58
|
+
"""Match."""
|
|
59
|
+
|
|
41
60
|
def __init__(self, rule: yara_x.Rule, file_content: bytes):
|
|
61
|
+
"""Initializes Match."""
|
|
42
62
|
self.rule = rule.identifier
|
|
43
63
|
self.namespace = rule.namespace
|
|
44
64
|
self.tags = list(rule.tags) or []
|
|
@@ -50,7 +70,14 @@ class Match:
|
|
|
50
70
|
|
|
51
71
|
|
|
52
72
|
class Rules:
|
|
73
|
+
"""Rules."""
|
|
74
|
+
|
|
53
75
|
def __init__(self, source: str = None, sources: Dict[str, str] = None):
|
|
76
|
+
"""Initializes Rules.
|
|
77
|
+
|
|
78
|
+
Raises:
|
|
79
|
+
SyntaxError: Raised when there's a syntax error in the YARA rule.
|
|
80
|
+
"""
|
|
54
81
|
Rule = namedtuple("Rule", "identifier namespace is_global")
|
|
55
82
|
if source:
|
|
56
83
|
sources = {"default": source}
|
|
@@ -69,10 +96,20 @@ class Rules:
|
|
|
69
96
|
raise SyntaxError(e)
|
|
70
97
|
|
|
71
98
|
def __iter__(self):
|
|
99
|
+
"""Iterate over rules.
|
|
100
|
+
|
|
101
|
+
Yields:
|
|
102
|
+
YARA rules
|
|
103
|
+
"""
|
|
72
104
|
for rule in self._rules:
|
|
73
105
|
yield rule
|
|
74
106
|
|
|
75
|
-
def match(self, filepath: str = None, data: bytes = None):
|
|
107
|
+
def match(self, filepath: str = None, data: bytes = None) -> List[Match]:
|
|
108
|
+
"""Performs a scan to check for YARA rules matches based on the file, either given by path or buffer.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
(List[Match]): A list of YARA matches.
|
|
112
|
+
"""
|
|
76
113
|
if filepath:
|
|
77
114
|
with open(filepath, "rb") as fp:
|
|
78
115
|
data = fp.read()
|
|
@@ -81,4 +118,9 @@ class Rules:
|
|
|
81
118
|
|
|
82
119
|
|
|
83
120
|
def compile(source: str = None, sources: Dict[str, str] = None) -> Rules:
|
|
121
|
+
"""Compiles YARA rules from source or from sources.
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
(Rules): a Rules object
|
|
125
|
+
"""
|
|
84
126
|
return Rules(source, sources)
|