maco 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  from io import BytesIO
2
- from typing import Dict, List, Optional
2
+ from typing import List, Optional
3
3
 
4
4
  from maco import extractor, model, yara
5
5
 
6
- from . import complex_utils
6
+ from complex import complex_utils
7
7
 
8
8
 
9
9
  class Complex(extractor.Extractor):
@@ -50,7 +50,7 @@ class Complex(extractor.Extractor):
50
50
  other = complex_utils.getdata()["result"]
51
51
  self.logger.debug("got data from lib")
52
52
  # example - accessing yara strings
53
- strings = {y[2].decode("utf8") for x in matches for y in x.strings}
53
+ strings = sorted({z.plaintext().decode("utf8") for x in matches for y in x.strings for z in y.instances})
54
54
  self.logger.debug(f"{strings=}")
55
55
  # construct model of results
56
56
  tmp = model.ExtractorModel(family=self.family)
maco/collector.py CHANGED
@@ -3,15 +3,14 @@
3
3
  import inspect
4
4
  import logging
5
5
  import os
6
-
6
+ from multiprocessing import Manager, Process
7
7
  from tempfile import NamedTemporaryFile
8
- from typing import Any, BinaryIO, Dict, List
9
8
  from types import ModuleType
9
+ from typing import Any, BinaryIO, Dict, List, Union
10
10
 
11
- from maco import yara
12
11
  from pydantic import BaseModel
13
12
 
14
- from maco import extractor, model, utils
13
+ from maco import extractor, model, utils, yara
15
14
 
16
15
 
17
16
  class ExtractorLoadError(Exception):
@@ -21,12 +20,14 @@ class ExtractorLoadError(Exception):
21
20
  logger = logging.getLogger("maco.lib.helpers")
22
21
 
23
22
 
24
- def _verify_response(resp: BaseModel) -> Dict:
23
+ def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
25
24
  """Enforce types and verify properties, and remove defaults."""
25
+ if not resp:
26
+ return None
26
27
  # check the response is valid for its own model
27
28
  # this is useful if a restriction on the 'other' dictionary is needed
28
29
  resp_model = type(resp)
29
- if resp_model != model.ExtractorModel:
30
+ if resp_model != model.ExtractorModel and hasattr(resp_model, "model_validate"):
30
31
  resp = resp_model.model_validate(resp)
31
32
  # check the response is valid according to the ExtractorModel
32
33
  resp = model.ExtractorModel.model_validate(resp)
@@ -43,44 +44,59 @@ class Collector:
43
44
  ):
44
45
  """Discover and load extractors from file system."""
45
46
  path_extractors = os.path.realpath(path_extractors)
46
- self.path = path_extractors
47
- self.extractors = {}
48
- namespaced_rules = {}
49
-
50
- def extractor_module_callback(module: ModuleType, venv: str):
51
- members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
52
- for member in members:
53
- name, member = member
54
- if exclude and name in exclude:
55
- # Module is part of the exclusion list, skip
56
- logger.debug(f"exclude excluded '{name}'")
57
- return
58
-
59
- if include and name not in include:
60
- # Module wasn't part of the inclusion list, skip
61
- logger.debug(f"include excluded '{name}'")
62
- return
63
-
64
- # initialise and register
65
- logger.debug(f"register '{name}'")
66
- self.extractors[name] = dict(module=member, venv=venv, module_path=module.__file__)
67
- namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
68
-
69
- # Find the extractors within the given directory
70
- utils.import_extractors(
71
- path_extractors,
72
- yara.compile(source=utils.MACO_YARA_RULE),
73
- extractor_module_callback,
74
- logger,
75
- create_venv and os.path.isdir(path_extractors),
76
- )
77
-
78
- if not self.extractors:
79
- raise ExtractorLoadError("no extractors were loaded")
80
- logger.debug(f"found extractors {list(self.extractors.keys())}\n")
81
-
82
- # compile yara rules gathered from extractors
83
- self.rules = yara.compile(sources=namespaced_rules)
47
+ self.path: str = path_extractors
48
+ self.extractors: Dict[str, Dict[str, str]] = {}
49
+
50
+ with Manager() as manager:
51
+ extractors = manager.dict()
52
+ namespaced_rules = manager.dict()
53
+
54
+ def extractor_module_callback(module: ModuleType, venv: str):
55
+ members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
56
+ for member in members:
57
+ name, member = member
58
+ if exclude and name in exclude:
59
+ # Module is part of the exclusion list, skip
60
+ logger.debug(f"exclude excluded '{name}'")
61
+ return
62
+
63
+ if include and name not in include:
64
+ # Module wasn't part of the inclusion list, skip
65
+ logger.debug(f"include excluded '{name}'")
66
+ return
67
+
68
+ # initialise and register
69
+ logger.debug(f"register '{name}'")
70
+ extractors[name] = dict(
71
+ venv=venv,
72
+ module_path=module.__file__,
73
+ module_name=member.__module__,
74
+ extractor_class=member.__name__,
75
+ )
76
+ namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
77
+
78
+ # Find the extractors within the given directory
79
+ # Execute within a child process to ensure main process interpreter is kept clean
80
+ p = Process(
81
+ target=utils.import_extractors,
82
+ args=(
83
+ path_extractors,
84
+ yara.compile(source=utils.MACO_YARA_RULE),
85
+ extractor_module_callback,
86
+ logger,
87
+ create_venv and os.path.isdir(path_extractors),
88
+ ),
89
+ )
90
+ p.start()
91
+ p.join()
92
+
93
+ self.extractors = dict(extractors)
94
+ if not self.extractors:
95
+ raise ExtractorLoadError("no extractors were loaded")
96
+ logger.debug(f"found extractors {list(self.extractors.keys())}\n")
97
+
98
+ # compile yara rules gathered from extractors
99
+ self.rules = yara.compile(sources=dict(namespaced_rules))
84
100
 
85
101
  def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
86
102
  """Return extractors that should run based on yara rules."""
@@ -105,17 +121,13 @@ class Collector:
105
121
  ) -> Dict[str, Any]:
106
122
  """Run extractor with stream and verify output matches the model."""
107
123
  extractor = self.extractors[extractor_name]
108
- resp = None
109
124
  try:
110
- if extractor["venv"]:
111
- # Run extractor within a virtual environment
112
- with NamedTemporaryFile() as sample_path:
113
- sample_path.write(stream.read())
114
- sample_path.flush()
115
- return utils.run_in_venv(sample_path.name, **extractor)
116
- else:
117
- # Run extractor within on host environment
118
- resp = extractor["module"]().run(stream, matches)
125
+ # Run extractor on a copy of the sample
126
+ with NamedTemporaryFile() as sample_path:
127
+ sample_path.write(stream.read())
128
+ sample_path.flush()
129
+ # enforce types and verify properties, and remove defaults
130
+ return _verify_response(utils.run_extractor(sample_path.name, **extractor))
119
131
  except Exception:
120
132
  # caller can deal with the exception
121
133
  raise
@@ -123,9 +135,3 @@ class Collector:
123
135
  # make sure to reset where we are in the file
124
136
  # otherwise follow on extractors are going to read 0 bytes
125
137
  stream.seek(0)
126
-
127
- # enforce types and verify properties, and remove defaults
128
- if resp is not None:
129
- resp = _verify_response(resp)
130
-
131
- return resp
maco/utils.py CHANGED
@@ -6,9 +6,11 @@ import inspect
6
6
  import json
7
7
  import os
8
8
  import re
9
+ import shutil
9
10
  import subprocess
10
11
  import sys
11
12
  import tempfile
13
+
12
14
  from maco import yara
13
15
 
14
16
  if sys.version_info >= (3, 11):
@@ -21,8 +23,8 @@ from copy import deepcopy
21
23
  from glob import glob
22
24
  from logging import Logger
23
25
  from pkgutil import walk_packages
24
- from typing import Callable, Dict, Tuple, List, Set
25
26
  from types import ModuleType
27
+ from typing import Callable, Dict, List, Set, Tuple
26
28
 
27
29
  from maco.extractor import Extractor
28
30
 
@@ -67,7 +69,11 @@ import importlib
67
69
  import json
68
70
  import os
69
71
  import sys
70
- import yara
72
+
73
+ try:
74
+ from maco import yara
75
+ except:
76
+ import yara
71
77
 
72
78
  from base64 import b64encode
73
79
  parent_package_path = "{parent_package_path}"
@@ -244,6 +250,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
244
250
  logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
245
251
  venvs.append(venv_path)
246
252
 
253
+ # Cleanup any build directories that are the product of package installation
254
+ expected_build_path = os.path.join(dir, "build")
255
+ if os.path.exists(expected_build_path):
256
+ shutil.rmtree(expected_build_path)
257
+
247
258
  # Add directories to our visited list and check the parent of this directory on the next loop
248
259
  visited_dirs.append(dir)
249
260
  dir = os.path.dirname(dir)
@@ -399,21 +410,23 @@ def import_extractors(
399
410
  register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
400
411
 
401
412
 
402
- def run_in_venv(
413
+ def run_extractor(
403
414
  sample_path,
404
- module,
415
+ module_name,
416
+ extractor_class,
405
417
  module_path,
406
418
  venv,
407
419
  venv_script=VENV_SCRIPT,
408
420
  json_decoder=Base64Decoder,
409
421
  ) -> Dict[str, dict]:
410
422
  # Write temporary script in the same directory as extractor to resolve relative imports
411
- python_exe = os.path.join(venv, "bin", "python")
423
+ python_exe = sys.executable
424
+ if venv:
425
+ # If there is a linked virtual environment, execute within that environment
426
+ python_exe = os.path.join(venv, "bin", "python")
412
427
  dirname = os.path.dirname(module_path)
413
428
  with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
414
429
  with tempfile.NamedTemporaryFile() as output:
415
- module_name = module.__module__
416
- module_class = module.__name__
417
430
  parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
418
431
  root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
419
432
 
@@ -421,7 +434,7 @@ def run_in_venv(
421
434
  venv_script.format(
422
435
  parent_package_path=parent_package_path,
423
436
  module_name=module_name,
424
- module_class=module_class,
437
+ module_class=extractor_class,
425
438
  sample_path=sample_path,
426
439
  output_path=output.name,
427
440
  )
maco/yara.py CHANGED
@@ -1,11 +1,11 @@
1
1
  import re
2
- import yara
3
- import yara_x
4
-
5
2
  from collections import namedtuple
6
3
  from itertools import cycle
7
4
  from typing import Dict
8
5
 
6
+ import yara
7
+ import yara_x
8
+
9
9
  RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
10
10
 
11
11
 
@@ -42,7 +42,7 @@ class Match:
42
42
  def __init__(self, rule: yara_x.Rule, file_content: bytes):
43
43
  self.rule = rule.identifier
44
44
  self.namespace = rule.namespace
45
- self.tags = rule.tags if hasattr(rule, "tags") else []
45
+ self.tags = list(rule.tags) or []
46
46
  self.meta = dict()
47
47
  # Ensure metadata doesn't get overwritten
48
48
  for k, v in rule.metadata:
@@ -1,8 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maco
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Author: sl-govau
5
- Author-email:
6
5
  Maintainer: cccs-rs
7
6
  License: MIT License
8
7
 
@@ -31,11 +30,11 @@ Requires-Python: >=3.8
31
30
  Description-Content-Type: text/markdown
32
31
  License-File: LICENSE.md
33
32
  Requires-Dist: cart
34
- Requires-Dist: pydantic >=2.0.0
33
+ Requires-Dist: pydantic>=2.0.0
34
+ Requires-Dist: tomli>=1.1.0; python_version < "3.11"
35
35
  Requires-Dist: uv
36
36
  Requires-Dist: yara-python
37
- Requires-Dist: yara-x ==0.10.0
38
- Requires-Dist: tomli >=1.1.0 ; python_version < "3.11"
37
+ Requires-Dist: yara-x==0.11.0
39
38
 
40
39
  # Maco - Malware config extractor framework
41
40
 
@@ -3,38 +3,39 @@ demo_extractors/limit_other.py,sha256=oscnNFkwD9Dm8Lae66GsRaAwUoTWUmkYpJu_wIsJ6s
3
3
  demo_extractors/nothing.py,sha256=3aeQJTY-dakmVXmyfmrRM8YCQVT7q3bq880DFH1Ol_Y,607
4
4
  demo_extractors/shared.py,sha256=Wlvy77SCAR97gxi8uUhGYyjxGmDb-pOSvN8b1rXrVWs,304
5
5
  demo_extractors/complex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- demo_extractors/complex/complex.py,sha256=U4yVLFTBeW9ORyL7Q4Hu2vzJPSCFgR9u--lIjMl2tZU,2269
6
+ demo_extractors/complex/complex.py,sha256=TkNmR9UUYo1f2JVpJhGasLG-5wHZI05JYxMIXj16GKM,2307
7
7
  demo_extractors/complex/complex_utils.py,sha256=aec8kJsYUrMPo-waihkVLt-0QpiOPkw7dDqfT9MNuHk,123
8
8
  maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  maco/base_test.py,sha256=7uZTprPoFZRnPaO_hLkMGz5rVW9F9TozAUWtNeXouig,2555
10
10
  maco/cli.py,sha256=56xM2qqLNlvQgWBF3Uc1fDbG8_46PSx66Wqj2mMxl1E,7713
11
- maco/collector.py,sha256=Ja8WaoVVr00zMvGFwIJ5TtVV3f5LxMukbGcP13QQnPc,4811
11
+ maco/collector.py,sha256=uBk_RrnJAoiQkBxnNA-B7zIPj9fDm5gkFb4D_VyMWi8,5342
12
12
  maco/extractor.py,sha256=4ZQd8OfvEQYUIkUS3LzZ5tcioembuLhT9_uRVNKSsyM,2750
13
- maco/utils.py,sha256=JyAeuZpz_1Zm-hTUl54fVuI6FAoGOtcQel5zYQlKdXU,17826
14
- maco/yara.py,sha256=9YnN8Q0XAOJSnrQLuvBYZmwq7ROzDMICjlQfq0rU8rg,2946
13
+ maco/utils.py,sha256=lkZ4yb-LjkzjVpyY6INNWKyimYg5k3Y2N_Hr9CdrFbw,18232
14
+ maco/yara.py,sha256=vPzCqauVp52ivcTdt8zwrYqDdkLutGlesma9DhKPzHw,2925
15
15
  maco/model/__init__.py,sha256=SJrwdn12wklUFm2KoIgWjX_KgvJxCM7Ca9ntXOneuzc,31
16
16
  maco/model/model.py,sha256=ngen4ViyLdRo_z_TqZBjw2DN0NrRLpuxOy15-6QmtNw,23536
17
17
  model_setup/maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  model_setup/maco/base_test.py,sha256=7uZTprPoFZRnPaO_hLkMGz5rVW9F9TozAUWtNeXouig,2555
19
19
  model_setup/maco/cli.py,sha256=56xM2qqLNlvQgWBF3Uc1fDbG8_46PSx66Wqj2mMxl1E,7713
20
- model_setup/maco/collector.py,sha256=Ja8WaoVVr00zMvGFwIJ5TtVV3f5LxMukbGcP13QQnPc,4811
20
+ model_setup/maco/collector.py,sha256=uBk_RrnJAoiQkBxnNA-B7zIPj9fDm5gkFb4D_VyMWi8,5342
21
21
  model_setup/maco/extractor.py,sha256=4ZQd8OfvEQYUIkUS3LzZ5tcioembuLhT9_uRVNKSsyM,2750
22
- model_setup/maco/utils.py,sha256=JyAeuZpz_1Zm-hTUl54fVuI6FAoGOtcQel5zYQlKdXU,17826
23
- model_setup/maco/yara.py,sha256=9YnN8Q0XAOJSnrQLuvBYZmwq7ROzDMICjlQfq0rU8rg,2946
22
+ model_setup/maco/utils.py,sha256=lkZ4yb-LjkzjVpyY6INNWKyimYg5k3Y2N_Hr9CdrFbw,18232
23
+ model_setup/maco/yara.py,sha256=vPzCqauVp52ivcTdt8zwrYqDdkLutGlesma9DhKPzHw,2925
24
24
  model_setup/maco/model/__init__.py,sha256=SJrwdn12wklUFm2KoIgWjX_KgvJxCM7Ca9ntXOneuzc,31
25
25
  model_setup/maco/model/model.py,sha256=ngen4ViyLdRo_z_TqZBjw2DN0NrRLpuxOy15-6QmtNw,23536
26
- pipelines/publish.yaml,sha256=_1vDLcsOwDM3mIZCEvhB7UJOvyHvyiOSjg3BmmrYsWM,1483
27
- pipelines/test.yaml,sha256=WNeL3ZSvzYDbTas_4vqzLRwMingo63cUGgU5y-iMsN4,1288
26
+ pipelines/publish.yaml,sha256=xt3WNU-5kIICJgKIiiE94M3dWjS3uEiun-n4OmIssK8,1471
27
+ pipelines/test.yaml,sha256=3KOoo-8SqP_bTAscsz5V3xxnuL91J-62mTjnQD1Btag,1019
28
28
  tests/data/example.txt.cart,sha256=j4ZdDnFNVq7lb-Qi4pY4evOXKQPKG-GSg-n-uEqPhV0,289
29
+ tests/data/trigger_complex.txt,sha256=uqnLSrnyDGCmXwuPmZ2s8vdhH0hJs8DxvyaW_tuYY24,64
29
30
  tests/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
31
  tests/extractors/basic.py,sha256=r5eLCL6Ynr14nCBgtbLvUbm0NdrXizyc9c-4xBCNShU,828
31
32
  tests/extractors/basic_longer.py,sha256=1ClU2QD-Y0TOl_loNFvEqIEpTR5TSVJ6zg9ZmC-ESJo,860
32
33
  tests/extractors/test_basic.py,sha256=FLKekfSGM69HaiF7Vu_7D7KDXHZko-9hZkMO8_DoyYA,697
33
34
  tests/extractors/bob/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
35
  tests/extractors/bob/bob.py,sha256=Gy5p8KssJX87cwa9vVv8UBODF_ulbUteZXh15frW2hs,247
35
- maco-1.2.0.dist-info/LICENSE.md,sha256=gMSjshPhXvV_F1qxmeNkKdBqGWkd__fEJf4glS504bM,1478
36
- maco-1.2.0.dist-info/METADATA,sha256=pqS4bzX_eaOjO3XuJKWYrRz49UnF32Xq4sYHKU1hRBc,15629
37
- maco-1.2.0.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
38
- maco-1.2.0.dist-info/entry_points.txt,sha256=TpcwG1gedIg8Y7a9ZOv8aQpuwEUftCefDrAjzeP-o6U,39
39
- maco-1.2.0.dist-info/top_level.txt,sha256=iMRwuzmrHA3zSwiSeMIl6FWhzRpn_st-I4fAv-kw5_o,49
40
- maco-1.2.0.dist-info/RECORD,,
36
+ maco-1.2.1.dist-info/LICENSE.md,sha256=gMSjshPhXvV_F1qxmeNkKdBqGWkd__fEJf4glS504bM,1478
37
+ maco-1.2.1.dist-info/METADATA,sha256=iaJIFA_TcNHwAxT6ysbTLSaRmLib7lF2BC3IBXOoQg4,15610
38
+ maco-1.2.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
39
+ maco-1.2.1.dist-info/entry_points.txt,sha256=TpcwG1gedIg8Y7a9ZOv8aQpuwEUftCefDrAjzeP-o6U,39
40
+ maco-1.2.1.dist-info/top_level.txt,sha256=iMRwuzmrHA3zSwiSeMIl6FWhzRpn_st-I4fAv-kw5_o,49
41
+ maco-1.2.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.4.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -3,15 +3,14 @@
3
3
  import inspect
4
4
  import logging
5
5
  import os
6
-
6
+ from multiprocessing import Manager, Process
7
7
  from tempfile import NamedTemporaryFile
8
- from typing import Any, BinaryIO, Dict, List
9
8
  from types import ModuleType
9
+ from typing import Any, BinaryIO, Dict, List, Union
10
10
 
11
- from maco import yara
12
11
  from pydantic import BaseModel
13
12
 
14
- from maco import extractor, model, utils
13
+ from maco import extractor, model, utils, yara
15
14
 
16
15
 
17
16
  class ExtractorLoadError(Exception):
@@ -21,12 +20,14 @@ class ExtractorLoadError(Exception):
21
20
  logger = logging.getLogger("maco.lib.helpers")
22
21
 
23
22
 
24
- def _verify_response(resp: BaseModel) -> Dict:
23
+ def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
25
24
  """Enforce types and verify properties, and remove defaults."""
25
+ if not resp:
26
+ return None
26
27
  # check the response is valid for its own model
27
28
  # this is useful if a restriction on the 'other' dictionary is needed
28
29
  resp_model = type(resp)
29
- if resp_model != model.ExtractorModel:
30
+ if resp_model != model.ExtractorModel and hasattr(resp_model, "model_validate"):
30
31
  resp = resp_model.model_validate(resp)
31
32
  # check the response is valid according to the ExtractorModel
32
33
  resp = model.ExtractorModel.model_validate(resp)
@@ -43,44 +44,59 @@ class Collector:
43
44
  ):
44
45
  """Discover and load extractors from file system."""
45
46
  path_extractors = os.path.realpath(path_extractors)
46
- self.path = path_extractors
47
- self.extractors = {}
48
- namespaced_rules = {}
49
-
50
- def extractor_module_callback(module: ModuleType, venv: str):
51
- members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
52
- for member in members:
53
- name, member = member
54
- if exclude and name in exclude:
55
- # Module is part of the exclusion list, skip
56
- logger.debug(f"exclude excluded '{name}'")
57
- return
58
-
59
- if include and name not in include:
60
- # Module wasn't part of the inclusion list, skip
61
- logger.debug(f"include excluded '{name}'")
62
- return
63
-
64
- # initialise and register
65
- logger.debug(f"register '{name}'")
66
- self.extractors[name] = dict(module=member, venv=venv, module_path=module.__file__)
67
- namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
68
-
69
- # Find the extractors within the given directory
70
- utils.import_extractors(
71
- path_extractors,
72
- yara.compile(source=utils.MACO_YARA_RULE),
73
- extractor_module_callback,
74
- logger,
75
- create_venv and os.path.isdir(path_extractors),
76
- )
77
-
78
- if not self.extractors:
79
- raise ExtractorLoadError("no extractors were loaded")
80
- logger.debug(f"found extractors {list(self.extractors.keys())}\n")
81
-
82
- # compile yara rules gathered from extractors
83
- self.rules = yara.compile(sources=namespaced_rules)
47
+ self.path: str = path_extractors
48
+ self.extractors: Dict[str, Dict[str, str]] = {}
49
+
50
+ with Manager() as manager:
51
+ extractors = manager.dict()
52
+ namespaced_rules = manager.dict()
53
+
54
+ def extractor_module_callback(module: ModuleType, venv: str):
55
+ members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
56
+ for member in members:
57
+ name, member = member
58
+ if exclude and name in exclude:
59
+ # Module is part of the exclusion list, skip
60
+ logger.debug(f"exclude excluded '{name}'")
61
+ return
62
+
63
+ if include and name not in include:
64
+ # Module wasn't part of the inclusion list, skip
65
+ logger.debug(f"include excluded '{name}'")
66
+ return
67
+
68
+ # initialise and register
69
+ logger.debug(f"register '{name}'")
70
+ extractors[name] = dict(
71
+ venv=venv,
72
+ module_path=module.__file__,
73
+ module_name=member.__module__,
74
+ extractor_class=member.__name__,
75
+ )
76
+ namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
77
+
78
+ # Find the extractors within the given directory
79
+ # Execute within a child process to ensure main process interpreter is kept clean
80
+ p = Process(
81
+ target=utils.import_extractors,
82
+ args=(
83
+ path_extractors,
84
+ yara.compile(source=utils.MACO_YARA_RULE),
85
+ extractor_module_callback,
86
+ logger,
87
+ create_venv and os.path.isdir(path_extractors),
88
+ ),
89
+ )
90
+ p.start()
91
+ p.join()
92
+
93
+ self.extractors = dict(extractors)
94
+ if not self.extractors:
95
+ raise ExtractorLoadError("no extractors were loaded")
96
+ logger.debug(f"found extractors {list(self.extractors.keys())}\n")
97
+
98
+ # compile yara rules gathered from extractors
99
+ self.rules = yara.compile(sources=dict(namespaced_rules))
84
100
 
85
101
  def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
86
102
  """Return extractors that should run based on yara rules."""
@@ -105,17 +121,13 @@ class Collector:
105
121
  ) -> Dict[str, Any]:
106
122
  """Run extractor with stream and verify output matches the model."""
107
123
  extractor = self.extractors[extractor_name]
108
- resp = None
109
124
  try:
110
- if extractor["venv"]:
111
- # Run extractor within a virtual environment
112
- with NamedTemporaryFile() as sample_path:
113
- sample_path.write(stream.read())
114
- sample_path.flush()
115
- return utils.run_in_venv(sample_path.name, **extractor)
116
- else:
117
- # Run extractor within on host environment
118
- resp = extractor["module"]().run(stream, matches)
125
+ # Run extractor on a copy of the sample
126
+ with NamedTemporaryFile() as sample_path:
127
+ sample_path.write(stream.read())
128
+ sample_path.flush()
129
+ # enforce types and verify properties, and remove defaults
130
+ return _verify_response(utils.run_extractor(sample_path.name, **extractor))
119
131
  except Exception:
120
132
  # caller can deal with the exception
121
133
  raise
@@ -123,9 +135,3 @@ class Collector:
123
135
  # make sure to reset where we are in the file
124
136
  # otherwise follow on extractors are going to read 0 bytes
125
137
  stream.seek(0)
126
-
127
- # enforce types and verify properties, and remove defaults
128
- if resp is not None:
129
- resp = _verify_response(resp)
130
-
131
- return resp
model_setup/maco/utils.py CHANGED
@@ -6,9 +6,11 @@ import inspect
6
6
  import json
7
7
  import os
8
8
  import re
9
+ import shutil
9
10
  import subprocess
10
11
  import sys
11
12
  import tempfile
13
+
12
14
  from maco import yara
13
15
 
14
16
  if sys.version_info >= (3, 11):
@@ -21,8 +23,8 @@ from copy import deepcopy
21
23
  from glob import glob
22
24
  from logging import Logger
23
25
  from pkgutil import walk_packages
24
- from typing import Callable, Dict, Tuple, List, Set
25
26
  from types import ModuleType
27
+ from typing import Callable, Dict, List, Set, Tuple
26
28
 
27
29
  from maco.extractor import Extractor
28
30
 
@@ -67,7 +69,11 @@ import importlib
67
69
  import json
68
70
  import os
69
71
  import sys
70
- import yara
72
+
73
+ try:
74
+ from maco import yara
75
+ except:
76
+ import yara
71
77
 
72
78
  from base64 import b64encode
73
79
  parent_package_path = "{parent_package_path}"
@@ -244,6 +250,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
244
250
  logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
245
251
  venvs.append(venv_path)
246
252
 
253
+ # Cleanup any build directories that are the product of package installation
254
+ expected_build_path = os.path.join(dir, "build")
255
+ if os.path.exists(expected_build_path):
256
+ shutil.rmtree(expected_build_path)
257
+
247
258
  # Add directories to our visited list and check the parent of this directory on the next loop
248
259
  visited_dirs.append(dir)
249
260
  dir = os.path.dirname(dir)
@@ -399,21 +410,23 @@ def import_extractors(
399
410
  register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
400
411
 
401
412
 
402
- def run_in_venv(
413
+ def run_extractor(
403
414
  sample_path,
404
- module,
415
+ module_name,
416
+ extractor_class,
405
417
  module_path,
406
418
  venv,
407
419
  venv_script=VENV_SCRIPT,
408
420
  json_decoder=Base64Decoder,
409
421
  ) -> Dict[str, dict]:
410
422
  # Write temporary script in the same directory as extractor to resolve relative imports
411
- python_exe = os.path.join(venv, "bin", "python")
423
+ python_exe = sys.executable
424
+ if venv:
425
+ # If there is a linked virtual environment, execute within that environment
426
+ python_exe = os.path.join(venv, "bin", "python")
412
427
  dirname = os.path.dirname(module_path)
413
428
  with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
414
429
  with tempfile.NamedTemporaryFile() as output:
415
- module_name = module.__module__
416
- module_class = module.__name__
417
430
  parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
418
431
  root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
419
432
 
@@ -421,7 +434,7 @@ def run_in_venv(
421
434
  venv_script.format(
422
435
  parent_package_path=parent_package_path,
423
436
  module_name=module_name,
424
- module_class=module_class,
437
+ module_class=extractor_class,
425
438
  sample_path=sample_path,
426
439
  output_path=output.name,
427
440
  )
model_setup/maco/yara.py CHANGED
@@ -1,11 +1,11 @@
1
1
  import re
2
- import yara
3
- import yara_x
4
-
5
2
  from collections import namedtuple
6
3
  from itertools import cycle
7
4
  from typing import Dict
8
5
 
6
+ import yara
7
+ import yara_x
8
+
9
9
  RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
10
10
 
11
11
 
@@ -42,7 +42,7 @@ class Match:
42
42
  def __init__(self, rule: yara_x.Rule, file_content: bytes):
43
43
  self.rule = rule.identifier
44
44
  self.namespace = rule.namespace
45
- self.tags = rule.tags if hasattr(rule, "tags") else []
45
+ self.tags = list(rule.tags) or []
46
46
  self.meta = dict()
47
47
  # Ensure metadata doesn't get overwritten
48
48
  for k, v in rule.metadata:
pipelines/publish.yaml CHANGED
@@ -9,7 +9,7 @@ trigger:
9
9
  pr: none
10
10
 
11
11
  pool:
12
- vmImage: "ubuntu-20.04"
12
+ vmImage: "ubuntu-22.04"
13
13
 
14
14
  jobs:
15
15
  - job: test
@@ -20,12 +20,12 @@ jobs:
20
20
  python.version: '3.8'
21
21
  Python39:
22
22
  python.version: '3.9'
23
- # Python310:
24
- # python.version: '3.10'
25
- # Python311:
26
- # python.version: '3.11'
27
- # Python312:
28
- # python.version: '3.12'
23
+ Python310:
24
+ python.version: '3.10'
25
+ Python311:
26
+ python.version: '3.11'
27
+ Python312:
28
+ python.version: '3.12'
29
29
  steps:
30
30
  - task: UsePythonVersion@0
31
31
  displayName: 'Use Python $(python.version)'
pipelines/test.yaml CHANGED
@@ -4,7 +4,7 @@ trigger: ["*"]
4
4
  pr: ["*"]
5
5
 
6
6
  pool:
7
- vmImage: "ubuntu-20.04"
7
+ vmImage: "ubuntu-22.04"
8
8
 
9
9
  jobs:
10
10
  - job: run_test
@@ -27,15 +27,19 @@ jobs:
27
27
  displayName: Set python version
28
28
  inputs:
29
29
  versionSpec: "$(python.version)"
30
+
30
31
  - script: |
31
- [ ! -d "$(pwd)/tests" ] && echo "No tests found" && exit
32
- [ -f $(pwd)/requirements.txt ] && sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir -r $(pwd)/requirements.txt
33
- [ -f $(pwd)/tests/requirements.txt ] && sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir -r $(pwd)/tests/requirements.txt
34
- sudo rm -rf /tmp/* /var/lib/apt/lists/* ~/.cache/pip
32
+ runtests=true
33
+ if [ ! -d "$(pwd)/tests" ]; then
34
+ echo "No tests found"
35
+ runtest=false
36
+ else
37
+ python -m pip install -U tox
38
+ fi
39
+ echo "##vso[task.setvariable variable=runtests;]$runtests"
40
+ displayName: Install tox
35
41
 
36
- displayName: Setup environment
37
42
  - script: |
38
- [ ! -d "$(pwd)/tests" ] && echo "No tests found" && exit
39
- export REPO_NAME=${BUILD_REPOSITORY_NAME##*/}
40
- python -m pytest -p no:cacheprovider --durations=10 -rsx -vv -W ignore::DeprecationWarning
41
- displayName: Test
43
+ python -m tox -e py
44
+ displayName: "Run tests"
45
+ condition: and(succeeded(), eq(variables.runtests, true))
@@ -0,0 +1,6 @@
1
+ file to trigger demo extractors
2
+
3
+ self_trigger
4
+
5
+ Complex
6
+ Paradise