maco 1.2.0__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {maco-1.2.0/maco.egg-info → maco-1.2.1}/PKG-INFO +2 -3
  2. {maco-1.2.0 → maco-1.2.1}/demo_extractors/complex/complex.py +3 -3
  3. maco-1.2.1/maco/collector.py +137 -0
  4. {maco-1.2.0 → maco-1.2.1}/maco/utils.py +21 -8
  5. {maco-1.2.0/model_setup → maco-1.2.1}/maco/yara.py +4 -4
  6. {maco-1.2.0 → maco-1.2.1/maco.egg-info}/PKG-INFO +2 -3
  7. {maco-1.2.0 → maco-1.2.1}/maco.egg-info/SOURCES.txt +2 -1
  8. {maco-1.2.0 → maco-1.2.1}/maco.egg-info/requires.txt +1 -1
  9. maco-1.2.1/model_setup/maco/collector.py +137 -0
  10. {maco-1.2.0 → maco-1.2.1}/model_setup/maco/utils.py +21 -8
  11. {maco-1.2.0 → maco-1.2.1/model_setup}/maco/yara.py +4 -4
  12. {maco-1.2.0 → maco-1.2.1}/pipelines/publish.yaml +7 -7
  13. maco-1.2.1/pipelines/test.yaml +45 -0
  14. {maco-1.2.0 → maco-1.2.1}/pyproject.toml +5 -10
  15. {maco-1.2.0 → maco-1.2.1}/requirements.txt +1 -1
  16. maco-1.2.1/tests/data/trigger_complex.txt +6 -0
  17. maco-1.2.1/tests/test_demo_extractors.py +60 -0
  18. maco-1.2.1/tox.ini +10 -0
  19. maco-1.2.0/maco/collector.py +0 -131
  20. maco-1.2.0/model_setup/maco/collector.py +0 -131
  21. maco-1.2.0/pipelines/test.yaml +0 -41
  22. maco-1.2.0/setup.py +0 -25
  23. maco-1.2.0/tox.ini +0 -10
  24. {maco-1.2.0 → maco-1.2.1}/.gitignore +0 -0
  25. {maco-1.2.0 → maco-1.2.1}/.vscode/settings.json +0 -0
  26. {maco-1.2.0 → maco-1.2.1}/LICENSE.md +0 -0
  27. {maco-1.2.0 → maco-1.2.1}/README.md +0 -0
  28. {maco-1.2.0 → maco-1.2.1}/demo_extractors/complex/__init__.py +0 -0
  29. {maco-1.2.0 → maco-1.2.1}/demo_extractors/complex/complex_utils.py +0 -0
  30. {maco-1.2.0 → maco-1.2.1}/demo_extractors/elfy.py +0 -0
  31. {maco-1.2.0 → maco-1.2.1}/demo_extractors/limit_other.py +0 -0
  32. {maco-1.2.0 → maco-1.2.1}/demo_extractors/nothing.py +0 -0
  33. {maco-1.2.0 → maco-1.2.1}/demo_extractors/shared.py +0 -0
  34. {maco-1.2.0 → maco-1.2.1}/maco/__init__.py +0 -0
  35. {maco-1.2.0 → maco-1.2.1}/maco/base_test.py +0 -0
  36. {maco-1.2.0 → maco-1.2.1}/maco/cli.py +0 -0
  37. {maco-1.2.0 → maco-1.2.1}/maco/extractor.py +0 -0
  38. {maco-1.2.0 → maco-1.2.1}/maco/model/__init__.py +0 -0
  39. {maco-1.2.0 → maco-1.2.1}/maco/model/model.py +0 -0
  40. {maco-1.2.0 → maco-1.2.1}/maco.egg-info/dependency_links.txt +0 -0
  41. {maco-1.2.0 → maco-1.2.1}/maco.egg-info/entry_points.txt +0 -0
  42. {maco-1.2.0 → maco-1.2.1}/maco.egg-info/top_level.txt +0 -0
  43. {maco-1.2.0 → maco-1.2.1}/model_setup/LICENSE.md +0 -0
  44. {maco-1.2.0 → maco-1.2.1}/model_setup/README.md +0 -0
  45. {maco-1.2.0 → maco-1.2.1}/model_setup/maco/__init__.py +0 -0
  46. {maco-1.2.0 → maco-1.2.1}/model_setup/maco/base_test.py +0 -0
  47. {maco-1.2.0 → maco-1.2.1}/model_setup/maco/cli.py +0 -0
  48. {maco-1.2.0 → maco-1.2.1}/model_setup/maco/extractor.py +0 -0
  49. {maco-1.2.0 → maco-1.2.1}/model_setup/maco/model/__init__.py +0 -0
  50. {maco-1.2.0 → maco-1.2.1}/model_setup/maco/model/model.py +0 -0
  51. {maco-1.2.0 → maco-1.2.1}/model_setup/pyproject.toml +0 -0
  52. {maco-1.2.0 → maco-1.2.1}/model_setup/setup.py +0 -0
  53. {maco-1.2.0 → maco-1.2.1}/setup.cfg +0 -0
  54. {maco-1.2.0 → maco-1.2.1}/tests/data/example.txt.cart +0 -0
  55. {maco-1.2.0 → maco-1.2.1}/tests/extractors/__init__.py +0 -0
  56. {maco-1.2.0 → maco-1.2.1}/tests/extractors/basic.py +0 -0
  57. {maco-1.2.0 → maco-1.2.1}/tests/extractors/basic_longer.py +0 -0
  58. {maco-1.2.0 → maco-1.2.1}/tests/extractors/bob/__init__.py +0 -0
  59. {maco-1.2.0 → maco-1.2.1}/tests/extractors/bob/bob.py +0 -0
  60. {maco-1.2.0 → maco-1.2.1}/tests/extractors/test_basic.py +0 -0
  61. {maco-1.2.0 → maco-1.2.1}/tests/pytest.ini +0 -0
  62. {maco-1.2.0 → maco-1.2.1}/tests/requirements.txt +0 -0
  63. {maco-1.2.0 → maco-1.2.1}/tests/test_base_test.py +0 -0
  64. {maco-1.2.0 → maco-1.2.1}/tests/test_detection.py +0 -0
  65. {maco-1.2.0 → maco-1.2.1}/tests/test_extractor.py +0 -0
  66. {maco-1.2.0 → maco-1.2.1}/tests/test_helpers.py +0 -0
  67. {maco-1.2.0 → maco-1.2.1}/tests/test_model.py +0 -0
@@ -1,8 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maco
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Author: sl-govau
5
- Author-email:
6
5
  Maintainer: cccs-rs
7
6
  License: MIT License
8
7
 
@@ -35,7 +34,7 @@ Requires-Dist: pydantic>=2.0.0
35
34
  Requires-Dist: tomli>=1.1.0; python_version < "3.11"
36
35
  Requires-Dist: uv
37
36
  Requires-Dist: yara-python
38
- Requires-Dist: yara-x==0.10.0
37
+ Requires-Dist: yara-x==0.11.0
39
38
 
40
39
  # Maco - Malware config extractor framework
41
40
 
@@ -1,9 +1,9 @@
1
1
  from io import BytesIO
2
- from typing import Dict, List, Optional
2
+ from typing import List, Optional
3
3
 
4
4
  from maco import extractor, model, yara
5
5
 
6
- from . import complex_utils
6
+ from complex import complex_utils
7
7
 
8
8
 
9
9
  class Complex(extractor.Extractor):
@@ -50,7 +50,7 @@ class Complex(extractor.Extractor):
50
50
  other = complex_utils.getdata()["result"]
51
51
  self.logger.debug("got data from lib")
52
52
  # example - accessing yara strings
53
- strings = {y[2].decode("utf8") for x in matches for y in x.strings}
53
+ strings = sorted({z.plaintext().decode("utf8") for x in matches for y in x.strings for z in y.instances})
54
54
  self.logger.debug(f"{strings=}")
55
55
  # construct model of results
56
56
  tmp = model.ExtractorModel(family=self.family)
@@ -0,0 +1,137 @@
1
+ """Convenience functions for discovering your extractors."""
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ from multiprocessing import Manager, Process
7
+ from tempfile import NamedTemporaryFile
8
+ from types import ModuleType
9
+ from typing import Any, BinaryIO, Dict, List, Union
10
+
11
+ from pydantic import BaseModel
12
+
13
+ from maco import extractor, model, utils, yara
14
+
15
+
16
+ class ExtractorLoadError(Exception):
17
+ pass
18
+
19
+
20
+ logger = logging.getLogger("maco.lib.helpers")
21
+
22
+
23
+ def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
24
+ """Enforce types and verify properties, and remove defaults."""
25
+ if not resp:
26
+ return None
27
+ # check the response is valid for its own model
28
+ # this is useful if a restriction on the 'other' dictionary is needed
29
+ resp_model = type(resp)
30
+ if resp_model != model.ExtractorModel and hasattr(resp_model, "model_validate"):
31
+ resp = resp_model.model_validate(resp)
32
+ # check the response is valid according to the ExtractorModel
33
+ resp = model.ExtractorModel.model_validate(resp)
34
+ # coerce sets to correct types
35
+ # otherwise we end up with sets where we expect lists
36
+ resp = model.ExtractorModel(**resp.model_dump())
37
+ # dump model to dict
38
+ return resp.model_dump(exclude_defaults=True)
39
+
40
+
41
+ class Collector:
42
+ def __init__(
43
+ self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
44
+ ):
45
+ """Discover and load extractors from file system."""
46
+ path_extractors = os.path.realpath(path_extractors)
47
+ self.path: str = path_extractors
48
+ self.extractors: Dict[str, Dict[str, str]] = {}
49
+
50
+ with Manager() as manager:
51
+ extractors = manager.dict()
52
+ namespaced_rules = manager.dict()
53
+
54
+ def extractor_module_callback(module: ModuleType, venv: str):
55
+ members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
56
+ for member in members:
57
+ name, member = member
58
+ if exclude and name in exclude:
59
+ # Module is part of the exclusion list, skip
60
+ logger.debug(f"exclude excluded '{name}'")
61
+ return
62
+
63
+ if include and name not in include:
64
+ # Module wasn't part of the inclusion list, skip
65
+ logger.debug(f"include excluded '{name}'")
66
+ return
67
+
68
+ # initialise and register
69
+ logger.debug(f"register '{name}'")
70
+ extractors[name] = dict(
71
+ venv=venv,
72
+ module_path=module.__file__,
73
+ module_name=member.__module__,
74
+ extractor_class=member.__name__,
75
+ )
76
+ namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
77
+
78
+ # Find the extractors within the given directory
79
+ # Execute within a child process to ensure main process interpreter is kept clean
80
+ p = Process(
81
+ target=utils.import_extractors,
82
+ args=(
83
+ path_extractors,
84
+ yara.compile(source=utils.MACO_YARA_RULE),
85
+ extractor_module_callback,
86
+ logger,
87
+ create_venv and os.path.isdir(path_extractors),
88
+ ),
89
+ )
90
+ p.start()
91
+ p.join()
92
+
93
+ self.extractors = dict(extractors)
94
+ if not self.extractors:
95
+ raise ExtractorLoadError("no extractors were loaded")
96
+ logger.debug(f"found extractors {list(self.extractors.keys())}\n")
97
+
98
+ # compile yara rules gathered from extractors
99
+ self.rules = yara.compile(sources=dict(namespaced_rules))
100
+
101
+ def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
102
+ """Return extractors that should run based on yara rules."""
103
+ # execute yara rules on file to find extractors we should run
104
+ # yara can't run on a stream so we give it a bytestring
105
+ matches = self.rules.match(data=stream.read())
106
+ stream.seek(0)
107
+ if not matches:
108
+ return
109
+ # get all rules that hit for each extractor
110
+ runs = {}
111
+ for match in matches:
112
+ runs.setdefault(match.namespace, []).append(match)
113
+
114
+ return runs
115
+
116
+ def extract(
117
+ self,
118
+ stream: BinaryIO,
119
+ matches: List[yara.Match],
120
+ extractor_name: str,
121
+ ) -> Dict[str, Any]:
122
+ """Run extractor with stream and verify output matches the model."""
123
+ extractor = self.extractors[extractor_name]
124
+ try:
125
+ # Run extractor on a copy of the sample
126
+ with NamedTemporaryFile() as sample_path:
127
+ sample_path.write(stream.read())
128
+ sample_path.flush()
129
+ # enforce types and verify properties, and remove defaults
130
+ return _verify_response(utils.run_extractor(sample_path.name, **extractor))
131
+ except Exception:
132
+ # caller can deal with the exception
133
+ raise
134
+ finally:
135
+ # make sure to reset where we are in the file
136
+ # otherwise follow on extractors are going to read 0 bytes
137
+ stream.seek(0)
@@ -6,9 +6,11 @@ import inspect
6
6
  import json
7
7
  import os
8
8
  import re
9
+ import shutil
9
10
  import subprocess
10
11
  import sys
11
12
  import tempfile
13
+
12
14
  from maco import yara
13
15
 
14
16
  if sys.version_info >= (3, 11):
@@ -21,8 +23,8 @@ from copy import deepcopy
21
23
  from glob import glob
22
24
  from logging import Logger
23
25
  from pkgutil import walk_packages
24
- from typing import Callable, Dict, Tuple, List, Set
25
26
  from types import ModuleType
27
+ from typing import Callable, Dict, List, Set, Tuple
26
28
 
27
29
  from maco.extractor import Extractor
28
30
 
@@ -67,7 +69,11 @@ import importlib
67
69
  import json
68
70
  import os
69
71
  import sys
70
- import yara
72
+
73
+ try:
74
+ from maco import yara
75
+ except:
76
+ import yara
71
77
 
72
78
  from base64 import b64encode
73
79
  parent_package_path = "{parent_package_path}"
@@ -244,6 +250,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
244
250
  logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
245
251
  venvs.append(venv_path)
246
252
 
253
+ # Cleanup any build directories that are the product of package installation
254
+ expected_build_path = os.path.join(dir, "build")
255
+ if os.path.exists(expected_build_path):
256
+ shutil.rmtree(expected_build_path)
257
+
247
258
  # Add directories to our visited list and check the parent of this directory on the next loop
248
259
  visited_dirs.append(dir)
249
260
  dir = os.path.dirname(dir)
@@ -399,21 +410,23 @@ def import_extractors(
399
410
  register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
400
411
 
401
412
 
402
- def run_in_venv(
413
+ def run_extractor(
403
414
  sample_path,
404
- module,
415
+ module_name,
416
+ extractor_class,
405
417
  module_path,
406
418
  venv,
407
419
  venv_script=VENV_SCRIPT,
408
420
  json_decoder=Base64Decoder,
409
421
  ) -> Dict[str, dict]:
410
422
  # Write temporary script in the same directory as extractor to resolve relative imports
411
- python_exe = os.path.join(venv, "bin", "python")
423
+ python_exe = sys.executable
424
+ if venv:
425
+ # If there is a linked virtual environment, execute within that environment
426
+ python_exe = os.path.join(venv, "bin", "python")
412
427
  dirname = os.path.dirname(module_path)
413
428
  with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
414
429
  with tempfile.NamedTemporaryFile() as output:
415
- module_name = module.__module__
416
- module_class = module.__name__
417
430
  parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
418
431
  root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
419
432
 
@@ -421,7 +434,7 @@ def run_in_venv(
421
434
  venv_script.format(
422
435
  parent_package_path=parent_package_path,
423
436
  module_name=module_name,
424
- module_class=module_class,
437
+ module_class=extractor_class,
425
438
  sample_path=sample_path,
426
439
  output_path=output.name,
427
440
  )
@@ -1,11 +1,11 @@
1
1
  import re
2
- import yara
3
- import yara_x
4
-
5
2
  from collections import namedtuple
6
3
  from itertools import cycle
7
4
  from typing import Dict
8
5
 
6
+ import yara
7
+ import yara_x
8
+
9
9
  RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
10
10
 
11
11
 
@@ -42,7 +42,7 @@ class Match:
42
42
  def __init__(self, rule: yara_x.Rule, file_content: bytes):
43
43
  self.rule = rule.identifier
44
44
  self.namespace = rule.namespace
45
- self.tags = rule.tags if hasattr(rule, "tags") else []
45
+ self.tags = list(rule.tags) or []
46
46
  self.meta = dict()
47
47
  # Ensure metadata doesn't get overwritten
48
48
  for k, v in rule.metadata:
@@ -1,8 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maco
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Author: sl-govau
5
- Author-email:
6
5
  Maintainer: cccs-rs
7
6
  License: MIT License
8
7
 
@@ -35,7 +34,7 @@ Requires-Dist: pydantic>=2.0.0
35
34
  Requires-Dist: tomli>=1.1.0; python_version < "3.11"
36
35
  Requires-Dist: uv
37
36
  Requires-Dist: yara-python
38
- Requires-Dist: yara-x==0.10.0
37
+ Requires-Dist: yara-x==0.11.0
39
38
 
40
39
  # Maco - Malware config extractor framework
41
40
 
@@ -3,7 +3,6 @@ LICENSE.md
3
3
  README.md
4
4
  pyproject.toml
5
5
  requirements.txt
6
- setup.py
7
6
  tox.ini
8
7
  .vscode/settings.json
9
8
  demo_extractors/elfy.py
@@ -47,11 +46,13 @@ pipelines/test.yaml
47
46
  tests/pytest.ini
48
47
  tests/requirements.txt
49
48
  tests/test_base_test.py
49
+ tests/test_demo_extractors.py
50
50
  tests/test_detection.py
51
51
  tests/test_extractor.py
52
52
  tests/test_helpers.py
53
53
  tests/test_model.py
54
54
  tests/data/example.txt.cart
55
+ tests/data/trigger_complex.txt
55
56
  tests/extractors/__init__.py
56
57
  tests/extractors/basic.py
57
58
  tests/extractors/basic_longer.py
@@ -2,7 +2,7 @@ cart
2
2
  pydantic>=2.0.0
3
3
  uv
4
4
  yara-python
5
- yara-x==0.10.0
5
+ yara-x==0.11.0
6
6
 
7
7
  [:python_version < "3.11"]
8
8
  tomli>=1.1.0
@@ -0,0 +1,137 @@
1
+ """Convenience functions for discovering your extractors."""
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ from multiprocessing import Manager, Process
7
+ from tempfile import NamedTemporaryFile
8
+ from types import ModuleType
9
+ from typing import Any, BinaryIO, Dict, List, Union
10
+
11
+ from pydantic import BaseModel
12
+
13
+ from maco import extractor, model, utils, yara
14
+
15
+
16
+ class ExtractorLoadError(Exception):
17
+ pass
18
+
19
+
20
+ logger = logging.getLogger("maco.lib.helpers")
21
+
22
+
23
+ def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
24
+ """Enforce types and verify properties, and remove defaults."""
25
+ if not resp:
26
+ return None
27
+ # check the response is valid for its own model
28
+ # this is useful if a restriction on the 'other' dictionary is needed
29
+ resp_model = type(resp)
30
+ if resp_model != model.ExtractorModel and hasattr(resp_model, "model_validate"):
31
+ resp = resp_model.model_validate(resp)
32
+ # check the response is valid according to the ExtractorModel
33
+ resp = model.ExtractorModel.model_validate(resp)
34
+ # coerce sets to correct types
35
+ # otherwise we end up with sets where we expect lists
36
+ resp = model.ExtractorModel(**resp.model_dump())
37
+ # dump model to dict
38
+ return resp.model_dump(exclude_defaults=True)
39
+
40
+
41
+ class Collector:
42
+ def __init__(
43
+ self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
44
+ ):
45
+ """Discover and load extractors from file system."""
46
+ path_extractors = os.path.realpath(path_extractors)
47
+ self.path: str = path_extractors
48
+ self.extractors: Dict[str, Dict[str, str]] = {}
49
+
50
+ with Manager() as manager:
51
+ extractors = manager.dict()
52
+ namespaced_rules = manager.dict()
53
+
54
+ def extractor_module_callback(module: ModuleType, venv: str):
55
+ members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
56
+ for member in members:
57
+ name, member = member
58
+ if exclude and name in exclude:
59
+ # Module is part of the exclusion list, skip
60
+ logger.debug(f"exclude excluded '{name}'")
61
+ return
62
+
63
+ if include and name not in include:
64
+ # Module wasn't part of the inclusion list, skip
65
+ logger.debug(f"include excluded '{name}'")
66
+ return
67
+
68
+ # initialise and register
69
+ logger.debug(f"register '{name}'")
70
+ extractors[name] = dict(
71
+ venv=venv,
72
+ module_path=module.__file__,
73
+ module_name=member.__module__,
74
+ extractor_class=member.__name__,
75
+ )
76
+ namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
77
+
78
+ # Find the extractors within the given directory
79
+ # Execute within a child process to ensure main process interpreter is kept clean
80
+ p = Process(
81
+ target=utils.import_extractors,
82
+ args=(
83
+ path_extractors,
84
+ yara.compile(source=utils.MACO_YARA_RULE),
85
+ extractor_module_callback,
86
+ logger,
87
+ create_venv and os.path.isdir(path_extractors),
88
+ ),
89
+ )
90
+ p.start()
91
+ p.join()
92
+
93
+ self.extractors = dict(extractors)
94
+ if not self.extractors:
95
+ raise ExtractorLoadError("no extractors were loaded")
96
+ logger.debug(f"found extractors {list(self.extractors.keys())}\n")
97
+
98
+ # compile yara rules gathered from extractors
99
+ self.rules = yara.compile(sources=dict(namespaced_rules))
100
+
101
+ def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
102
+ """Return extractors that should run based on yara rules."""
103
+ # execute yara rules on file to find extractors we should run
104
+ # yara can't run on a stream so we give it a bytestring
105
+ matches = self.rules.match(data=stream.read())
106
+ stream.seek(0)
107
+ if not matches:
108
+ return
109
+ # get all rules that hit for each extractor
110
+ runs = {}
111
+ for match in matches:
112
+ runs.setdefault(match.namespace, []).append(match)
113
+
114
+ return runs
115
+
116
+ def extract(
117
+ self,
118
+ stream: BinaryIO,
119
+ matches: List[yara.Match],
120
+ extractor_name: str,
121
+ ) -> Dict[str, Any]:
122
+ """Run extractor with stream and verify output matches the model."""
123
+ extractor = self.extractors[extractor_name]
124
+ try:
125
+ # Run extractor on a copy of the sample
126
+ with NamedTemporaryFile() as sample_path:
127
+ sample_path.write(stream.read())
128
+ sample_path.flush()
129
+ # enforce types and verify properties, and remove defaults
130
+ return _verify_response(utils.run_extractor(sample_path.name, **extractor))
131
+ except Exception:
132
+ # caller can deal with the exception
133
+ raise
134
+ finally:
135
+ # make sure to reset where we are in the file
136
+ # otherwise follow on extractors are going to read 0 bytes
137
+ stream.seek(0)
@@ -6,9 +6,11 @@ import inspect
6
6
  import json
7
7
  import os
8
8
  import re
9
+ import shutil
9
10
  import subprocess
10
11
  import sys
11
12
  import tempfile
13
+
12
14
  from maco import yara
13
15
 
14
16
  if sys.version_info >= (3, 11):
@@ -21,8 +23,8 @@ from copy import deepcopy
21
23
  from glob import glob
22
24
  from logging import Logger
23
25
  from pkgutil import walk_packages
24
- from typing import Callable, Dict, Tuple, List, Set
25
26
  from types import ModuleType
27
+ from typing import Callable, Dict, List, Set, Tuple
26
28
 
27
29
  from maco.extractor import Extractor
28
30
 
@@ -67,7 +69,11 @@ import importlib
67
69
  import json
68
70
  import os
69
71
  import sys
70
- import yara
72
+
73
+ try:
74
+ from maco import yara
75
+ except:
76
+ import yara
71
77
 
72
78
  from base64 import b64encode
73
79
  parent_package_path = "{parent_package_path}"
@@ -244,6 +250,11 @@ def create_virtual_environments(directories: List[str], python_version: str, log
244
250
  logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}")
245
251
  venvs.append(venv_path)
246
252
 
253
+ # Cleanup any build directories that are the product of package installation
254
+ expected_build_path = os.path.join(dir, "build")
255
+ if os.path.exists(expected_build_path):
256
+ shutil.rmtree(expected_build_path)
257
+
247
258
  # Add directories to our visited list and check the parent of this directory on the next loop
248
259
  visited_dirs.append(dir)
249
260
  dir = os.path.dirname(dir)
@@ -399,21 +410,23 @@ def import_extractors(
399
410
  register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
400
411
 
401
412
 
402
- def run_in_venv(
413
+ def run_extractor(
403
414
  sample_path,
404
- module,
415
+ module_name,
416
+ extractor_class,
405
417
  module_path,
406
418
  venv,
407
419
  venv_script=VENV_SCRIPT,
408
420
  json_decoder=Base64Decoder,
409
421
  ) -> Dict[str, dict]:
410
422
  # Write temporary script in the same directory as extractor to resolve relative imports
411
- python_exe = os.path.join(venv, "bin", "python")
423
+ python_exe = sys.executable
424
+ if venv:
425
+ # If there is a linked virtual environment, execute within that environment
426
+ python_exe = os.path.join(venv, "bin", "python")
412
427
  dirname = os.path.dirname(module_path)
413
428
  with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
414
429
  with tempfile.NamedTemporaryFile() as output:
415
- module_name = module.__module__
416
- module_class = module.__name__
417
430
  parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
418
431
  root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]
419
432
 
@@ -421,7 +434,7 @@ def run_in_venv(
421
434
  venv_script.format(
422
435
  parent_package_path=parent_package_path,
423
436
  module_name=module_name,
424
- module_class=module_class,
437
+ module_class=extractor_class,
425
438
  sample_path=sample_path,
426
439
  output_path=output.name,
427
440
  )
@@ -1,11 +1,11 @@
1
1
  import re
2
- import yara
3
- import yara_x
4
-
5
2
  from collections import namedtuple
6
3
  from itertools import cycle
7
4
  from typing import Dict
8
5
 
6
+ import yara
7
+ import yara_x
8
+
9
9
  RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
10
10
 
11
11
 
@@ -42,7 +42,7 @@ class Match:
42
42
  def __init__(self, rule: yara_x.Rule, file_content: bytes):
43
43
  self.rule = rule.identifier
44
44
  self.namespace = rule.namespace
45
- self.tags = rule.tags if hasattr(rule, "tags") else []
45
+ self.tags = list(rule.tags) or []
46
46
  self.meta = dict()
47
47
  # Ensure metadata doesn't get overwritten
48
48
  for k, v in rule.metadata:
@@ -9,7 +9,7 @@ trigger:
9
9
  pr: none
10
10
 
11
11
  pool:
12
- vmImage: "ubuntu-20.04"
12
+ vmImage: "ubuntu-22.04"
13
13
 
14
14
  jobs:
15
15
  - job: test
@@ -20,12 +20,12 @@ jobs:
20
20
  python.version: '3.8'
21
21
  Python39:
22
22
  python.version: '3.9'
23
- # Python310:
24
- # python.version: '3.10'
25
- # Python311:
26
- # python.version: '3.11'
27
- # Python312:
28
- # python.version: '3.12'
23
+ Python310:
24
+ python.version: '3.10'
25
+ Python311:
26
+ python.version: '3.11'
27
+ Python312:
28
+ python.version: '3.12'
29
29
  steps:
30
30
  - task: UsePythonVersion@0
31
31
  displayName: 'Use Python $(python.version)'
@@ -0,0 +1,45 @@
1
+ name: tests
2
+
3
+ trigger: ["*"]
4
+ pr: ["*"]
5
+
6
+ pool:
7
+ vmImage: "ubuntu-22.04"
8
+
9
+ jobs:
10
+ - job: run_test
11
+ strategy:
12
+ matrix:
13
+ Python3_8:
14
+ python.version: "3.8"
15
+ Python3_9:
16
+ python.version: "3.9"
17
+ Python3_10:
18
+ python.version: "3.10"
19
+ Python3_11:
20
+ python.version: "3.11"
21
+ Python3_12:
22
+ python.version: "3.12"
23
+ timeoutInMinutes: 10
24
+
25
+ steps:
26
+ - task: UsePythonVersion@0
27
+ displayName: Set python version
28
+ inputs:
29
+ versionSpec: "$(python.version)"
30
+
31
+ - script: |
32
+ runtests=true
33
+ if [ ! -d "$(pwd)/tests" ]; then
34
+ echo "No tests found"
35
+ runtest=false
36
+ else
37
+ python -m pip install -U tox
38
+ fi
39
+ echo "##vso[task.setvariable variable=runtests;]$runtests"
40
+ displayName: Install tox
41
+
42
+ - script: |
43
+ python -m tox -e py
44
+ displayName: "Run tests"
45
+ condition: and(succeeded(), eq(variables.runtests, true))
@@ -4,19 +4,10 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "maco"
7
- dynamic = ["version"]
8
- dependencies = [
9
- "cart",
10
- "pydantic>=2.0.0",
11
- 'tomli >= 1.1.0 ; python_version < "3.11"',
12
- "uv",
13
- "yara-python",
14
- "yara-x==0.10.0",
15
- ]
7
+ dynamic = ["version", "readme", "dependencies"]
16
8
  requires-python = ">=3.8"
17
9
  authors = [{ name = "sl-govau" }]
18
10
  maintainers = [{ name = "cccs-rs" }]
19
- readme = "README.md"
20
11
  license = { file = "LICENSE.md" }
21
12
 
22
13
  classifiers = [
@@ -43,6 +34,10 @@ Issues = "https://github.com/CybercentreCanada/Maco/issues"
43
34
 
44
35
  [tool.setuptools_scm]
45
36
 
37
+ [tool.setuptools.dynamic]
38
+ readme = { file = ["README.md"], content-type = "text/markdown" }
39
+ dependencies = { file = ["requirements.txt"] }
40
+
46
41
  [tool.setuptools.packages.find]
47
42
  where = ["."]
48
43
  exclude = ["test", "tests", "extractors", "model_setup"]
@@ -3,4 +3,4 @@ pydantic>=2.0.0
3
3
  tomli >= 1.1.0 ; python_version < "3.11"
4
4
  uv
5
5
  yara-python
6
- yara-x==0.10.0
6
+ yara-x==0.11.0
@@ -0,0 +1,6 @@
1
+ file to trigger demo extractors
2
+
3
+ self_trigger
4
+
5
+ Complex
6
+ Paradise
@@ -0,0 +1,60 @@
1
+ import os
2
+ import unittest
3
+
4
+ from maco import cli
5
+ from maco.collector import Collector
6
+
7
+
8
+ class TestDemoExtractors(unittest.TestCase):
9
+ def test_complex(self):
10
+ path_file = os.path.normpath(
11
+ os.path.join(__file__, "../data/trigger_complex.txt")
12
+ )
13
+ collector = Collector(os.path.join(__file__, "../../demo_extractors"))
14
+ self.assertEqual(
15
+ set(collector.extractors.keys()),
16
+ {
17
+ "Elfy",
18
+ "Nothing",
19
+ "Complex",
20
+ "LimitOther",
21
+ },
22
+ )
23
+
24
+ with open(path_file, "rb") as stream:
25
+ ret = cli.process_file(
26
+ collector,
27
+ path_file,
28
+ stream,
29
+ pretty=True,
30
+ force=False,
31
+ include_base64=False,
32
+ )
33
+ self.assertEqual(
34
+ ret,
35
+ {
36
+ "Complex": {
37
+ "family": "complex",
38
+ "version": "5",
39
+ "decoded_strings": sorted(["Paradise", "Complex"]),
40
+ "binaries": [
41
+ {
42
+ "datatype": "payload",
43
+ "encryption": {"algorithm": "something"},
44
+ "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
45
+ "size": 9,
46
+ "hex_sample": "736F6D652064617461",
47
+ }
48
+ ],
49
+ "http": [
50
+ {
51
+ "protocol": "https",
52
+ "hostname": "blarg5.com",
53
+ "path": "/malz/64",
54
+ "usage": "c2",
55
+ }
56
+ ],
57
+ "encryption": [{"algorithm": "sha256"}],
58
+ }
59
+ },
60
+ )
maco-1.2.1/tox.ini ADDED
@@ -0,0 +1,10 @@
1
+ [tox]
2
+ envlist = py38,py39,py310,py311,py312
3
+ [testenv]
4
+ # install testing framework
5
+ deps =
6
+ pytest
7
+ -r requirements.txt
8
+ -r tests/requirements.txt
9
+ # run the tests
10
+ commands = python -m pytest -p no:cacheprovider --durations=10 -ra -q -k "not git and not extractors" -vv -W ignore::DeprecationWarning
@@ -1,131 +0,0 @@
1
- """Convenience functions for discovering your extractors."""
2
-
3
- import inspect
4
- import logging
5
- import os
6
-
7
- from tempfile import NamedTemporaryFile
8
- from typing import Any, BinaryIO, Dict, List
9
- from types import ModuleType
10
-
11
- from maco import yara
12
- from pydantic import BaseModel
13
-
14
- from maco import extractor, model, utils
15
-
16
-
17
- class ExtractorLoadError(Exception):
18
- pass
19
-
20
-
21
- logger = logging.getLogger("maco.lib.helpers")
22
-
23
-
24
- def _verify_response(resp: BaseModel) -> Dict:
25
- """Enforce types and verify properties, and remove defaults."""
26
- # check the response is valid for its own model
27
- # this is useful if a restriction on the 'other' dictionary is needed
28
- resp_model = type(resp)
29
- if resp_model != model.ExtractorModel:
30
- resp = resp_model.model_validate(resp)
31
- # check the response is valid according to the ExtractorModel
32
- resp = model.ExtractorModel.model_validate(resp)
33
- # coerce sets to correct types
34
- # otherwise we end up with sets where we expect lists
35
- resp = model.ExtractorModel(**resp.model_dump())
36
- # dump model to dict
37
- return resp.model_dump(exclude_defaults=True)
38
-
39
-
40
- class Collector:
41
- def __init__(
42
- self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
43
- ):
44
- """Discover and load extractors from file system."""
45
- path_extractors = os.path.realpath(path_extractors)
46
- self.path = path_extractors
47
- self.extractors = {}
48
- namespaced_rules = {}
49
-
50
- def extractor_module_callback(module: ModuleType, venv: str):
51
- members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
52
- for member in members:
53
- name, member = member
54
- if exclude and name in exclude:
55
- # Module is part of the exclusion list, skip
56
- logger.debug(f"exclude excluded '{name}'")
57
- return
58
-
59
- if include and name not in include:
60
- # Module wasn't part of the inclusion list, skip
61
- logger.debug(f"include excluded '{name}'")
62
- return
63
-
64
- # initialise and register
65
- logger.debug(f"register '{name}'")
66
- self.extractors[name] = dict(module=member, venv=venv, module_path=module.__file__)
67
- namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
68
-
69
- # Find the extractors within the given directory
70
- utils.import_extractors(
71
- path_extractors,
72
- yara.compile(source=utils.MACO_YARA_RULE),
73
- extractor_module_callback,
74
- logger,
75
- create_venv and os.path.isdir(path_extractors),
76
- )
77
-
78
- if not self.extractors:
79
- raise ExtractorLoadError("no extractors were loaded")
80
- logger.debug(f"found extractors {list(self.extractors.keys())}\n")
81
-
82
- # compile yara rules gathered from extractors
83
- self.rules = yara.compile(sources=namespaced_rules)
84
-
85
- def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
86
- """Return extractors that should run based on yara rules."""
87
- # execute yara rules on file to find extractors we should run
88
- # yara can't run on a stream so we give it a bytestring
89
- matches = self.rules.match(data=stream.read())
90
- stream.seek(0)
91
- if not matches:
92
- return
93
- # get all rules that hit for each extractor
94
- runs = {}
95
- for match in matches:
96
- runs.setdefault(match.namespace, []).append(match)
97
-
98
- return runs
99
-
100
- def extract(
101
- self,
102
- stream: BinaryIO,
103
- matches: List[yara.Match],
104
- extractor_name: str,
105
- ) -> Dict[str, Any]:
106
- """Run extractor with stream and verify output matches the model."""
107
- extractor = self.extractors[extractor_name]
108
- resp = None
109
- try:
110
- if extractor["venv"]:
111
- # Run extractor within a virtual environment
112
- with NamedTemporaryFile() as sample_path:
113
- sample_path.write(stream.read())
114
- sample_path.flush()
115
- return utils.run_in_venv(sample_path.name, **extractor)
116
- else:
117
- # Run extractor within on host environment
118
- resp = extractor["module"]().run(stream, matches)
119
- except Exception:
120
- # caller can deal with the exception
121
- raise
122
- finally:
123
- # make sure to reset where we are in the file
124
- # otherwise follow on extractors are going to read 0 bytes
125
- stream.seek(0)
126
-
127
- # enforce types and verify properties, and remove defaults
128
- if resp is not None:
129
- resp = _verify_response(resp)
130
-
131
- return resp
@@ -1,131 +0,0 @@
1
- """Convenience functions for discovering your extractors."""
2
-
3
- import inspect
4
- import logging
5
- import os
6
-
7
- from tempfile import NamedTemporaryFile
8
- from typing import Any, BinaryIO, Dict, List
9
- from types import ModuleType
10
-
11
- from maco import yara
12
- from pydantic import BaseModel
13
-
14
- from maco import extractor, model, utils
15
-
16
-
17
- class ExtractorLoadError(Exception):
18
- pass
19
-
20
-
21
- logger = logging.getLogger("maco.lib.helpers")
22
-
23
-
24
- def _verify_response(resp: BaseModel) -> Dict:
25
- """Enforce types and verify properties, and remove defaults."""
26
- # check the response is valid for its own model
27
- # this is useful if a restriction on the 'other' dictionary is needed
28
- resp_model = type(resp)
29
- if resp_model != model.ExtractorModel:
30
- resp = resp_model.model_validate(resp)
31
- # check the response is valid according to the ExtractorModel
32
- resp = model.ExtractorModel.model_validate(resp)
33
- # coerce sets to correct types
34
- # otherwise we end up with sets where we expect lists
35
- resp = model.ExtractorModel(**resp.model_dump())
36
- # dump model to dict
37
- return resp.model_dump(exclude_defaults=True)
38
-
39
-
40
- class Collector:
41
- def __init__(
42
- self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
43
- ):
44
- """Discover and load extractors from file system."""
45
- path_extractors = os.path.realpath(path_extractors)
46
- self.path = path_extractors
47
- self.extractors = {}
48
- namespaced_rules = {}
49
-
50
- def extractor_module_callback(module: ModuleType, venv: str):
51
- members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
52
- for member in members:
53
- name, member = member
54
- if exclude and name in exclude:
55
- # Module is part of the exclusion list, skip
56
- logger.debug(f"exclude excluded '{name}'")
57
- return
58
-
59
- if include and name not in include:
60
- # Module wasn't part of the inclusion list, skip
61
- logger.debug(f"include excluded '{name}'")
62
- return
63
-
64
- # initialise and register
65
- logger.debug(f"register '{name}'")
66
- self.extractors[name] = dict(module=member, venv=venv, module_path=module.__file__)
67
- namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
68
-
69
- # Find the extractors within the given directory
70
- utils.import_extractors(
71
- path_extractors,
72
- yara.compile(source=utils.MACO_YARA_RULE),
73
- extractor_module_callback,
74
- logger,
75
- create_venv and os.path.isdir(path_extractors),
76
- )
77
-
78
- if not self.extractors:
79
- raise ExtractorLoadError("no extractors were loaded")
80
- logger.debug(f"found extractors {list(self.extractors.keys())}\n")
81
-
82
- # compile yara rules gathered from extractors
83
- self.rules = yara.compile(sources=namespaced_rules)
84
-
85
- def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
86
- """Return extractors that should run based on yara rules."""
87
- # execute yara rules on file to find extractors we should run
88
- # yara can't run on a stream so we give it a bytestring
89
- matches = self.rules.match(data=stream.read())
90
- stream.seek(0)
91
- if not matches:
92
- return
93
- # get all rules that hit for each extractor
94
- runs = {}
95
- for match in matches:
96
- runs.setdefault(match.namespace, []).append(match)
97
-
98
- return runs
99
-
100
- def extract(
101
- self,
102
- stream: BinaryIO,
103
- matches: List[yara.Match],
104
- extractor_name: str,
105
- ) -> Dict[str, Any]:
106
- """Run extractor with stream and verify output matches the model."""
107
- extractor = self.extractors[extractor_name]
108
- resp = None
109
- try:
110
- if extractor["venv"]:
111
- # Run extractor within a virtual environment
112
- with NamedTemporaryFile() as sample_path:
113
- sample_path.write(stream.read())
114
- sample_path.flush()
115
- return utils.run_in_venv(sample_path.name, **extractor)
116
- else:
117
- # Run extractor within on host environment
118
- resp = extractor["module"]().run(stream, matches)
119
- except Exception:
120
- # caller can deal with the exception
121
- raise
122
- finally:
123
- # make sure to reset where we are in the file
124
- # otherwise follow on extractors are going to read 0 bytes
125
- stream.seek(0)
126
-
127
- # enforce types and verify properties, and remove defaults
128
- if resp is not None:
129
- resp = _verify_response(resp)
130
-
131
- return resp
@@ -1,41 +0,0 @@
1
- name: tests
2
-
3
- trigger: ["*"]
4
- pr: ["*"]
5
-
6
- pool:
7
- vmImage: "ubuntu-20.04"
8
-
9
- jobs:
10
- - job: run_test
11
- strategy:
12
- matrix:
13
- Python3_8:
14
- python.version: "3.8"
15
- Python3_9:
16
- python.version: "3.9"
17
- Python3_10:
18
- python.version: "3.10"
19
- Python3_11:
20
- python.version: "3.11"
21
- Python3_12:
22
- python.version: "3.12"
23
- timeoutInMinutes: 10
24
-
25
- steps:
26
- - task: UsePythonVersion@0
27
- displayName: Set python version
28
- inputs:
29
- versionSpec: "$(python.version)"
30
- - script: |
31
- [ ! -d "$(pwd)/tests" ] && echo "No tests found" && exit
32
- [ -f $(pwd)/requirements.txt ] && sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir -r $(pwd)/requirements.txt
33
- [ -f $(pwd)/tests/requirements.txt ] && sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir -r $(pwd)/tests/requirements.txt
34
- sudo rm -rf /tmp/* /var/lib/apt/lists/* ~/.cache/pip
35
-
36
- displayName: Setup environment
37
- - script: |
38
- [ ! -d "$(pwd)/tests" ] && echo "No tests found" && exit
39
- export REPO_NAME=${BUILD_REPOSITORY_NAME##*/}
40
- python -m pytest -p no:cacheprovider --durations=10 -rsx -vv -W ignore::DeprecationWarning
41
- displayName: Test
maco-1.2.0/setup.py DELETED
@@ -1,25 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Setup script."""
3
-
4
- from setuptools import find_packages, setup
5
-
6
- setup(
7
- python_requires=">=3.8",
8
- use_scm_version=True,
9
- setup_requires=["setuptools_scm"],
10
- packages=find_packages(".", exclude=["test", "tests", "extractors"]),
11
- include_package_data=True,
12
- install_requires=[
13
- r.strip() for r in open("requirements.txt", "r") if not r.startswith("#")
14
- ],
15
- name="maco",
16
- description="",
17
- author="",
18
- author_email="",
19
- classifiers=[],
20
- entry_points={
21
- "console_scripts": [
22
- "maco = maco.cli:main",
23
- ],
24
- },
25
- )
maco-1.2.0/tox.ini DELETED
@@ -1,10 +0,0 @@
1
- [tox]
2
- envlist = py38,py39
3
- [testenv]
4
- # install testing framework
5
- deps =
6
- pytest
7
- -r requirements.txt
8
- -r tests/requirements.txt
9
- # run the tests
10
- commands = pytest ./tests
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes