maco 1.1.14__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. maco-1.2.1/PKG-INFO +274 -0
  2. {maco-1.1.14 → maco-1.2.1}/demo_extractors/complex/complex.py +5 -9
  3. {maco-1.1.14 → maco-1.2.1}/demo_extractors/elfy.py +2 -6
  4. {maco-1.1.14 → maco-1.2.1}/demo_extractors/limit_other.py +2 -6
  5. {maco-1.1.14 → maco-1.2.1}/demo_extractors/nothing.py +2 -6
  6. maco-1.2.1/maco/collector.py +137 -0
  7. {maco-1.1.14 → maco-1.2.1}/maco/extractor.py +4 -3
  8. maco-1.2.1/maco/utils.py +466 -0
  9. maco-1.2.1/maco/yara.py +85 -0
  10. maco-1.2.1/maco.egg-info/PKG-INFO +274 -0
  11. {maco-1.1.14 → maco-1.2.1}/maco.egg-info/SOURCES.txt +16 -2
  12. {maco-1.1.14 → maco-1.2.1}/maco.egg-info/requires.txt +2 -0
  13. maco-1.2.1/maco.egg-info/top_level.txt +6 -0
  14. maco-1.2.1/model_setup/LICENSE.md +11 -0
  15. maco-1.2.1/model_setup/README.md +236 -0
  16. maco-1.2.1/model_setup/maco/base_test.py +75 -0
  17. maco-1.2.1/model_setup/maco/cli.py +229 -0
  18. maco-1.2.1/model_setup/maco/collector.py +137 -0
  19. maco-1.2.1/model_setup/maco/extractor.py +71 -0
  20. maco-1.2.1/model_setup/maco/model/__init__.py +1 -0
  21. maco-1.2.1/model_setup/maco/model/model.py +538 -0
  22. maco-1.2.1/model_setup/maco/utils.py +466 -0
  23. maco-1.2.1/model_setup/maco/yara.py +85 -0
  24. maco-1.2.1/model_setup/pyproject.toml +39 -0
  25. {maco-1.1.14 → maco-1.2.1}/pipelines/publish.yaml +7 -7
  26. maco-1.2.1/pipelines/test.yaml +45 -0
  27. maco-1.2.1/pyproject.toml +43 -0
  28. {maco-1.1.14 → maco-1.2.1}/requirements.txt +3 -1
  29. maco-1.2.1/tests/data/trigger_complex.txt +6 -0
  30. {maco-1.1.14 → maco-1.2.1}/tests/extractors/basic.py +1 -3
  31. {maco-1.1.14 → maco-1.2.1}/tests/extractors/basic_longer.py +1 -3
  32. maco-1.2.1/tests/extractors/bob/__init__.py +0 -0
  33. {maco-1.1.14 → maco-1.2.1}/tests/extractors/bob/bob.py +1 -3
  34. maco-1.2.1/tests/test_demo_extractors.py +60 -0
  35. {maco-1.1.14 → maco-1.2.1}/tests/test_detection.py +11 -21
  36. {maco-1.1.14 → maco-1.2.1}/tests/test_extractor.py +0 -2
  37. maco-1.2.1/tox.ini +10 -0
  38. maco-1.1.14/PKG-INFO +0 -11
  39. maco-1.1.14/maco/collector.py +0 -128
  40. maco-1.1.14/maco/utils.py +0 -339
  41. maco-1.1.14/maco.egg-info/PKG-INFO +0 -11
  42. maco-1.1.14/maco.egg-info/top_level.txt +0 -1
  43. maco-1.1.14/pipelines/test.yaml +0 -41
  44. maco-1.1.14/setup.py +0 -25
  45. maco-1.1.14/tox.ini +0 -10
  46. {maco-1.1.14 → maco-1.2.1}/.gitignore +0 -0
  47. {maco-1.1.14 → maco-1.2.1}/.vscode/settings.json +0 -0
  48. {maco-1.1.14 → maco-1.2.1}/LICENSE.md +0 -0
  49. {maco-1.1.14 → maco-1.2.1}/README.md +0 -0
  50. {maco-1.1.14 → maco-1.2.1}/demo_extractors/complex/__init__.py +0 -0
  51. {maco-1.1.14 → maco-1.2.1}/demo_extractors/complex/complex_utils.py +0 -0
  52. {maco-1.1.14 → maco-1.2.1}/demo_extractors/shared.py +0 -0
  53. {maco-1.1.14 → maco-1.2.1}/maco/__init__.py +0 -0
  54. {maco-1.1.14 → maco-1.2.1}/maco/base_test.py +0 -0
  55. {maco-1.1.14 → maco-1.2.1}/maco/cli.py +0 -0
  56. {maco-1.1.14 → maco-1.2.1}/maco/model/__init__.py +0 -0
  57. {maco-1.1.14 → maco-1.2.1}/maco/model/model.py +0 -0
  58. {maco-1.1.14 → maco-1.2.1}/maco.egg-info/dependency_links.txt +0 -0
  59. {maco-1.1.14 → maco-1.2.1}/maco.egg-info/entry_points.txt +0 -0
  60. {maco-1.1.14/tests/extractors → maco-1.2.1/model_setup/maco}/__init__.py +0 -0
  61. {maco-1.1.14 → maco-1.2.1}/model_setup/setup.py +0 -0
  62. {maco-1.1.14 → maco-1.2.1}/setup.cfg +0 -0
  63. {maco-1.1.14 → maco-1.2.1}/tests/data/example.txt.cart +0 -0
  64. {maco-1.1.14/tests/extractors/bob → maco-1.2.1/tests/extractors}/__init__.py +0 -0
  65. {maco-1.1.14 → maco-1.2.1}/tests/extractors/test_basic.py +0 -0
  66. {maco-1.1.14 → maco-1.2.1}/tests/pytest.ini +0 -0
  67. {maco-1.1.14 → maco-1.2.1}/tests/requirements.txt +0 -0
  68. {maco-1.1.14 → maco-1.2.1}/tests/test_base_test.py +0 -0
  69. {maco-1.1.14 → maco-1.2.1}/tests/test_helpers.py +0 -0
  70. {maco-1.1.14 → maco-1.2.1}/tests/test_model.py +0 -0
maco-1.2.1/PKG-INFO ADDED
@@ -0,0 +1,274 @@
1
+ Metadata-Version: 2.1
2
+ Name: maco
3
+ Version: 1.2.1
4
+ Author: sl-govau
5
+ Maintainer: cccs-rs
6
+ License: MIT License
7
+
8
+ Copyright (c) 2022 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment) and Government of Australia (Australian Cyber Security Centre / Australian Signals Directorate)
9
+
10
+ Copyright title to all 3rd party software distributed with maco is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets.
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17
+
18
+ Project-URL: Repository, https://github.com/CybercentreCanada/Maco
19
+ Project-URL: Issues, https://github.com/CybercentreCanada/Maco/issues
20
+ Classifier: Development Status :: 5 - Production/Stable
21
+ Classifier: Intended Audience :: Developers
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: License :: OSI Approved :: MIT License
24
+ Classifier: Programming Language :: Python :: 3.8
25
+ Classifier: Programming Language :: Python :: 3.9
26
+ Classifier: Programming Language :: Python :: 3.10
27
+ Classifier: Programming Language :: Python :: 3.11
28
+ Classifier: Programming Language :: Python :: 3.12
29
+ Requires-Python: >=3.8
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE.md
32
+ Requires-Dist: cart
33
+ Requires-Dist: pydantic>=2.0.0
34
+ Requires-Dist: tomli>=1.1.0; python_version < "3.11"
35
+ Requires-Dist: uv
36
+ Requires-Dist: yara-python
37
+ Requires-Dist: yara-x==0.11.0
38
+
39
+ # Maco - Malware config extractor framework
40
+
41
+ ## Maco is a framework for ***ma***lware ***co***nfig extractors.
42
+
43
+ It aims to solve two problems:
44
+
45
+ - Define a standardized ontology (or model) for extractor output. This greatly helps with databasing extracted values.
46
+ - Provide a standard way of identifying which parsers to run and how to execute them.
47
+
48
+ ## Maco components
49
+
50
+ - `model.py`
51
+ - A data model for the common output of an extractor
52
+ - `extractor.py`
53
+ - Base class for extractors to implement
54
+ - `collector.py`
55
+ - Utilities for loading and running extractors
56
+ - `cli.py`
57
+ - A CLI tool `maco` to assist with running your extractors locally
58
+ - `base_test.py`
59
+ - Assist with writing unit tests for your extractors
60
+
61
+ **Note: If you're interested in using only the model in your project, you can `pip install maco-model` which is a smaller package containing only the model definition**
62
+
63
+ ## Project Integrations 🛠️
64
+
65
+ This framework is actively being used by:
66
+
67
+ | Project | Description | License |
68
+ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
69
+ | <a href="https://cybercentrecanada.github.io/assemblyline4_docs/"><img src="https://images.weserv.nl/?url=cybercentrecanada.github.io/assemblyline4_docs/images/crane.png?v=4&h=100&w=100&fit=cover&maxage=7d"></a> | A malware analysis platform that uses the MACO model to export malware configuration extractions into a parseable, machine-friendly format | [![License](https://img.shields.io/github/license/CybercentreCanada/assemblyline)](https://github.com/CybercentreCanada/assemblyline/blob/main/LICENSE.md) |
70
+ | [configextractor-py](https://github.com/CybercentreCanada/configextractor-py) | A tool designed to run extractors from multiple frameworks that uses the MACO model for output harmonization | [![License](https://img.shields.io/github/license/CybercentreCanada/configextractor-py)](https://github.com/CybercentreCanada/configextractor-py/blob/main/LICENSE.md) |
71
+ | <a href="https://github.com/jeFF0Falltrades/rat_king_parser"><img src="https://images.weserv.nl/?url=raw.githubusercontent.com/jeFF0Falltrades/rat_king_parser/master/.github/logo.png?v=4&h=100&w=100&fit=cover&maxage=7d"/> </a> | A robust, multiprocessing-capable, multi-family RAT config parser/extractor that is compatible with MACO | [![License](https://img.shields.io/github/license/jeFF0Falltrades/rat_king_parser)](https://github.com/jeFF0Falltrades/rat_king_parser/blob/master/LICENSE) |
72
+ | <a href="https://github.com/apophis133/apophis-YARA-Rules"><img src="https://images.weserv.nl/?url=github.com/apophis133.png?v=4&h=100&w=100&fit=cover&maxage=7d"/> </a> | A parser/extractor repository that supports MACO for performing malware configuration extraction with YARA rule detection | |
73
+ | <a href="https://github.com/CAPESandbox/community"><img src="https://images.weserv.nl/?url=github.com/CAPESandbox.png?v=4&h=100&w=100&fit=cover&maxage=7d0&mask=circle"/> </a> | A parser/extractor repository containing MACO extractors that's authored by the CAPE community but is integrated in [CAPE](https://github.com/kevoreilly/CAPEv2) deployments.<br>**Note: These MACO extractors wrap and parse the original CAPE extractors.** | [![License](https://img.shields.io/badge/license-GPL--3.0-informational)](https://github.com/kevoreilly/CAPEv2/blob/master/LICENSE) |
74
+
75
+ ## Model Example
76
+
77
+ See [the model definition](https://github.com/CybercentreCanada/Maco/blob/0f447a66de5e5ce8770ef3fe2325aec002842e63/maco/model.py#L127) for all the supported fields.
78
+ You can use the model independently of the rest of the framework.
79
+ This is still useful for compatibility between systems!
80
+
81
+ ```python
82
+ from maco import model
83
+ # 'family' is the only required property on the model
84
+ output = model.ExtractorModel(family="wanabee")
85
+ output.version = "2019" # variant first found in 2019
86
+ output.category.extend([model.CategoryEnum.cryptominer, model.CategoryEnum.clickfraud])
87
+ output.http.append(model.ExtractorModel.Http(protocol="https",
88
+ uri="https://bad-domain.com/c2_payload",
89
+ usage="c2"))
90
+ output.tcp.append(model.ExtractorModel.Connection(server_ip="127.0.0.1",
91
+ usage="ransom"))
92
+ output.campaign_id.append("859186-3224-9284")
93
+ output.inject_exe.append("explorer.exe")
94
+ output.binaries.append(
95
+ output.Binary(
96
+ data=b"sam I am",
97
+ datatype=output.Binary.TypeEnum.config,
98
+ encryption=output.Binary.Encryption(
99
+ algorithm="rot26",
100
+ mode="block",
101
+ ),
102
+ )
103
+ )
104
+ # data about the malware that doesn't fit the model
105
+ output.other["author_lunch"] = "green eggs and ham"
106
+ output.other["author_lunch_time"] = "3pm"
107
+ print(output.model_dump(exclude_defaults=True))
108
+
109
+ # Generated model
110
+ {
111
+ 'family': 'wanabee',
112
+ 'version': '2019',
113
+ 'category': ['cryptominer', 'clickfraud'],
114
+ 'campaign_id': ['859186-3224-9284'],
115
+ 'inject_exe': ['explorer.exe'],
116
+ 'other': {'author_lunch': 'green eggs and ham', 'author_lunch_time': '3pm'},
117
+ 'http': [{'uri': 'https://bad-domain.com/c2_payload', 'usage': 'c2', 'protocol': 'https'}],
118
+ 'tcp': [{'server_ip': '127.0.0.1', 'usage': 'ransom'}],
119
+ 'binaries': [{
120
+ 'datatype': 'config', 'data': b'sam I am',
121
+ 'encryption': {'algorithm': 'rot26', 'mode': 'block'}
122
+ }]
123
+ }
124
+ ```
125
+
126
+ And you can create model instances from dictionaries:
127
+
128
+ ```python
129
+ from maco import model
130
+ output = {
131
+ "family": "wanabee2",
132
+ "version": "2022",
133
+ "ssh": [
134
+ {
135
+ "username": "wanna",
136
+ "password": "bee2",
137
+ "hostname": "10.1.10.100",
138
+ }
139
+ ],
140
+ }
141
+ print(model.ExtractorModel(**output))
142
+
143
+ # Generated model
144
+ family='wanabee2' version='2022' category=[] attack=[] capability_enabled=[]
145
+ capability_disabled=[] campaign_id=[] identifier=[] decoded_strings=[]
146
+ password=[] mutex=[] pipe=[] sleep_delay=None inject_exe=[] other={}
147
+ binaries=[] ftp=[] smtp=[] http=[]
148
+ ssh=[SSH(username='wanna', password='bee2', hostname='10.1.10.100', port=None, usage=None)]
149
+ proxy=[] dns=[] tcp=[] udp=[] encryption=[] service=[] cryptocurrency=[]
150
+ paths=[] registry=[]
151
+ ```
152
+
153
+ ## Extractor Example
154
+
155
+ The following extractor will trigger on any file with more than 50 ELF sections,
156
+ and set some properties in the model.
157
+
158
+ Your extractors will do a better job of finding useful information than this one!
159
+
160
+ ```python
161
+ class Elfy(extractor.Extractor):
162
+ """Check basic elf property."""
163
+
164
+ family = "elfy"
165
+ author = "blue"
166
+ last_modified = "2022-06-14"
167
+ yara_rule = """
168
+ import "elf"
169
+
170
+ rule Elfy
171
+ {
172
+ condition:
173
+ elf.number_of_sections > 50
174
+ }
175
+ """
176
+
177
+ def run(
178
+ self, stream: BytesIO, matches: List[yara.Match]
179
+ ) -> Optional[model.ExtractorModel]:
180
+ # return config model formatted results
181
+ ret = model.ExtractorModel(family=self.family)
182
+ # the list for campaign_id already exists and is empty, so we just add an item
183
+ ret.campaign_id.append(str(len(stream.read())))
184
+ return ret
185
+ ```
186
+
187
+ ## Writing Extractors
188
+
189
+ There are several examples that use Maco in the '`demo_extractors`' folder.
190
+
191
+ Some things to keep in mind:
192
+
193
+ - The Yara rule names must be prefixed with the extractor class name.
194
+ - e.g. Class 'MyScript' has Yara rules named 'MyScriptDetect1' and 'MyScriptDetect2', not 'Detect1'
195
+ - You can load other scripts contained within the same folder via a Python relative import
196
+ - See `complex.py` for details
197
+ - You can standardise your usage of the '`other`' dict
198
+ - This is optional, see `limit_other.py` for details
199
+ - Consider instead making a PR with the properties you are frequently using
200
+
201
+ # Requirements
202
+
203
+ Python 3.8+.
204
+
205
+ Install this package with `pip install maco`.
206
+
207
+ All required Python packages are in the `requirements.txt`.
208
+
209
+ # CLI Usage
210
+
211
+ ```bash
212
+ > maco --help
213
+ usage: maco [-h] [-v] [--pretty] [--base64] [--logfile LOGFILE] [--include INCLUDE] [--exclude EXCLUDE] [-f] [--create_venv] extractors samples
214
+
215
+ Run extractors over samples.
216
+
217
+ positional arguments:
218
+ extractors path to extractors
219
+ samples path to samples
220
+
221
+ optional arguments:
222
+ -h, --help show this help message and exit
223
+ -v, --verbose print debug logging. -v extractor info, -vv extractor debug, -vvv cli debug
224
+ --pretty pretty print json output
225
+ --base64 Include base64 encoded binary data in output (can be large, consider printing to file rather than console)
226
+ --logfile LOGFILE file to log output
227
+ --include INCLUDE comma separated extractors to run
228
+ --exclude EXCLUDE comma separated extractors to not run
229
+ -f, --force ignore yara rules and execute all extractors
230
+ --create_venv Creates venvs for every requirements.txt found (only applies when extractor path is a directory)
231
+ ```
232
+
233
+ ## CLI output example
234
+
235
+ The CLI is helpful for using your extractors in a standalone system, such as in a reverse engineering environment.
236
+
237
+ ```bash
238
+ > maco demo_extractors/ /usr/lib --include Complex
239
+ extractors loaded: ['Complex']
240
+
241
+ complex by blue 2022-06-14 TLP:WHITE
242
+ This script has multiple yara rules and coverage of the data model.
243
+
244
+ path: /usr/lib/udev/hwdb.bin
245
+ run Complex extractor from rules ['ComplexAlt']
246
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
247
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
248
+ "encryption": {"algorithm": "something"}}],
249
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/9956330", "usage": "c2"}],
250
+ "encryption": [{"algorithm": "sha256"}]}
251
+
252
+ path: /usr/lib/udev/hwdb.d/20-OUI.hwdb
253
+ run Complex extractor from rules ['ComplexAlt']
254
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
255
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
256
+ "encryption": {"algorithm": "something"}}],
257
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1986908", "usage": "c2"}],
258
+ "encryption": [{"algorithm": "sha256"}]}
259
+
260
+ path: /usr/lib/udev/hwdb.d/20-usb-vendor-model.hwdb
261
+ run Complex extractor from rules ['ComplexAlt']
262
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
263
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
264
+ "encryption": {"algorithm": "something"}}],
265
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1257481", "usage": "c2"}],
266
+ "encryption": [{"algorithm": "sha256"}]}
267
+
268
+
269
+ 15884 analysed, 3 hits, 3 extracted
270
+ ```
271
+
272
+ The demo extractors are designed to trigger when run over the '`demo_extractors`' folder.
273
+
274
+ e.g. `maco demo_extractors demo_extractors`
@@ -1,11 +1,9 @@
1
1
  from io import BytesIO
2
- from typing import Dict, List, Optional
2
+ from typing import List, Optional
3
3
 
4
- import yara
4
+ from maco import extractor, model, yara
5
5
 
6
- from maco import extractor, model
7
-
8
- from . import complex_utils
6
+ from complex import complex_utils
9
7
 
10
8
 
11
9
  class Complex(extractor.Extractor):
@@ -41,9 +39,7 @@ class Complex(extractor.Extractor):
41
39
  }
42
40
  """
43
41
 
44
- def run(
45
- self, stream: BytesIO, matches: List[yara.Match]
46
- ) -> Optional[model.ExtractorModel]:
42
+ def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
47
43
  self.logger.info("starting run")
48
44
  self.logger.debug(f"{[x.rule for x in matches]=}")
49
45
  data = stream.read()
@@ -54,7 +50,7 @@ class Complex(extractor.Extractor):
54
50
  other = complex_utils.getdata()["result"]
55
51
  self.logger.debug("got data from lib")
56
52
  # example - accessing yara strings
57
- strings = {y[2].decode("utf8") for x in matches for y in x.strings}
53
+ strings = sorted({z.plaintext().decode("utf8") for x in matches for y in x.strings for z in y.instances})
58
54
  self.logger.debug(f"{strings=}")
59
55
  # construct model of results
60
56
  tmp = model.ExtractorModel(family=self.family)
@@ -1,9 +1,7 @@
1
1
  from io import BytesIO
2
2
  from typing import Dict, List, Optional
3
3
 
4
- import yara
5
-
6
- from maco import extractor, model
4
+ from maco import extractor, model, yara
7
5
 
8
6
 
9
7
  class Elfy(extractor.Extractor):
@@ -22,9 +20,7 @@ class Elfy(extractor.Extractor):
22
20
  }
23
21
  """
24
22
 
25
- def run(
26
- self, stream: BytesIO, matches: List[yara.Match]
27
- ) -> Optional[model.ExtractorModel]:
23
+ def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
28
24
  # return config model formatted results
29
25
  ret = model.ExtractorModel(family=self.family)
30
26
  # the list for campaign_id already exists and is empty, so we just add an item
@@ -1,9 +1,7 @@
1
1
  from io import BytesIO
2
2
  from typing import Dict, List, Optional
3
3
 
4
- import yara
5
-
6
- from maco import extractor, model
4
+ from maco import extractor, model, yara
7
5
 
8
6
  from . import shared
9
7
 
@@ -25,9 +23,7 @@ class LimitOther(extractor.Extractor):
25
23
  }
26
24
  """
27
25
 
28
- def run(
29
- self, stream: BytesIO, matches: List[yara.Match]
30
- ) -> Optional[model.ExtractorModel]:
26
+ def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
31
27
  # use a custom model that inherits from ExtractorModel
32
28
  # this model defines what can go in the 'other' dict
33
29
  tmp = shared.MyCustomModel(family="specify_other")
@@ -1,9 +1,7 @@
1
1
  from io import BytesIO
2
2
  from typing import Dict, List, Optional
3
3
 
4
- import yara
5
-
6
- from maco import extractor, model
4
+ from maco import extractor, model, yara
7
5
 
8
6
 
9
7
  class Nothing(extractor.Extractor):
@@ -23,8 +21,6 @@ class Nothing(extractor.Extractor):
23
21
  }
24
22
  """
25
23
 
26
- def run(
27
- self, stream: BytesIO, matches: List[yara.Match]
28
- ) -> Optional[model.ExtractorModel]:
24
+ def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
29
25
  # return config model formatted results
30
26
  return
@@ -0,0 +1,137 @@
1
+ """Convenience functions for discovering your extractors."""
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ from multiprocessing import Manager, Process
7
+ from tempfile import NamedTemporaryFile
8
+ from types import ModuleType
9
+ from typing import Any, BinaryIO, Dict, List, Union
10
+
11
+ from pydantic import BaseModel
12
+
13
+ from maco import extractor, model, utils, yara
14
+
15
+
16
+ class ExtractorLoadError(Exception):
17
+ pass
18
+
19
+
20
+ logger = logging.getLogger("maco.lib.helpers")
21
+
22
+
23
+ def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
24
+ """Enforce types and verify properties, and remove defaults."""
25
+ if not resp:
26
+ return None
27
+ # check the response is valid for its own model
28
+ # this is useful if a restriction on the 'other' dictionary is needed
29
+ resp_model = type(resp)
30
+ if resp_model != model.ExtractorModel and hasattr(resp_model, "model_validate"):
31
+ resp = resp_model.model_validate(resp)
32
+ # check the response is valid according to the ExtractorModel
33
+ resp = model.ExtractorModel.model_validate(resp)
34
+ # coerce sets to correct types
35
+ # otherwise we end up with sets where we expect lists
36
+ resp = model.ExtractorModel(**resp.model_dump())
37
+ # dump model to dict
38
+ return resp.model_dump(exclude_defaults=True)
39
+
40
+
41
+ class Collector:
42
+ def __init__(
43
+ self, path_extractors: str, include: List[str] = None, exclude: List[str] = None, create_venv: bool = False
44
+ ):
45
+ """Discover and load extractors from file system."""
46
+ path_extractors = os.path.realpath(path_extractors)
47
+ self.path: str = path_extractors
48
+ self.extractors: Dict[str, Dict[str, str]] = {}
49
+
50
+ with Manager() as manager:
51
+ extractors = manager.dict()
52
+ namespaced_rules = manager.dict()
53
+
54
+ def extractor_module_callback(module: ModuleType, venv: str):
55
+ members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
56
+ for member in members:
57
+ name, member = member
58
+ if exclude and name in exclude:
59
+ # Module is part of the exclusion list, skip
60
+ logger.debug(f"exclude excluded '{name}'")
61
+ return
62
+
63
+ if include and name not in include:
64
+ # Module wasn't part of the inclusion list, skip
65
+ logger.debug(f"include excluded '{name}'")
66
+ return
67
+
68
+ # initialise and register
69
+ logger.debug(f"register '{name}'")
70
+ extractors[name] = dict(
71
+ venv=venv,
72
+ module_path=module.__file__,
73
+ module_name=member.__module__,
74
+ extractor_class=member.__name__,
75
+ )
76
+ namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
77
+
78
+ # Find the extractors within the given directory
79
+ # Execute within a child process to ensure main process interpreter is kept clean
80
+ p = Process(
81
+ target=utils.import_extractors,
82
+ args=(
83
+ path_extractors,
84
+ yara.compile(source=utils.MACO_YARA_RULE),
85
+ extractor_module_callback,
86
+ logger,
87
+ create_venv and os.path.isdir(path_extractors),
88
+ ),
89
+ )
90
+ p.start()
91
+ p.join()
92
+
93
+ self.extractors = dict(extractors)
94
+ if not self.extractors:
95
+ raise ExtractorLoadError("no extractors were loaded")
96
+ logger.debug(f"found extractors {list(self.extractors.keys())}\n")
97
+
98
+ # compile yara rules gathered from extractors
99
+ self.rules = yara.compile(sources=dict(namespaced_rules))
100
+
101
+ def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
102
+ """Return extractors that should run based on yara rules."""
103
+ # execute yara rules on file to find extractors we should run
104
+ # yara can't run on a stream so we give it a bytestring
105
+ matches = self.rules.match(data=stream.read())
106
+ stream.seek(0)
107
+ if not matches:
108
+ return
109
+ # get all rules that hit for each extractor
110
+ runs = {}
111
+ for match in matches:
112
+ runs.setdefault(match.namespace, []).append(match)
113
+
114
+ return runs
115
+
116
+ def extract(
117
+ self,
118
+ stream: BinaryIO,
119
+ matches: List[yara.Match],
120
+ extractor_name: str,
121
+ ) -> Dict[str, Any]:
122
+ """Run extractor with stream and verify output matches the model."""
123
+ extractor = self.extractors[extractor_name]
124
+ try:
125
+ # Run extractor on a copy of the sample
126
+ with NamedTemporaryFile() as sample_path:
127
+ sample_path.write(stream.read())
128
+ sample_path.flush()
129
+ # enforce types and verify properties, and remove defaults
130
+ return _verify_response(utils.run_extractor(sample_path.name, **extractor))
131
+ except Exception:
132
+ # caller can deal with the exception
133
+ raise
134
+ finally:
135
+ # make sure to reset where we are in the file
136
+ # otherwise follow on extractors are going to read 0 bytes
137
+ stream.seek(0)
@@ -4,7 +4,7 @@ import logging
4
4
  import textwrap
5
5
  from typing import BinaryIO, List, Optional, Union
6
6
 
7
- import yara
7
+ from maco import yara
8
8
 
9
9
  from . import model
10
10
 
@@ -12,8 +12,8 @@ from . import model
12
12
  class InvalidExtractor(ValueError):
13
13
  pass
14
14
 
15
- DEFAULT_YARA_RULE = \
16
- """
15
+
16
+ DEFAULT_YARA_RULE = """
17
17
  rule {name}
18
18
  {{
19
19
  condition:
@@ -21,6 +21,7 @@ rule {name}
21
21
  }}
22
22
  """
23
23
 
24
+
24
25
  class Extractor:
25
26
  """Base class for an analysis extractor with common entrypoint and metadata.
26
27