maco-extractor 1.2.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment) and Government of Australia (Australian Cyber Security Centre / Australian Signals Directorate)
4
+
5
+ Copyright title to all 3rd party software distributed with maco is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets.
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10
+
11
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,283 @@
1
+ Metadata-Version: 2.4
2
+ Name: maco-extractor
3
+ Version: 1.2.18
4
+ Summary: This package contains the essentials for creating Maco extractors and using them at runtime.
5
+ Author: sl-govau
6
+ Maintainer: cccs-rs
7
+ License: MIT License
8
+
9
+ Copyright (c) 2022 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment) and Government of Australia (Australian Cyber Security Centre / Australian Signals Directorate)
10
+
11
+ Copyright title to all 3rd party software distributed with maco is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets.
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18
+
19
+ Project-URL: Repository, https://github.com/CybercentreCanada/Maco
20
+ Project-URL: Issues, https://github.com/CybercentreCanada/Maco/issues
21
+ Classifier: Development Status :: 5 - Production/Stable
22
+ Classifier: Intended Audience :: Developers
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Classifier: License :: OSI Approved :: MIT License
25
+ Classifier: Programming Language :: Python :: 3.8
26
+ Classifier: Programming Language :: Python :: 3.9
27
+ Classifier: Programming Language :: Python :: 3.10
28
+ Classifier: Programming Language :: Python :: 3.11
29
+ Classifier: Programming Language :: Python :: 3.12
30
+ Requires-Python: >=3.8
31
+ Description-Content-Type: text/markdown
32
+ License-File: LICENSE.md
33
+ Requires-Dist: pydantic>=2.0.0
34
+ Requires-Dist: yara-x
35
+ Dynamic: license-file
36
+
37
+ # Maco - Malware config extractor framework
38
+
39
+ ## Maco is a framework for <ins>ma</ins>lware <ins>co</ins>nfig extractors.
40
+
41
+ It aims to solve two problems:
42
+
43
+ - Define a standardized ontology (or model) for extractor output. This greatly helps with databasing extracted values.
44
+ - Provide a standard way of identifying which parsers to run and how to execute them.
45
+
46
+ ## Maco components
47
+
48
+ - `model.py`
49
+ - A data model for the common output of an extractor
50
+ - `extractor.py`
51
+ - Base class for extractors to implement
52
+ - `collector.py`
53
+ - Utilities for loading and running extractors
54
+ - `cli.py`
55
+ - A CLI tool `maco` to assist with running your extractors locally
56
+ - `base_test.py`
57
+ - Assist with writing unit tests for your extractors
58
+
59
+ **Note: If you're interested in using only the model in your project, you can `pip install maco-model` which is a smaller package containing only the model definition**
60
+
61
+ ## Project Integrations 🛠️
62
+
63
+ This framework is actively being used by:
64
+
65
+ | Project | Description | License |
66
+ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
67
+ | <a href="https://cybercentrecanada.github.io/assemblyline4_docs/"><img src="https://images.weserv.nl/?url=cybercentrecanada.github.io/assemblyline4_docs/images/crane.png?v=4&h=100&w=100&fit=cover&maxage=7d"></a> | A malware analysis platform that uses the MACO model to export malware configuration extractions into a parseable, machine-friendly format | [![License](https://img.shields.io/github/license/CybercentreCanada/assemblyline)](https://github.com/CybercentreCanada/assemblyline/blob/main/LICENSE.md) |
68
+ | [configextractor-py](https://github.com/CybercentreCanada/configextractor-py) | A tool designed to run extractors from multiple frameworks and uses the MACO model for output harmonization | [![License](https://img.shields.io/github/license/CybercentreCanada/configextractor-py)](https://github.com/CybercentreCanada/configextractor-py/blob/main/LICENSE.md) |
69
+ | <a href="https://github.com/jeFF0Falltrades/rat_king_parser"><img src="https://images.weserv.nl/?url=raw.githubusercontent.com/jeFF0Falltrades/rat_king_parser/master/.github/logo.png?v=4&h=100&w=100&fit=cover&maxage=7d"/> </a> | A robust, multiprocessing-capable, multi-family RAT config parser/extractor that is compatible with MACO | [![License](https://img.shields.io/github/license/jeFF0Falltrades/rat_king_parser)](https://github.com/jeFF0Falltrades/rat_king_parser/blob/master/LICENSE) |
70
+ | <a href="https://github.com/CAPESandbox/community"><img src="https://images.weserv.nl/?url=github.com/CAPESandbox.png?v=4&h=100&w=100&fit=cover&maxage=7d0&mask=circle"/> </a> | A parser/extractor repository containing MACO extractors that's authored by the CAPE community but is integrated in [CAPE](https://github.com/kevoreilly/CAPEv2) deployments.<br>**Note: These MACO extractors wrap and parse the original CAPE extractors.** | [![License](https://img.shields.io/badge/license-GPL--3.0-informational)](https://github.com/kevoreilly/CAPEv2/blob/master/LICENSE) |
71
+
72
+ ## Model Example
73
+
74
+ See [the model definition](https://github.com/CybercentreCanada/Maco/blob/0f447a66de5e5ce8770ef3fe2325aec002842e63/maco/model.py#L127) for all the supported fields.
75
+ You can use the model independently of the rest of the framework.
76
+ This is still useful for compatibility between systems!
77
+
78
+ ```python
79
+ from maco import model
80
+ # 'family' is the only required property on the model
81
+ output = model.ExtractorModel(family="wanabee")
82
+ output.version = "2019" # variant first found in 2019
83
+ output.category.extend([model.CategoryEnum.cryptominer, model.CategoryEnum.clickfraud])
84
+ output.http.append(model.ExtractorModel.Http(protocol="https",
85
+ uri="https://bad-domain.com/c2_payload",
86
+ usage="c2"))
87
+ output.tcp.append(model.ExtractorModel.Connection(server_ip="127.0.0.1",
88
+ usage="ransom"))
89
+ output.campaign_id.append("859186-3224-9284")
90
+ output.inject_exe.append("explorer.exe")
91
+ output.binaries.append(
92
+ output.Binary(
93
+ data=b"sam I am",
94
+ datatype=output.Binary.TypeEnum.config,
95
+ encryption=output.Binary.Encryption(
96
+ algorithm="rot26",
97
+ mode="block",
98
+ ),
99
+ )
100
+ )
101
+ # data about the malware that doesn't fit the model
102
+ output.other["author_lunch"] = "green eggs and ham"
103
+ output.other["author_lunch_time"] = "3pm"
104
+ print(output.model_dump(exclude_defaults=True))
105
+
106
+ # Generated model
107
+ {
108
+ 'family': 'wanabee',
109
+ 'version': '2019',
110
+ 'category': ['cryptominer', 'clickfraud'],
111
+ 'campaign_id': ['859186-3224-9284'],
112
+ 'inject_exe': ['explorer.exe'],
113
+ 'other': {'author_lunch': 'green eggs and ham', 'author_lunch_time': '3pm'},
114
+ 'http': [{'uri': 'https://bad-domain.com/c2_payload', 'usage': 'c2', 'protocol': 'https'}],
115
+ 'tcp': [{'server_ip': '127.0.0.1', 'usage': 'ransom'}],
116
+ 'binaries': [{
117
+ 'datatype': 'config', 'data': b'sam I am',
118
+ 'encryption': {'algorithm': 'rot26', 'mode': 'block'}
119
+ }]
120
+ }
121
+ ```
122
+
123
+ And you can create model instances from dictionaries:
124
+
125
+ ```python
126
+ from maco import model
127
+ output = {
128
+ "family": "wanabee2",
129
+ "version": "2022",
130
+ "ssh": [
131
+ {
132
+ "username": "wanna",
133
+ "password": "bee2",
134
+ "hostname": "10.1.10.100",
135
+ }
136
+ ],
137
+ }
138
+ print(model.ExtractorModel(**output))
139
+
140
+ # Generated model
141
+ family='wanabee2' version='2022' category=[] attack=[] capability_enabled=[]
142
+ capability_disabled=[] campaign_id=[] identifier=[] decoded_strings=[]
143
+ password=[] mutex=[] pipe=[] sleep_delay=None inject_exe=[] other={}
144
+ binaries=[] ftp=[] smtp=[] http=[]
145
+ ssh=[SSH(username='wanna', password='bee2', hostname='10.1.10.100', port=None, usage=None)]
146
+ proxy=[] dns=[] tcp=[] udp=[] encryption=[] service=[] cryptocurrency=[]
147
+ paths=[] registry=[]
148
+ ```
149
+
150
+ ## Extractor Example
151
+
152
+ The following extractor will trigger on any file with more than 50 ELF sections,
153
+ and set some properties in the model.
154
+
155
+ Your extractors will do a better job of finding useful information than this one!
156
+
157
+ ```python
158
+ class Elfy(extractor.Extractor):
159
+ """Check basic elf property."""
160
+
161
+ family = "elfy"
162
+ author = "blue"
163
+ last_modified = "2022-06-14"
164
+ yara_rule = """
165
+ import "elf"
166
+
167
+ rule Elfy
168
+ {
169
+ condition:
170
+ elf.number_of_sections > 50
171
+ }
172
+ """
173
+
174
+ def run(
175
+ self, stream: BytesIO, matches: List[yara.Match]
176
+ ) -> Optional[model.ExtractorModel]:
177
+ # return config model formatted results
178
+ ret = model.ExtractorModel(family=self.family)
179
+ # the list for campaign_id already exists and is empty, so we just add an item
180
+ ret.campaign_id.append(str(len(stream.read())))
181
+ return ret
182
+ ```
183
+
184
+ ## Writing Extractors
185
+
186
+ There are several examples that use Maco in the '`demo_extractors`' folder.
187
+
188
+ Some things to keep in mind:
189
+
190
+ - The Yara rule names must be prefixed with the extractor class name.
191
+ - e.g. Class 'MyScript' has Yara rules named 'MyScriptDetect1' and 'MyScriptDetect2', not 'Detect1'
192
+ - You can load other scripts contained within the same folder via a Python relative import
193
+ - See `complex.py` for details
194
+ - You can standardise your usage of the '`other`' dict
195
+ - This is optional, see `limit_other.py` for details
196
+ - Consider instead making a PR with the properties you are frequently using
197
+
198
+ # Requirements
199
+
200
+ Python 3.8+.
201
+
202
+ Install this package with `pip install maco`.
203
+
204
+ All required Python packages are in the `requirements.txt`.
205
+
206
+ # CLI Usage
207
+
208
+ ```bash
209
+ > maco --help
210
+ usage: maco [-h] [-v] [--pretty] [--base64] [--logfile LOGFILE] [--include INCLUDE] [--exclude EXCLUDE] [-f] [--create_venv] extractors samples
211
+
212
+ Run extractors over samples.
213
+
214
+ positional arguments:
215
+ extractors path to extractors
216
+ samples path to samples
217
+
218
+ optional arguments:
219
+ -h, --help show this help message and exit
220
+ -v, --verbose print debug logging. -v extractor info, -vv extractor debug, -vvv cli debug
221
+ --pretty pretty print json output
222
+ --base64 Include base64 encoded binary data in output (can be large, consider printing to file rather than console)
223
+ --logfile LOGFILE file to log output
224
+ --include INCLUDE comma separated extractors to run
225
+ --exclude EXCLUDE comma separated extractors to not run
226
+ -f, --force ignore yara rules and execute all extractors
227
+ --create_venv Creates venvs for every requirements.txt found (only applies when extractor path is a directory)
228
+ ```
229
+
230
+ ## CLI output example
231
+
232
+ The CLI is helpful for using your extractors in a standalone system, such as in a reverse engineering environment.
233
+
234
+ ```bash
235
+ > maco demo_extractors/ /usr/lib --include Complex
236
+ extractors loaded: ['Complex']
237
+
238
+ complex by blue 2022-06-14 TLP:WHITE
239
+ This script has multiple yara rules and coverage of the data model.
240
+
241
+ path: /usr/lib/udev/hwdb.bin
242
+ run Complex extractor from rules ['ComplexAlt']
243
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
244
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
245
+ "encryption": {"algorithm": "something"}}],
246
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/9956330", "usage": "c2"}],
247
+ "encryption": [{"algorithm": "sha256"}]}
248
+
249
+ path: /usr/lib/udev/hwdb.d/20-OUI.hwdb
250
+ run Complex extractor from rules ['ComplexAlt']
251
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
252
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
253
+ "encryption": {"algorithm": "something"}}],
254
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1986908", "usage": "c2"}],
255
+ "encryption": [{"algorithm": "sha256"}]}
256
+
257
+ path: /usr/lib/udev/hwdb.d/20-usb-vendor-model.hwdb
258
+ run Complex extractor from rules ['ComplexAlt']
259
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
260
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
261
+ "encryption": {"algorithm": "something"}}],
262
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1257481", "usage": "c2"}],
263
+ "encryption": [{"algorithm": "sha256"}]}
264
+
265
+
266
+ 15884 analysed, 3 hits, 3 extracted
267
+ ```
268
+
269
+ The demo extractors are designed to trigger when run over the '`demo_extractors`' folder.
270
+
271
+ e.g. `maco demo_extractors demo_extractors`
272
+
273
+ # Contributions
274
+
275
+ Please use ruff to format and lint PRs. Unformatted or unlinted code may be the cause of PR test failures.
276
+
277
+ Ruff will attempt to fix most issues, but some may require manual resolution.
278
+
279
+ ```
280
+ pip install ruff
281
+ ruff format
282
+ ruff check --fix
283
+ ```
@@ -0,0 +1,247 @@
1
+ # Maco - Malware config extractor framework
2
+
3
+ ## Maco is a framework for <ins>ma</ins>lware <ins>co</ins>nfig extractors.
4
+
5
+ It aims to solve two problems:
6
+
7
+ - Define a standardized ontology (or model) for extractor output. This greatly helps with databasing extracted values.
8
+ - Provide a standard way of identifying which parsers to run and how to execute them.
9
+
10
+ ## Maco components
11
+
12
+ - `model.py`
13
+ - A data model for the common output of an extractor
14
+ - `extractor.py`
15
+ - Base class for extractors to implement
16
+ - `collector.py`
17
+ - Utilities for loading and running extractors
18
+ - `cli.py`
19
+ - A CLI tool `maco` to assist with running your extractors locally
20
+ - `base_test.py`
21
+ - Assist with writing unit tests for your extractors
22
+
23
+ **Note: If you're interested in using only the model in your project, you can `pip install maco-model` which is a smaller package containing only the model definition**
24
+
25
+ ## Project Integrations 🛠️
26
+
27
+ This framework is actively being used by:
28
+
29
+ | Project | Description | License |
30
+ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
31
+ | <a href="https://cybercentrecanada.github.io/assemblyline4_docs/"><img src="https://images.weserv.nl/?url=cybercentrecanada.github.io/assemblyline4_docs/images/crane.png?v=4&h=100&w=100&fit=cover&maxage=7d"></a> | A malware analysis platform that uses the MACO model to export malware configuration extractions into a parseable, machine-friendly format | [![License](https://img.shields.io/github/license/CybercentreCanada/assemblyline)](https://github.com/CybercentreCanada/assemblyline/blob/main/LICENSE.md) |
32
+ | [configextractor-py](https://github.com/CybercentreCanada/configextractor-py) | A tool designed to run extractors from multiple frameworks and uses the MACO model for output harmonization | [![License](https://img.shields.io/github/license/CybercentreCanada/configextractor-py)](https://github.com/CybercentreCanada/configextractor-py/blob/main/LICENSE.md) |
33
+ | <a href="https://github.com/jeFF0Falltrades/rat_king_parser"><img src="https://images.weserv.nl/?url=raw.githubusercontent.com/jeFF0Falltrades/rat_king_parser/master/.github/logo.png?v=4&h=100&w=100&fit=cover&maxage=7d"/> </a> | A robust, multiprocessing-capable, multi-family RAT config parser/extractor that is compatible with MACO | [![License](https://img.shields.io/github/license/jeFF0Falltrades/rat_king_parser)](https://github.com/jeFF0Falltrades/rat_king_parser/blob/master/LICENSE) |
34
+ | <a href="https://github.com/CAPESandbox/community"><img src="https://images.weserv.nl/?url=github.com/CAPESandbox.png?v=4&h=100&w=100&fit=cover&maxage=7d0&mask=circle"/> </a> | A parser/extractor repository containing MACO extractors that's authored by the CAPE community but is integrated in [CAPE](https://github.com/kevoreilly/CAPEv2) deployments.<br>**Note: These MACO extractors wrap and parse the original CAPE extractors.** | [![License](https://img.shields.io/badge/license-GPL--3.0-informational)](https://github.com/kevoreilly/CAPEv2/blob/master/LICENSE) |
35
+
36
+ ## Model Example
37
+
38
+ See [the model definition](https://github.com/CybercentreCanada/Maco/blob/0f447a66de5e5ce8770ef3fe2325aec002842e63/maco/model.py#L127) for all the supported fields.
39
+ You can use the model independently of the rest of the framework.
40
+ This is still useful for compatibility between systems!
41
+
42
+ ```python
43
+ from maco import model
44
+ # 'family' is the only required property on the model
45
+ output = model.ExtractorModel(family="wanabee")
46
+ output.version = "2019" # variant first found in 2019
47
+ output.category.extend([model.CategoryEnum.cryptominer, model.CategoryEnum.clickfraud])
48
+ output.http.append(model.ExtractorModel.Http(protocol="https",
49
+ uri="https://bad-domain.com/c2_payload",
50
+ usage="c2"))
51
+ output.tcp.append(model.ExtractorModel.Connection(server_ip="127.0.0.1",
52
+ usage="ransom"))
53
+ output.campaign_id.append("859186-3224-9284")
54
+ output.inject_exe.append("explorer.exe")
55
+ output.binaries.append(
56
+ output.Binary(
57
+ data=b"sam I am",
58
+ datatype=output.Binary.TypeEnum.config,
59
+ encryption=output.Binary.Encryption(
60
+ algorithm="rot26",
61
+ mode="block",
62
+ ),
63
+ )
64
+ )
65
+ # data about the malware that doesn't fit the model
66
+ output.other["author_lunch"] = "green eggs and ham"
67
+ output.other["author_lunch_time"] = "3pm"
68
+ print(output.model_dump(exclude_defaults=True))
69
+
70
+ # Generated model
71
+ {
72
+ 'family': 'wanabee',
73
+ 'version': '2019',
74
+ 'category': ['cryptominer', 'clickfraud'],
75
+ 'campaign_id': ['859186-3224-9284'],
76
+ 'inject_exe': ['explorer.exe'],
77
+ 'other': {'author_lunch': 'green eggs and ham', 'author_lunch_time': '3pm'},
78
+ 'http': [{'uri': 'https://bad-domain.com/c2_payload', 'usage': 'c2', 'protocol': 'https'}],
79
+ 'tcp': [{'server_ip': '127.0.0.1', 'usage': 'ransom'}],
80
+ 'binaries': [{
81
+ 'datatype': 'config', 'data': b'sam I am',
82
+ 'encryption': {'algorithm': 'rot26', 'mode': 'block'}
83
+ }]
84
+ }
85
+ ```
86
+
87
+ And you can create model instances from dictionaries:
88
+
89
+ ```python
90
+ from maco import model
91
+ output = {
92
+ "family": "wanabee2",
93
+ "version": "2022",
94
+ "ssh": [
95
+ {
96
+ "username": "wanna",
97
+ "password": "bee2",
98
+ "hostname": "10.1.10.100",
99
+ }
100
+ ],
101
+ }
102
+ print(model.ExtractorModel(**output))
103
+
104
+ # Generated model
105
+ family='wanabee2' version='2022' category=[] attack=[] capability_enabled=[]
106
+ capability_disabled=[] campaign_id=[] identifier=[] decoded_strings=[]
107
+ password=[] mutex=[] pipe=[] sleep_delay=None inject_exe=[] other={}
108
+ binaries=[] ftp=[] smtp=[] http=[]
109
+ ssh=[SSH(username='wanna', password='bee2', hostname='10.1.10.100', port=None, usage=None)]
110
+ proxy=[] dns=[] tcp=[] udp=[] encryption=[] service=[] cryptocurrency=[]
111
+ paths=[] registry=[]
112
+ ```
113
+
114
+ ## Extractor Example
115
+
116
+ The following extractor will trigger on any file with more than 50 ELF sections,
117
+ and set some properties in the model.
118
+
119
+ Your extractors will do a better job of finding useful information than this one!
120
+
121
+ ```python
122
+ class Elfy(extractor.Extractor):
123
+ """Check basic elf property."""
124
+
125
+ family = "elfy"
126
+ author = "blue"
127
+ last_modified = "2022-06-14"
128
+ yara_rule = """
129
+ import "elf"
130
+
131
+ rule Elfy
132
+ {
133
+ condition:
134
+ elf.number_of_sections > 50
135
+ }
136
+ """
137
+
138
+ def run(
139
+ self, stream: BytesIO, matches: List[yara.Match]
140
+ ) -> Optional[model.ExtractorModel]:
141
+ # return config model formatted results
142
+ ret = model.ExtractorModel(family=self.family)
143
+ # the list for campaign_id already exists and is empty, so we just add an item
144
+ ret.campaign_id.append(str(len(stream.read())))
145
+ return ret
146
+ ```
147
+
148
+ ## Writing Extractors
149
+
150
+ There are several examples that use Maco in the '`demo_extractors`' folder.
151
+
152
+ Some things to keep in mind:
153
+
154
+ - The Yara rule names must be prefixed with the extractor class name.
155
+ - e.g. Class 'MyScript' has Yara rules named 'MyScriptDetect1' and 'MyScriptDetect2', not 'Detect1'
156
+ - You can load other scripts contained within the same folder via a Python relative import
157
+ - See `complex.py` for details
158
+ - You can standardise your usage of the '`other`' dict
159
+ - This is optional, see `limit_other.py` for details
160
+ - Consider instead making a PR with the properties you are frequently using
161
+
162
+ # Requirements
163
+
164
+ Python 3.8+.
165
+
166
+ Install this package with `pip install maco`.
167
+
168
+ All required Python packages are in the `requirements.txt`.
169
+
170
+ # CLI Usage
171
+
172
+ ```bash
173
+ > maco --help
174
+ usage: maco [-h] [-v] [--pretty] [--base64] [--logfile LOGFILE] [--include INCLUDE] [--exclude EXCLUDE] [-f] [--create_venv] extractors samples
175
+
176
+ Run extractors over samples.
177
+
178
+ positional arguments:
179
+ extractors path to extractors
180
+ samples path to samples
181
+
182
+ optional arguments:
183
+ -h, --help show this help message and exit
184
+ -v, --verbose print debug logging. -v extractor info, -vv extractor debug, -vvv cli debug
185
+ --pretty pretty print json output
186
+ --base64 Include base64 encoded binary data in output (can be large, consider printing to file rather than console)
187
+ --logfile LOGFILE file to log output
188
+ --include INCLUDE comma separated extractors to run
189
+ --exclude EXCLUDE comma separated extractors to not run
190
+ -f, --force ignore yara rules and execute all extractors
191
+ --create_venv Creates venvs for every requirements.txt found (only applies when extractor path is a directory)
192
+ ```
193
+
194
+ ## CLI output example
195
+
196
+ The CLI is helpful for using your extractors in a standalone system, such as in a reverse engineering environment.
197
+
198
+ ```bash
199
+ > maco demo_extractors/ /usr/lib --include Complex
200
+ extractors loaded: ['Complex']
201
+
202
+ complex by blue 2022-06-14 TLP:WHITE
203
+ This script has multiple yara rules and coverage of the data model.
204
+
205
+ path: /usr/lib/udev/hwdb.bin
206
+ run Complex extractor from rules ['ComplexAlt']
207
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
208
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
209
+ "encryption": {"algorithm": "something"}}],
210
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/9956330", "usage": "c2"}],
211
+ "encryption": [{"algorithm": "sha256"}]}
212
+
213
+ path: /usr/lib/udev/hwdb.d/20-OUI.hwdb
214
+ run Complex extractor from rules ['ComplexAlt']
215
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
216
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
217
+ "encryption": {"algorithm": "something"}}],
218
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1986908", "usage": "c2"}],
219
+ "encryption": [{"algorithm": "sha256"}]}
220
+
221
+ path: /usr/lib/udev/hwdb.d/20-usb-vendor-model.hwdb
222
+ run Complex extractor from rules ['ComplexAlt']
223
+ {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
224
+ "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
225
+ "encryption": {"algorithm": "something"}}],
226
+ "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1257481", "usage": "c2"}],
227
+ "encryption": [{"algorithm": "sha256"}]}
228
+
229
+
230
+ 15884 analysed, 3 hits, 3 extracted
231
+ ```
232
+
233
+ The demo extractors are designed to trigger when run over the '`demo_extractors`' folder.
234
+
235
+ e.g. `maco demo_extractors demo_extractors`
236
+
237
+ # Contributions
238
+
239
+ Please use ruff to format and lint PRs. Unformatted or unlinted code may be the cause of PR test failures.
240
+
241
+ Ruff will attempt to fix most issues, but some may require manual resolution.
242
+
243
+ ```
244
+ pip install ruff
245
+ ruff format
246
+ ruff check --fix
247
+ ```
File without changes
@@ -0,0 +1,98 @@
1
+ """Foundation for unit testing an extractor.
2
+
3
+ Example:
4
+ from maco import base_test
5
+ class TestExample(base_test.BaseTest):
6
+ name = "Example"
7
+ path = os.path.join(__file__, "../../extractors")
8
+ def test_run(self):
9
+ data = b"data with Example information"
10
+ ret = self.extract(io.BytesIO(data))
11
+ self.assertEqual(ret["family"], "example")
12
+ """
13
+
14
+ import importlib
15
+ import io
16
+ import os
17
+ import unittest
18
+
19
+ import cart
20
+
21
+ from maco import collector
22
+ from maco.exceptions import NoHitException
23
+
24
+
25
+ class BaseTest(unittest.TestCase):
26
+ """Base unittest.TestCase that builds a Collector restricted to one named extractor."""
27
+
28
+ name: str = None # name of the extractor (required; checked in setUpClass)
29
+ # folder and/or file where extractor is (required; checked in setUpClass).
30
+ # I recommend something like os.path.join(__file__, "../../extractors")
31
+ # if your extractors are in a folder 'extractors' next to a folder of tests
32
+ path: str = None
33
+ create_venv: bool = False # passed through to collector.Collector(create_venv=...)
34
+
35
+ @classmethod
36
+ def setUpClass(cls) -> None:
37
+ """Create the shared Collector (cls.c) for the extractor under test.
38
+
39
+ Raises:
40
+ Exception: when name or path is not set.
41
+ """
42
+ if not cls.name or not cls.path:
43
+ raise Exception("name and path must be set")
44
+ cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
45
+ return super().setUpClass()
46
+
47
+ def test_default_metadata(self):
48
+ """Require that the named extractor is present and is the only extractor collected."""
49
+ self.assertIn(self.name, self.c.extractors)
50
+ self.assertEqual(len(self.c.extractors), 1)
51
+
52
+ def extract(self, stream):
53
+ """Run the yara match on stream, then return the extractor's result for it.
54
+
55
+ Raises:
56
+ NoHitException: when self.c.match(stream) returns nothing.
57
+ """
58
+ runs = self.c.match(stream)
59
+ if not runs:
60
+ raise NoHitException("no yara rule hit")
61
+ resp = self.c.extract(stream, self.name)
62
+ return resp
63
+
64
+ @classmethod
65
+ def _get_location(cls) -> str:
66
+ """Return the __file__ of the module in which this (sub)class is defined."""
67
+ # import the module named by cls.__module__
68
+ module = cls.__module__
69
+ i = importlib.import_module(module)
70
+ # file path of that module
71
+ return i.__file__
72
+
73
+ @classmethod
74
+ def load_cart(cls, filepath: str) -> io.BytesIO:
75
+ """Load and unneuter a test file (likely malware) into memory for processing.
76
+
77
+ Args:
78
+ filepath (str): Path to carted sample, absolute or relative to the directory of the test module
79
+
80
+ Returns:
81
+ (io.BytesIO): Buffered stream containing the un-carted sample
82
+
83
+ Raises:
84
+ FileNotFoundError: if the path to the sample doesn't exist
85
+ """
86
+ # it is nice if we can load files relative to whatever is implementing base_test
87
+ dirpath = os.path.split(cls._get_location())[0]
88
+ # os.path.join keeps an absolute filepath as-is; a relative one is resolved against dirpath
89
+ filepath = os.path.join(dirpath, filepath)
90
+ if not os.path.isfile(filepath):
91
+ raise FileNotFoundError(filepath)
92
+ with open(filepath, "rb") as f:
93
+ unpacked = io.BytesIO()
94
+ # just bubble exceptions if it isn't cart
95
+ cart.unpack_stream(f, unpacked)
96
+ # seek to start of the unneutered stream
97
+ unpacked.seek(0)
98
+ return unpacked