maco-1.2.17-py3-none-any.whl → maco-1.2.18-py3-none-any.whl

@@ -0,0 +1,98 @@
+ """Foundation for unit testing an extractor.
+
+ Example:
+     from maco import base_test
+     class TestExample(base_test.BaseTest):
+         name = "Example"
+         path = os.path.join(__file__, "../../extractors")
+         def test_run(self):
+             data = b"data with Example information"
+             ret = self.extract(io.BytesIO(data))
+             self.assertEqual(ret["family"], "example")
+ """
+
+ import importlib
+ import io
+ import os
+ import unittest
+
+ import cart
+
+ from maco import collector
+ from maco.exceptions import NoHitException
+
+
+ class BaseTest(unittest.TestCase):
+     """Base test class."""
+
+     name: str = None  # name of the extractor
+     # folder and/or file where the extractor is.
+     # I recommend something like os.path.join(__file__, "../../extractors")
+     # if your extractors are in a folder 'extractors' next to a folder of tests
+     path: str = None
+     create_venv: bool = False
+
+     @classmethod
+     def setUpClass(cls) -> None:
+         """Initialization of class.
+
+         Raises:
+             Exception: when name or path is not set.
+         """
+         if not cls.name or not cls.path:
+             raise Exception("name and path must be set")
+         cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
+         return super().setUpClass()
+
+     def test_default_metadata(self):
+         """Require extractor to be loadable and valid."""
+         self.assertIn(self.name, self.c.extractors)
+         self.assertEqual(len(self.c.extractors), 1)
+
+     def extract(self, stream):
+         """Return results for running extractor over stream, including yara check.
+
+         Raises:
+             NoHitException: when yara rule doesn't hit.
+         """
+         runs = self.c.match(stream)
+         if not runs:
+             raise NoHitException("no yara rule hit")
+         resp = self.c.extract(stream, self.name)
+         return resp
+
+     @classmethod
+     def _get_location(cls) -> str:
+         """Return path to child class that implements this class."""
+         # import child module
+         module = cls.__module__
+         i = importlib.import_module(module)
+         # get location to child module
+         return i.__file__
+
+     @classmethod
+     def load_cart(cls, filepath: str) -> io.BytesIO:
+         """Load and unneuter a test file (likely malware) into memory for processing.
+
+         Args:
+             filepath (str): Path to carted sample
+
+         Returns:
+             (io.BytesIO): Buffered stream containing the un-carted sample
+
+         Raises:
+             FileNotFoundError: if the path to the sample doesn't exist
+         """
+         # it is nice if we can load files relative to whatever is implementing base_test
+         dirpath = os.path.split(cls._get_location())[0]
+         # either filepath is absolute, or should be loaded relative to child of base_test
+         filepath = os.path.join(dirpath, filepath)
+         if not os.path.isfile(filepath):
+             raise FileNotFoundError(filepath)
+         with open(filepath, "rb") as f:
+             unpacked = io.BytesIO()
+             # just bubble exceptions if it isn't cart
+             cart.unpack_stream(f, unpacked)
+         # seek to start of the unneutered stream
+         unpacked.seek(0)
+         return unpacked
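
The test foundation above can be exercised as in the following minimal sketch; the extractor name "Example" and the carted sample path are assumptions for illustration, not files shipped with this package:

import io
import os

from maco import base_test


class TestExample(base_test.BaseTest):
    # both attributes are required by setUpClass
    name = "Example"  # hypothetical extractor class name
    path = os.path.join(__file__, "../../extractors")

    def test_inline_data(self):
        # extract() raises NoHitException if the extractor's yara rule misses
        ret = self.extract(io.BytesIO(b"data with Example information"))
        self.assertEqual(ret["family"], "example")

    def test_carted_sample(self):
        # load_cart resolves relative paths against this test module's directory
        stream = self.load_cart("samples/example.cart")  # hypothetical sample file
        self.assertIn("family", self.extract(stream))

The inherited test_default_metadata also runs on the subclass, so a misconfigured name or path fails fast.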
@@ -0,0 +1,273 @@
+ """CLI example of how extractors can be executed."""
+
+ import argparse
+ import base64
+ import binascii
+ import hashlib
+ import io
+ import json
+ import logging
+ import os
+ import sys
+ from importlib.metadata import version
+ from typing import BinaryIO, List, Tuple
+
+ import cart
+
+ from maco import collector
+
+ logger = logging.getLogger("maco.lib.cli")
+
+
+ def process_file(
+     collected: collector.Collector,
+     path_file: str,
+     stream: BinaryIO,
+     *,
+     pretty: bool,
+     force: bool,
+     include_base64: bool,
+ ):
+     """Process a filestream with the extractors and rules.
+
+     Args:
+         collected (collector.Collector): a Collector instance
+         path_file (str): path to sample to be analyzed
+         stream (BinaryIO): binary stream to be analyzed
+         pretty (bool): Pretty print the JSON output
+         force (bool): Run all extractors regardless of YARA rule match
+         include_base64 (bool): include base64'd data in output
+
+     Returns:
+         (dict): The output from the extractors analyzing the sample
+
+     """
+     unneutered = io.BytesIO()
+     try:
+         cart.unpack_stream(stream, unneutered)
+     except Exception:
+         # use original stream if anything goes wrong here
+         # i.e. invalid/malformed cart
+         pass
+     else:
+         # use unneutered stream
+         stream = unneutered
+     # unpack will read some bytes either way so reset position
+     stream.seek(0)
+
+     # find extractors that should run based on yara rules
+     if not force:
+         runs = collected.match(stream)
+     else:
+         # execute all extractors with no yara information
+         # note - extractors may rely on a yara hit so this may cause errors
+         runs = {x: [] for x in collected.extractors.keys()}
+     if not runs:
+         return
+
+     # run extractor for the set of hits
+     logger.info(f"path: {path_file}")
+     ret = {}
+     for extractor_name, hits in runs.items():
+         # run and store results for extractor
+         logger.info(f"run {extractor_name} extractor from rules {[x.rule for x in hits]}")
+         try:
+             resp = collected.extract(stream, extractor_name)
+         except Exception as e:
+             logger.exception(f"extractor error with {path_file} ({e})")
+             resp = None
+         # encode binary data so we can print as json
+         if resp:
+             for row in resp.get("binaries", []):
+                 row["sha256"] = hashlib.sha256(row["data"]).hexdigest()
+                 # number of bytes in the binary
+                 row["size"] = len(row["data"])
+                 # small sample of first part of binary
+                 row["hex_sample"] = binascii.hexlify(row["data"][:32]).decode("utf8").upper()
+                 if include_base64:
+                     # this can be large
+                     row["base64"] = base64.b64encode(row["data"]).decode("utf8")
+                 # do not print raw bytes to console
+                 row.pop("data")
+         ret[extractor_name] = resp
+         logger.info(json.dumps(resp, indent=2 if pretty else None))
+         logger.info("")
+
+     return ret
+
+
+ def process_filesystem(
+     path_extractors: str,
+     path_samples: str,
+     include: List[str],
+     exclude: List[str],
+     *,
+     pretty: bool,
+     force: bool,
+     include_base64: bool,
+     create_venv: bool = False,
+     skip_install: bool = False,
+ ) -> Tuple[int, int, int]:
+     """Process filesystem with extractors and print results of extraction.
+
+     Returns:
+         (Tuple[int, int, int]): Total number of analysed files, yara hits and successful maco extractions.
+     """
+     if force:
+         logger.warning("force execute will cause errors if an extractor requires a yara rule hit during execution")
+     collected = collector.Collector(
+         path_extractors, include=include, exclude=exclude, create_venv=create_venv, skip_install=skip_install
+     )
+
+     logger.info(f"extractors loaded: {list(collected.extractors.keys())}\n")
+     for _, extractor in collected.extractors.items():
+         extractor_meta = extractor["metadata"]
+         logger.info(
+             f"{extractor_meta['family']} by {extractor_meta['author']}"
+             f" {extractor_meta['last_modified']} {extractor_meta['sharing']}"
+             f"\n{extractor_meta['description']}\n"
+         )
+
+     num_analysed = 0
+     num_hits = 0
+     num_extracted = 0
+     if os.path.isfile(path_samples):
+         # analyse a single file
+         walker = [("", None, [path_samples])]
+     elif os.path.isdir(path_samples):
+         # load files from directory tree
+         walker = os.walk(path_samples)
+     else:
+         logger.error(f"not file or folder: {path_samples}")
+         sys.exit(2)
+     try:
+         base_directory = os.path.abspath(path_samples)
+         for path, _, files in walker:
+             for file in files:
+                 num_analysed += 1
+                 path_file = os.path.abspath(os.path.join(path, file))
+                 if not path_file.startswith(base_directory):
+                     logger.error(f"Attempted path traversal detected: {path_file}")
+                     continue
+
+                 try:
+                     with open(path_file, "rb") as stream:
+                         resp = process_file(
+                             collected,
+                             path_file,
+                             stream,
+                             pretty=pretty,
+                             force=force,
+                             include_base64=include_base64,
+                         )
+                     if resp:
+                         num_hits += 1
+                         if any(x for x in resp.values()):
+                             num_extracted += 1
+                 except Exception as e:
+                     logger.exception(f"file error with {path_file} ({e})")
+                     continue
+     finally:
+         logger.info("")
+         logger.info(f"{num_analysed} analysed, {num_hits} hits, {num_extracted} extracted")
+     return num_analysed, num_hits, num_extracted
+
+
+ def main():
+     """Main block for CLI."""
+     parser = argparse.ArgumentParser(description="Run extractors over samples.")
+     parser.add_argument("extractors", type=str, help="path to extractors")
+     parser.add_argument("samples", type=str, help="path to samples")
+     parser.add_argument(
+         "-v",
+         "--verbose",
+         action="count",
+         default=0,
+         help="print debug logging. -v extractor info, -vv extractor debug, -vvv cli debug",
+     )
+     parser.add_argument("--pretty", action="store_true", help="pretty print json output")
+     parser.add_argument(
+         "--base64",
+         action="store_true",
+         help="Include base64 encoded binary data in output "
+         "(can be large, consider printing to file rather than console)",
+     )
+     parser.add_argument("--logfile", type=str, help="file to log output")
+     parser.add_argument("--include", type=str, help="comma separated extractors to run")
+     parser.add_argument("--exclude", type=str, help="comma separated extractors to not run")
+     parser.add_argument(
+         "-f",
+         "--force",
+         action="store_true",
+         help="ignore yara rules and execute all extractors",
+     )
+     parser.add_argument(
+         "--create_venv",
+         action="store_true",
+         help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). "
+         "This runs much slower than the alternative but may be necessary "
+         "when there are many extractors with conflicting dependencies.",
+     )
+     parser.add_argument(
+         "--force_install",
+         action="store_true",
+         help="Force installation of Python dependencies for extractors (in both host and virtual environments).",
+     )
+     parser.add_argument(
+         "--version",
+         action="version",
+         version=f"version: {version('maco')}",
+         help="Show version of MACO",
+     )
+
+     args = parser.parse_args()
+     inc = args.include.split(",") if args.include else []
+     exc = args.exclude.split(",") if args.exclude else []
+
+     # set up logging for lib, only show debug with 3+ verbose
+     logger_lib = logging.getLogger("maco.lib")
+     logger_lib.setLevel(logging.DEBUG if args.verbose > 2 else logging.INFO)
+     ch = logging.StreamHandler(sys.stdout)
+     ch.setLevel(logging.DEBUG)
+     logger_lib.addHandler(ch)
+
+     # set up logging for extractor
+     logger_ex = logging.getLogger("maco.extractor")
+     if args.verbose == 0:
+         logger_ex.setLevel(logging.WARNING)
+     elif args.verbose == 1:
+         logger_ex.setLevel(logging.INFO)
+     else:
+         logger_ex.setLevel(logging.DEBUG)
+     ch = logging.StreamHandler(sys.stdout)
+     ch.setLevel(logging.DEBUG)
+     formatter = logging.Formatter(
+         fmt="%(asctime)s, [%(levelname)s] %(module)s.%(funcName)s: %(message)s", datefmt="%Y-%m-%d (%H:%M:%S)"
+     )
+     ch.setFormatter(formatter)
+     logger_ex.addHandler(ch)
+
+     # log everything to file
+     if args.logfile:
+         logger = logging.getLogger("maco")
+         logger_lib.setLevel(logging.DEBUG)
+         fh = logging.FileHandler(args.logfile)
+         fh.setLevel(logging.DEBUG)
+         fh.setFormatter(formatter)
+         logger.addHandler(fh)
+
+     process_filesystem(
+         args.extractors,
+         args.samples,
+         inc,
+         exc,
+         pretty=args.pretty,
+         force=args.force,
+         include_base64=args.base64,
+         create_venv=args.create_venv,
+         skip_install=not args.force_install,
+     )
+
+
+ if __name__ == "__main__":
+     main()
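
The pipeline the CLI wraps can also be driven programmatically; a minimal sketch, assuming this module is importable as maco.cli and that ./extractors and ./samples exist:

from maco.cli import process_filesystem

# run every discovered extractor whose yara rule hits, printing JSON per sample
analysed, hits, extracted = process_filesystem(
    "extractors",          # assumed directory of extractor modules
    "samples",             # assumed file or directory of samples (carted or raw)
    include=[],            # empty allow-list: load everything found
    exclude=[],            # empty deny-list
    pretty=True,
    force=False,           # respect yara gating
    include_base64=False,  # keep binary output small
)
print(f"{analysed} analysed, {hits} hits, {extracted} extracted")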
@@ -0,0 +1,219 @@
+ """Convenience functions for discovering your extractors."""
+
+ import inspect
+ import logging
+ import logging.handlers
+ import os
+ import sys
+ from tempfile import NamedTemporaryFile
+ from types import ModuleType
+ from typing import Any, BinaryIO, Dict, List, TypedDict, Union
+
+ from multiprocess import Manager, Process, Queue
+ from pydantic import BaseModel
+
+ from maco import extractor, model, utils, yara
+ from maco.exceptions import AnalysisAbortedException, ExtractorLoadError
+
+ logger = logging.getLogger("maco.lib.helpers")
+
+
+ def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
+     """Enforce types, verify properties, and remove defaults.
+
+     Args:
+         resp (Union[BaseModel, dict]): results from extractor
+
+     Returns:
+         (Dict): results from extractor after verification
+     """
+     if not resp:
+         return None
+     # check the response is valid for its own model
+     # this is useful if a restriction on the 'other' dictionary is needed
+     resp_model = type(resp)
+     if resp_model != model.ExtractorModel and hasattr(resp_model, "model_validate"):
+         resp = resp_model.model_validate(resp)
+     # check the response is valid according to the ExtractorModel
+     resp = model.ExtractorModel.model_validate(resp)
+     # coerce sets to correct types
+     # otherwise we end up with sets where we expect lists
+     resp = model.ExtractorModel(**resp.model_dump())
+     # dump model to dict
+     return resp.model_dump(exclude_defaults=True)
+
+
+ class ExtractorMetadata(TypedDict):
+     """Extractor-supplied metadata."""
+
+     author: str
+     family: str
+     last_modified: str
+     sharing: str
+     description: str
+
+
+ class ExtractorRegistration(TypedDict):
+     """Registration collected by the collector for a single extractor."""
+
+     venv: str
+     module_path: str
+     module_name: str
+     extractor_class: str
+     metadata: ExtractorMetadata
+
+
+ class Collector:
+     """Discover and load extractors from file system."""
+
+     def __init__(
+         self,
+         path_extractors: str,
+         include: List[str] = None,
+         exclude: List[str] = None,
+         create_venv: bool = False,
+         skip_install: bool = False,
+     ):
+         """Discover and load extractors from file system.
+
+         Raises:
+             ExtractorLoadError: when no extractors are found
+         """
+         # maco requires the extractor to be imported directly, so ensure they are available on the path
+         full_path_extractors = os.path.abspath(path_extractors)
+         full_path_above_extractors = os.path.dirname(full_path_extractors)
+         # modify sys.path so we can recognize this new package on import
+         if full_path_extractors not in sys.path:
+             sys.path.insert(1, full_path_extractors)
+         if full_path_above_extractors not in sys.path:
+             sys.path.insert(1, full_path_above_extractors)
+
+         path_extractors = os.path.realpath(path_extractors)
+         self.path: str = path_extractors
+         self.extractors: Dict[str, ExtractorRegistration] = {}
+
+         with Manager() as manager:
+             extractors = manager.dict()
+             namespaced_rules = manager.dict()
+
+             def extractor_module_callback(module: ModuleType, venv: str):
+                 members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
+                 for name, member in members:
+                     if exclude and name in exclude:
+                         # extractor is part of the exclusion list, skip it
+                         logger.debug(f"exclude excluded '{name}'")
+                         continue
+
+                     if include and name not in include:
+                         # extractor isn't part of the inclusion list, skip it
+                         logger.debug(f"include excluded '{name}'")
+                         continue
+
+                     # initialise and register
+                     logger.debug(f"register '{name}'")
+                     extractors[name] = dict(
+                         venv=venv,
+                         module_path=module.__file__,
+                         module_name=member.__module__,
+                         extractor_class=member.__name__,
+                         metadata={
+                             "family": member.family,
+                             "author": member.author,
+                             "last_modified": member.last_modified,
+                             "sharing": member.sharing,
+                             "description": member.__doc__,
+                         },
+                     )
+                     namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)
+
+             # multiprocess logging is awkward - set up a queue to ensure we can log
+             logging_queue = Queue()
+             queue_handler = logging.handlers.QueueListener(logging_queue, *logging.getLogger().handlers)
+             queue_handler.start()
+
+             # Find the extractors within the given directory
+             # Execute within a child process to ensure main process interpreter is kept clean
+             p = Process(
+                 target=utils.proxy_logging,
+                 args=(
+                     logging_queue,
+                     utils.import_extractors,
+                     extractor_module_callback,
+                 ),
+                 kwargs=dict(
+                     root_directory=path_extractors,
+                     scanner=yara.compile(source=utils.MACO_YARA_RULE),
+                     create_venv=create_venv and os.path.isdir(path_extractors),
+                     skip_install=skip_install,
+                 ),
+             )
+             p.start()
+             p.join()
+
+             # stop multiprocess logging
+             queue_handler.stop()
+             logging_queue.close()
+
+             self.extractors = dict(extractors)
+             if not self.extractors:
+                 raise ExtractorLoadError("no extractors were loaded")
+             logger.debug(f"found extractors {list(self.extractors.keys())}\n")
+
+             # compile yara rules gathered from extractors
+             self.rules = yara.compile(sources=dict(namespaced_rules))
+
+     def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
+         """Return extractors that should run based on yara rules."""
+         # execute yara rules on file to find extractors we should run
+         # yara can't run on a stream so we give it a bytestring
+         matches = self.rules.match(data=stream.read())
+         stream.seek(0)
+         if not matches:
+             return
+         # get all rules that hit for each extractor
+         runs = {}
+         for match in matches:
+             runs.setdefault(match.namespace, []).append(match)
+
+         return runs
+
+     def extract(
+         self,
+         stream: BinaryIO,
+         extractor_name: str,
+     ) -> Dict[str, Any]:
+         """Run extractor with stream and verify output matches the model.
+
+         Args:
+             stream (BinaryIO): Binary stream to analyze
+             extractor_name (str): Name of extractor to analyze stream
+
+         Returns:
+             (Dict[str, Any]): Results from extractor
+         """
+         extractor = self.extractors[extractor_name]
+         try:
+             # Run extractor on a copy of the sample
+             with NamedTemporaryFile() as sample_path:
+                 sample_path.write(stream.read())
+                 sample_path.flush()
+                 # enforce types, verify properties, and remove defaults
+                 return _verify_response(
+                     utils.run_extractor(
+                         sample_path.name,
+                         module_name=extractor["module_name"],
+                         extractor_class=extractor["extractor_class"],
+                         module_path=extractor["module_path"],
+                         venv=extractor["venv"],
+                     )
+                 )
+         except AnalysisAbortedException:
+             # Extractor voluntarily aborted analysis of sample
+             return
+         except Exception:
+             # caller can deal with the exception
+             raise
+         finally:
+             # make sure to reset where we are in the file
+             # otherwise follow on extractors are going to read 0 bytes
+             stream.seek(0)
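
Tying the collector together: a minimal sketch of matching and extracting against a single stream, assuming an extractor class named "Example" lives under ./extractors and a sample exists on disk:

from maco.collector import Collector

collected = Collector("extractors", include=["Example"])  # assumed layout
with open("sample.bin", "rb") as stream:  # assumed sample path
    runs = collected.match(stream)  # {extractor_name: [yara.Match, ...]} or None
    for name in runs or {}:
        # extract() rewinds the stream in its finally block,
        # so several extractors can be run back to back
        print(name, collected.extract(stream, name))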
@@ -0,0 +1,33 @@
+ """Exception classes for extractors."""
+
+
+ # Can be raised by extractors to abort analysis of a sample,
+ # i.e. if preliminary checks at the start of run() indicate the file shouldn't be analyzed by the extractor
+ class AnalysisAbortedException(Exception):
+     """Raised when extractors voluntarily abort analysis of a sample."""
+
+     pass
+
+
+ class ExtractorLoadError(Exception):
+     """Raised when extractors cannot be loaded."""
+
+     pass
+
+
+ class InvalidExtractor(ValueError):
+     """Raised when an extractor is invalid."""
+
+     pass
+
+
+ class NoHitException(Exception):
+     """Raised when the YARA rule of an extractor doesn't hit."""
+
+     pass
+
+
+ class SyntaxError(Exception):
+     """Raised when there's a syntax error in the YARA rule."""
+
+     pass
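
To illustrate the contract around AnalysisAbortedException (which Collector.extract catches, returning no result): a minimal sketch of an extractor bailing out after a preliminary check; the class and the size threshold are invented for the example:

from maco import extractor, model
from maco.exceptions import AnalysisAbortedException


class Cautious(extractor.Extractor):
    """Hypothetical extractor that refuses to analyse tiny inputs."""

    family = "cautious"
    author = "analyst@example.com"
    last_modified = "2024-01-01"

    def run(self, stream, matches):
        if len(stream.read(16)) < 16:
            # preliminary check failed: abort rather than emit a bogus config
            raise AnalysisAbortedException("input too small to analyse")
        stream.seek(0)
        return model.ExtractorModel(family=self.family)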
@@ -0,0 +1,70 @@
+ """Base class for an extractor script."""
+
+ import logging
+ import textwrap
+ from typing import BinaryIO, List, Optional, Union
+
+ from maco import model, yara
+ from maco.exceptions import InvalidExtractor
+
+ DEFAULT_YARA_RULE = """
+ rule {name}
+ {{
+     condition:
+         true
+ }}
+ """
+
+
+ class Extractor:
+     """Base class for an analysis extractor with common entrypoint and metadata.
+
+     Override this docstring with a good description of your extractor.
+     """
+
+     family: Union[str, List[str]] = None  # family or families of malware that is detected by the extractor
+     author: str = None  # author of the extractor (name@organisation)
+     last_modified: str = None  # last modified date (YYYY-MM-DD)
+     sharing: str = "TLP:WHITE"  # who can this be shared with?
+     yara_rule: str = None  # yara rule that we filter inputs with
+     reference: str = None  # link to malware report or other reference information
+     logger: logging.Logger = None  # logger for use when debugging
+
+     def __init__(self) -> None:
+         """Initialise the extractor.
+
+         Raises:
+             InvalidExtractor: When the extractor is invalid.
+         """
+         self.name = name = type(self).__name__
+         self.logger = logging.getLogger(f"maco.extractor.{name}")
+         self.logger.debug(f"initialise '{name}'")
+         if not self.family or not self.author or not self.last_modified:
+             raise InvalidExtractor("must set family, author, last_modified")
+         # if author does not set a yara rule, match on everything
+         if not self.yara_rule:
+             self.yara_rule = DEFAULT_YARA_RULE.format(name=name)
+         # unindent the yara rule from triple quoted string
+         # this is for friendly printing, yara handles the rule ok either way
+         self.yara_rule = textwrap.dedent(self.yara_rule)
+         # check yara rules conform to expected structure
+         # we throw away these compiled rules as we need all rules in system compiled together
+         try:
+             self.yara_compiled = yara.compile(source=self.yara_rule)
+         except yara.SyntaxError as e:
+             raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
+         # need to track which plugin owns the rules
+         self.yara_rule_names = [x.identifier for x in self.yara_compiled]
+         if not self.yara_rule_names:
+             raise InvalidExtractor(f"{name} must define at least one yara rule")
+         for x in self.yara_compiled:
+             if x.is_global:
+                 raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
+
+     def run(self, stream: BinaryIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
+         """Run the analysis process and return a model of the extracted configuration.
+
+         :param stream: file object from disk/network/memory.
+         :param matches: yara rule match information, containing locations of strings.
+         """
+         raise NotImplementedError()
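
A minimal concrete subclass, sketched under the assumption that samples of an invented "example" family contain the literal marker below:

from typing import BinaryIO, List, Optional

from maco import extractor, model, yara


class Example(extractor.Extractor):
    """Detects the invented 'example' family by a hardcoded marker."""

    family = "example"
    author = "analyst@example.com"
    last_modified = "2024-01-01"
    yara_rule = """
    rule Example
    {
        strings:
            $marker = "Example"
        condition:
            $marker
    }
    """

    def run(self, stream: BinaryIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
        # a real extractor would parse the sample here; this sketch only
        # reports the required family field
        return model.ExtractorModel(family=self.family)

Because run returns a pydantic model, the collector's _verify_response can validate it against ExtractorModel before serialising the result.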