maco 1.2.4__py3-none-any.whl → 1.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
1
  from io import BytesIO
2
2
  from typing import List, Optional
3
3
 
4
- from maco import extractor, model, yara
5
-
6
4
  from demo_extractors.complex import complex_utils
5
+ from maco import extractor, model, yara
7
6
 
8
7
 
9
8
  class Complex(extractor.Extractor):
demo_extractors/elfy.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from io import BytesIO
2
- from typing import Dict, List, Optional
2
+ from typing import List, Optional
3
3
 
4
4
  from maco import extractor, model, yara
5
5
 
@@ -1,9 +1,8 @@
1
1
  from io import BytesIO
2
- from typing import Dict, List, Optional
3
-
4
- from maco import extractor, model, yara
2
+ from typing import List, Optional
5
3
 
6
4
  from demo_extractors import shared
5
+ from maco import extractor, model, yara
7
6
 
8
7
 
9
8
  class LimitOther(extractor.Extractor):
@@ -24,6 +23,14 @@ class LimitOther(extractor.Extractor):
24
23
  """
25
24
 
26
25
  def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
26
+ # import httpx at runtime so we can test that requirements.txt is installed dynamically without breaking
27
+ # the tests that do direct importing
28
+ import httpx
29
+
30
+ # use httpx so it doesn't get deleted by auto linter
31
+ if not httpx.__name__:
32
+ raise Exception("wow I really want to use this library in a useful way")
33
+
27
34
  # use a custom model that inherits from ExtractorModel
28
35
  # this model defines what can go in the 'other' dict
29
36
  tmp = shared.MyCustomModel(family="specify_other")
@@ -1,5 +1,5 @@
1
1
  from io import BytesIO
2
- from typing import Dict, List, Optional
2
+ from typing import List, Optional
3
3
 
4
4
  from maco import extractor, model, yara
5
5
 
@@ -0,0 +1 @@
1
+ httpx
demo_extractors/shared.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from typing import Optional
2
+
2
3
  import pydantic
3
4
 
4
5
  from maco import model
maco/base_test.py CHANGED
@@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
32
32
  # I recommend something like os.path.join(__file__, "../../extractors")
33
33
  # if your extractors are in a folder 'extractors' next to a folder of tests
34
34
  path: str = None
35
+ create_venv: bool = False
35
36
 
36
- def setUp(self) -> None:
37
- if not self.name or not self.path:
37
+ @classmethod
38
+ def setUpClass(cls) -> None:
39
+ if not cls.name or not cls.path:
38
40
  raise Exception("name and path must be set")
39
- self.c = collector.Collector(self.path, include=[self.name])
41
+ cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
42
+ return super().setUpClass()
43
+
44
+ def test_default_metadata(self):
45
+ """Require extractor to be loadable and valid."""
40
46
  self.assertIn(self.name, self.c.extractors)
41
47
  self.assertEqual(len(self.c.extractors), 1)
42
- return super().setUp()
43
48
 
44
49
  def extract(self, stream):
45
50
  """Return results for running extractor over stream, including yara check."""
@@ -49,18 +54,20 @@ class BaseTest(unittest.TestCase):
49
54
  resp = self.c.extract(stream, self.name)
50
55
  return resp
51
56
 
52
- def _get_location(self) -> str:
57
+ @classmethod
58
+ def _get_location(cls) -> str:
53
59
  """Return path to child class that implements this class."""
54
60
  # import child module
55
- module = type(self).__module__
61
+ module = cls.__module__
56
62
  i = importlib.import_module(module)
57
63
  # get location to child module
58
64
  return i.__file__
59
65
 
60
- def load_cart(self, filepath: str) -> io.BytesIO:
66
+ @classmethod
67
+ def load_cart(cls, filepath: str) -> io.BytesIO:
61
68
  """Load and unneuter a test file (likely malware) into memory for processing."""
62
69
  # it is nice if we can load files relative to whatever is implementing base_test
63
- dirpath = os.path.split(self._get_location())[0]
70
+ dirpath = os.path.split(cls._get_location())[0]
64
71
  # either filepath is absolute, or should be loaded relative to child of base_test
65
72
  filepath = os.path.join(dirpath, filepath)
66
73
  if not os.path.isfile(filepath):
maco/cli.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """CLI example of how extractors can be executed."""
2
+
2
3
  import argparse
3
4
  import base64
4
5
  import binascii
@@ -150,6 +151,7 @@ def process_filesystem(
150
151
  logger.info(f"{num_analysed} analysed, {num_hits} hits, {num_extracted} extracted")
151
152
  return num_analysed, num_hits, num_extracted
152
153
 
154
+
153
155
  def main():
154
156
  parser = argparse.ArgumentParser(description="Run extractors over samples.")
155
157
  parser.add_argument("extractors", type=str, help="path to extractors")
@@ -165,7 +167,8 @@ def main():
165
167
  parser.add_argument(
166
168
  "--base64",
167
169
  action="store_true",
168
- help="Include base64 encoded binary data in output (can be large, consider printing to file rather than console)",
170
+ help="Include base64 encoded binary data in output "
171
+ "(can be large, consider printing to file rather than console)",
169
172
  )
170
173
  parser.add_argument("--logfile", type=str, help="file to log output")
171
174
  parser.add_argument("--include", type=str, help="comma separated extractors to run")
@@ -179,7 +182,9 @@ def main():
179
182
  parser.add_argument(
180
183
  "--create_venv",
181
184
  action="store_true",
182
- help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
185
+ help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). "
186
+ "This runs much slower than the alternative but may be necessary "
187
+ "when there are many extractors with conflicting dependencies.",
183
188
  )
184
189
  args = parser.parse_args()
185
190
  inc = args.include.split(",") if args.include else []
@@ -225,7 +230,7 @@ def main():
225
230
  pretty=args.pretty,
226
231
  force=args.force,
227
232
  include_base64=args.base64,
228
- create_venv=args.create_venv
233
+ create_venv=args.create_venv,
229
234
  )
230
235
 
231
236
 
maco/collector.py CHANGED
@@ -4,6 +4,7 @@ import inspect
4
4
  import logging
5
5
  import logging.handlers
6
6
  import os
7
+ import sys
7
8
  from multiprocessing import Manager, Process, Queue
8
9
  from tempfile import NamedTemporaryFile
9
10
  from types import ModuleType
@@ -48,6 +49,15 @@ class Collector:
48
49
  create_venv: bool = False,
49
50
  ):
50
51
  """Discover and load extractors from file system."""
52
+ # maco requires the extractor to be imported directly, so ensure they are available on the path
53
+ full_path_extractors = os.path.abspath(path_extractors)
54
+ full_path_above_extractors = os.path.dirname(full_path_extractors)
55
+ # Modify the PATH so we can recognize this new package on import
56
+ if full_path_extractors not in sys.path:
57
+ sys.path.insert(1, full_path_extractors)
58
+ if full_path_above_extractors not in sys.path:
59
+ sys.path.insert(1, full_path_above_extractors)
60
+
51
61
  path_extractors = os.path.realpath(path_extractors)
52
62
  self.path: str = path_extractors
53
63
  self.extractors: Dict[str, Dict[str, str]] = {}
@@ -89,7 +99,7 @@ class Collector:
89
99
 
90
100
  # multiprocess logging is awkward - set up a queue to ensure we can log
91
101
  logging_queue = Queue()
92
- queue_handler = logging.handlers.QueueListener(logging_queue,*logging.getLogger().handlers)
102
+ queue_handler = logging.handlers.QueueListener(logging_queue, *logging.getLogger().handlers)
93
103
  queue_handler.start()
94
104
 
95
105
  # Find the extractors within the given directory
maco/extractor.py CHANGED
@@ -51,14 +51,14 @@ class Extractor:
51
51
  # check yara rules conform to expected structure
52
52
  # we throw away these compiled rules as we need all rules in system compiled together
53
53
  try:
54
- rules = yara.compile(source=self.yara_rule)
54
+ self.yara_compiled = yara.compile(source=self.yara_rule)
55
55
  except yara.SyntaxError as e:
56
56
  raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
57
57
  # need to track which plugin owns the rules
58
- self.yara_rule_names = [x.identifier for x in rules]
59
- if not len(list(rules)):
58
+ self.yara_rule_names = [x.identifier for x in self.yara_compiled]
59
+ if not len(list(self.yara_compiled)):
60
60
  raise InvalidExtractor(f"{name} must define at least one yara rule")
61
- for x in rules:
61
+ for x in self.yara_compiled:
62
62
  if x.is_global:
63
63
  raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
64
64
 
maco/model/__init__.py CHANGED
@@ -1 +1 @@
1
- from maco.model.model import *
1
+ from maco.model.model import * # noqa: F403
maco/model/model.py CHANGED
@@ -59,31 +59,48 @@ class CategoryEnum(str, Enum):
59
59
  # Malware related to an Advanced Persistent Threat (APT) group.
60
60
  apt = "apt"
61
61
 
62
- # A backdoor Trojan gives malicious users remote control over the infected computer. They enable the author to do anything they wish on the infected computer including sending, receiving, launching and deleting files, displaying data and rebooting the computer. Backdoor Trojans are often used to unite a group of victim computers to form a botnet or zombie network that can be used for criminal purposes.
62
+ # A backdoor Trojan gives malicious users remote control over the infected computer.
63
+ # They enable the author to do anything they wish on the infected computer including
64
+ # sending, receiving, launching and deleting files, displaying data and rebooting the computer.
65
+ # Backdoor Trojans are often used to unite a group of victim computers to form a botnet or
66
+ # zombie network that can be used for criminal purposes.
63
67
  backdoor = "backdoor"
64
68
 
65
- # Trojan Banker programs are designed to steal your account data for online banking systems, e-payment systems and credit or debit cards.
69
+ # Trojan Banker programs are designed to steal your account data for online banking systems,
70
+ # e-payment systems and credit or debit cards.
66
71
  banker = "banker"
67
72
 
68
- # A malware variant that modifies the boot sectors of a hard drive, including the Master Boot Record (MBR) and Volume Boot Record (VBR).
73
+ # A malware variant that modifies the boot sectors of a hard drive, including the Master Boot Record (MBR)
74
+ # and Volume Boot Record (VBR).
69
75
  bootkit = "bootkit"
70
76
 
71
- # A malicious bot is self-propagating malware designed to infect a host and connect back to a central server or servers that act as a command and control (C&C) center for an entire network of compromised devices, or botnet.
77
+ # A malicious bot is self-propagating malware designed to infect a host and connect back to a central server
78
+ # or servers that act as a command and control (C&C) center for an entire network of compromised devices,
79
+ # or botnet.
72
80
  bot = "bot"
73
81
 
74
- # A browser hijacker is defined as a form of unwanted software that modifies a web browser's settings without the user's permission. The result is the placement of unwanted advertising into the browser, and possibly the replacement of an existing home page or search page with the hijacker page.
82
+ # A browser hijacker is defined as a form of unwanted software that modifies a web browser's settings without
83
+ # the user's permission. The result is the placement of unwanted advertising into the browser,
84
+ # and possibly the replacement of an existing home page or search page with the hijacker page.
75
85
  browser_hijacker = "browser_hijacker"
76
86
 
77
- # Trojan bruteforcer are trying to brute force website in order to achieve something else (EX: Finding WordPress websites with default credentials).
87
+ # Trojan bruteforcer are trying to brute force website in order to achieve something else
88
+ # (EX: Finding WordPress websites with default credentials).
78
89
  bruteforcer = "bruteforcer"
79
90
 
80
- # A type of trojan that can use your PC to 'click' on websites or applications. They are usually used to make money for a malicious hacker by clicking on online advertisements and making it look like the website gets more traffic than it does. They can also be used to skew online polls, install programs on your PC, or make unwanted software appear more popular than it is.
91
+ # A type of trojan that can use your PC to 'click' on websites or applications.
92
+ # They are usually used to make money for a malicious hacker by clicking on online advertisements
93
+ # and making it look like the website gets more traffic than it does.
94
+ # They can also be used to skew online polls, install programs on your PC, or make unwanted software
95
+ # appear more popular than it is.
81
96
  clickfraud = "clickfraud"
82
97
 
83
98
  # Cryptocurrency mining malware.
84
99
  cryptominer = "cryptominer"
85
100
 
86
- # These programs conduct DoS (Denial of Service) attacks against a targeted web address. By sending multiple requests from your computer and several other infected computers, the attack can overwhelm the target address leading to a denial of service.
101
+ # These programs conduct DoS (Denial of Service) attacks against a targeted web address.
102
+ # By sending multiple requests from your computer and several other infected computers,
103
+ # the attack can overwhelm the target address leading to a denial of service.
87
104
  ddos = "ddos"
88
105
 
89
106
  # Trojan Downloaders can download and install new versions of malicious programs in the target system.
@@ -92,49 +109,66 @@ class CategoryEnum(str, Enum):
92
109
  # These programs are used by hackers in order to install malware or to prevent the detection of malicious programs.
93
110
  dropper = "dropper"
94
111
 
95
- # Exploit kits are programs that contain data or code that takes advantage of a vulnerability within an application that is running in the target system.
112
+ # Exploit kits are programs that contain data or code that takes advantage of a vulnerability
113
+ # within an application that is running in the target system.
96
114
  exploitkit = "exploitkit"
97
115
 
98
- # Trojan FakeAV programs simulate the activity of antivirus software. They are designed to extort money in return for the detection and removal of threat, even though the threats that they report are actually non-existent.
116
+ # Trojan FakeAV programs simulate the activity of antivirus software.
117
+ # They are designed to extort money in return for the detection and removal of threat, even though the
118
+ # threats that they report are actually non-existent.
99
119
  fakeav = "fakeav"
100
120
 
101
121
  # A type of tool that can be used to allow and maintain unauthorized access to your PC.
102
122
  hacktool = "hacktool"
103
123
 
104
- # A program that collects your personal information, such as your browsing history, and uses it without adequate consent.
124
+ # A program that collects your personal information, such as your browsing history,
125
+ # and uses it without adequate consent.
105
126
  infostealer = "infostealer"
106
127
 
107
- # A keylogger monitors and logs every keystroke it can identify. Once installed, the virus either keeps track of all the keys and stores the information locally, after which the hacker needs physical access to the computer to retrieve the information, or the logs are sent over the internet back to the hacker.
128
+ # A keylogger monitors and logs every keystroke it can identify.
129
+ # Once installed, the virus either keeps track of all the keys and stores the information locally,
130
+ # after which the hacker needs physical access to the computer to retrieve the information,
131
+ # or the logs are sent over the internet back to the hacker.
108
132
  keylogger = "keylogger"
109
133
 
110
134
  # A program that loads another application / memory space.
111
135
  loader = "loader"
112
136
 
113
- # A type of malware that hides its code and purpose to make it more difficult for security software to detect or remove it.
137
+ # A type of malware that hides its code and purpose to make it more difficult for
138
+ # security software to detect or remove it.
114
139
  obfuscator = "obfuscator"
115
140
 
116
- # Point-of-sale malware is usually a type of malware that is used by cybercriminals to target point of sale (POS) and payment terminals with the intent to obtain credit card and debit card information.
141
+ # Point-of-sale malware is usually a type of malware that is used by cybercriminals to target point of sale (POS)
142
+ # and payment terminals with the intent to obtain credit card and debit card information.
117
143
  pos = "pos"
118
144
 
119
- # This type of trojan allows unauthorized parties to use the infected computer as a proxy server to access the Internet anonymously.
145
+ # This type of trojan allows unauthorized parties to use the infected computer as a proxy server
146
+ # to access the Internet anonymously.
120
147
  proxy = "proxy"
121
148
 
122
149
  # A program that can be used by a remote hacker to gain access and control of an infected machine.
123
150
  rat = "rat"
124
151
 
125
- # This type of malware can modify data in the target computer so the operating system will stop running correctly or the data is no longer accessible. The criminal will only restore the computer state or data after a ransom is paid to them (mostly using cryptocurrency).
152
+ # This type of malware can modify data in the target computer so the operating system
153
+ # will stop running correctly or the data is no longer accessible.
154
+ # The criminal will only restore the computer state or data after a ransom is paid to them
155
+ # (mostly using cryptocurrency).
126
156
  ransomware = "ransomware"
127
157
 
128
158
  # A reverse proxy is a server that receives requests from the internet and forwards them to a small set of servers.
129
159
  reverse_proxy = "reverse_proxy"
130
160
 
131
- # Rootkits are designed to conceal certain objects or activities in the system. Often their main purpose is to prevent malicious programs being detected in order to extend the period in which programs can run on an infected computer.
161
+ # Rootkits are designed to conceal certain objects or activities in the system.
162
+ # Often their main purpose is to prevent malicious programs being detected
163
+ # in order to extend the period in which programs can run on an infected computer.
132
164
  rootkit = "rootkit"
133
165
 
134
- # This type of malware scan the internet / network(s) / system(s) / service(s) to collect information. That information could be used later to perpetuate an cyber attack.
166
+ # This type of malware scan the internet / network(s) / system(s) / service(s) to collect information.
167
+ # That information could be used later to perpetuate an cyber attack.
135
168
  scanner = "scanner"
136
169
 
137
- # Scareware is a form of malware which uses social engineering to cause shock, anxiety, or the perception of a threat in order to manipulate users into buying unwanted software.
170
+ # Scareware is a form of malware which uses social engineering to cause shock, anxiety,
171
+ # or the perception of a threat in order to manipulate users into buying unwanted software.
138
172
  scareware = "scareware"
139
173
 
140
174
  # Malware that is sending spam.