PyPI - maco-extractor - Versions diffs - 1.2.18__py3-none-any.whl - Mend

maco-extractor 1.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

maco/__init__.py +0 -0
maco/exceptions.py +33 -0
maco/extractor.py +70 -0
maco/model/__init__.py +1 -0
maco/model/model.py +606 -0
maco/yara.py +129 -0
maco_extractor-1.2.18.dist-info/METADATA +283 -0
maco_extractor-1.2.18.dist-info/RECORD +11 -0
maco_extractor-1.2.18.dist-info/WHEEL +5 -0
maco_extractor-1.2.18.dist-info/licenses/LICENSE.md +11 -0
maco_extractor-1.2.18.dist-info/top_level.txt +1 -0

maco/__init__.py ADDED Viewed

File without changes

maco/exceptions.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""Exception classes for extractors."""
+# Can be raised by extractors to abort analysis of a sample,
+# e.g. when preliminary checks at the start of a run indicate the file shouldn't be analyzed by the extractor.
+class AnalysisAbortedException(Exception):
+    """Raised when extractors voluntarily abort analysis of a sample."""
+    pass
+class ExtractorLoadError(Exception):
+    """Raised when extractors cannot be loaded."""
+    pass
+class InvalidExtractor(ValueError):
+    """Raised when an extractor is invalid."""
+    pass
+class NoHitException(Exception):
+    """Raised when the YARA rule of an extractor doesn't hit."""
+    pass
+class SyntaxError(Exception):
+    """Raised when there's a syntax error in the YARA rule."""
+    pass

maco/extractor.py ADDED Viewed

@@ -0,0 +1,70 @@
+"""Base class for an extractor script."""
+import logging
+import textwrap
+from typing import BinaryIO, List, Optional, Union
+from maco import model, yara
+from maco.exceptions import InvalidExtractor
+# Fallback YARA rule template, used when an extractor does not define its own rule.
+# `{name}` is substituted with str.format(); the doubled braces are format escapes that render as `{` and `}`.
+# The `true` condition makes the resulting rule match every input.
+DEFAULT_YARA_RULE = """
+rule {name}
+{{
+    condition:
+        true
+}}
+"""
+class Extractor:
+    """Base class for an analysis extractor with common entrypoint and metadata.
+    Override this docstring with a good description of your extractor.
+    """
+    family: Union[str, List[str]] = None  # family or families of malware that is detected by the extractor
+    author: str = None  # author of the extractor (name@organisation)
+    last_modified: str = None  # last modified date (YYYY-MM-DD)
+    sharing: str = "TLP:WHITE"  # who can this be shared with?
+    yara_rule: str = None  # yara rule that we filter inputs with
+    reference: str = None  # link to malware report or other reference information
+    logger: logging.Logger = None  # logger for use when debugging
+    def __init__(self) -> None:
+        """Initialise the extractor.
+        Raises:
+            InvalidExtractor: When the extractor is invalid.
+        """
+        self.name = name = type(self).__name__
+        self.logger = logging.getLogger(f"maco.extractor.{name}")
+        self.logger.debug(f"initialise '{name}'")
+        if not self.family or not self.author or not self.last_modified:
+            raise InvalidExtractor("must set family, author, last_modified")
+        # if author does not set a yara rule, match on everything
+        if not self.yara_rule:
+            self.yara_rule = DEFAULT_YARA_RULE.format(name=name)
+        # unindent the yara rule from triple quoted string
+        # this is for friendly printing, yara handles the rule ok either way
+        self.yara_rule = textwrap.dedent(self.yara_rule)
+        # check yara rules conform to expected structure
+        # we throw away these compiled rules as we need all rules in system compiled together
+        try:
+            self.yara_compiled = yara.compile(source=self.yara_rule)
+        except yara.SyntaxError as e:
+            raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
+        # need to track which plugin owns the rules
+        self.yara_rule_names = [x.identifier for x in self.yara_compiled]
+        if not len(list(self.yara_compiled)):
+            raise InvalidExtractor(f"{name} must define at least one yara rule")
+        for x in self.yara_compiled:
+            if x.is_global:
+                raise InvalidExtractor(f"{x.identifier} yara rule must not be global")
+    def run(self, stream: BinaryIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
+        """Run the analysis process and return dict matching.
+        :param stream: file object from disk/network/memory.
+        :param match: yara rule match information contains locations of strings.
+        """
+        raise NotImplementedError()

maco/model/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from maco.model.model import * # noqa: F403

maco/model/model.py ADDED Viewed

@@ -0,0 +1,606 @@
+"""Malware config extractor output model."""
+from enum import Enum
+from typing import Any, Dict, List, Optional, Union
+from pydantic import BaseModel, ConfigDict
+class ForbidModel(BaseModel):
+    """We want to forbid extra properties, so that the 'other' field is used instead."""
+    # extra="forbid": fields that are not declared on the model are rejected rather than accepted.
+    # use_enum_values=True: enum-typed fields are populated with the enum member's value.
+    model_config = ConfigDict(extra="forbid", use_enum_values=True)
+class ConnUsageEnum(str, Enum):
+    """Purpose of the connection."""
+    c2 = "c2"  # issue commands to malware
+    upload = "upload"  # get data out of the network
+    download = "download"  # fetch dynamic config, second stage, etc
+    propagate = "propagate"  # spread through the network
+    tunnel = "tunnel"  # communicate through the network
+    ransom = "ransom"  # payment
+    decoy = "decoy"  # decoy connections to obfuscate malicious activity
+    other = "other"  # any purpose not covered by the values above
+class Encryption(ForbidModel):
+    """Encryption usage.
+    Every field is optional; extra fields are forbidden (inherited from ForbidModel).
+    """
+    class UsageEnum(str, Enum):
+        """Purpose of the encryption."""
+        config = "config"
+        communication = "communication"
+        binary = "binary"
+        ransom = "ransom"
+        other = "other"
+    algorithm: Optional[str] = None
+    public_key: Optional[str] = None
+    key: Optional[str] = None  # private key or symmetric key
+    provider: Optional[str] = None  # encryption library used. openssl, homebrew, etc.
+    mode: Optional[str] = None  # block vs stream
+    # base 64'd binary data for these details?
+    # TODO to confirm usage of these different properties
+    iv: Optional[str] = None  # initialisation vector
+    seed: Optional[str] = None
+    nonce: Optional[str] = None
+    constants: List[str] = []
+    usage: Optional[UsageEnum] = None  # what the encryption is used for
+class CategoryEnum(str, Enum):
+    """Category of the malware."""
+    # Software that shows you extra promotions that you cannot control as you use your PC.
+    # You wouldn't see the extra ads if you didn't have adware installed.
+    adware = "adware"
+    # Malware related to an Advanced Persistent Threat (APT) group.
+    apt = "apt"
+    # A backdoor Trojan gives malicious users remote control over the infected computer.
+    # They enable the author to do anything they wish on the infected computer including
+    # sending, receiving, launching and deleting files, displaying data and rebooting the computer.
+    # Backdoor Trojans are often used to unite a group of victim computers to form a botnet or
+    # zombie network that can be used for criminal purposes.
+    backdoor = "backdoor"
+    # Trojan Banker programs are designed to steal your account data for online banking systems,
+    # e-payment systems and credit or debit cards.
+    banker = "banker"
+    # A malware variant that modifies the boot sectors of a hard drive, including the Master Boot Record (MBR)
+    # and Volume Boot Record (VBR).
+    bootkit = "bootkit"
+    # A malicious bot is self-propagating malware designed to infect a host and connect back to a central server
+    # or servers that act as a command and control (C&C) center for an entire network of compromised devices,
+    # or botnet.
+    bot = "bot"
+    # A browser hijacker is defined as a form of unwanted software that modifies a web browser's settings without
+    # the user's permission. The result is the placement of unwanted advertising into the browser,
+    # and possibly the replacement of an existing home page or search page with the hijacker page.
+    browser_hijacker = "browser_hijacker"
+    # Trojan bruteforcers try to brute force websites in order to achieve something else
+    # (e.g. finding WordPress websites with default credentials).
+    bruteforcer = "bruteforcer"
+    # A type of trojan that can use your PC to 'click' on websites or applications.
+    # They are usually used to make money for a malicious hacker by clicking on online advertisements
+    # and making it look like the website gets more traffic than it does.
+    # They can also be used to skew online polls, install programs on your PC, or make unwanted software
+    # appear more popular than it is.
+    clickfraud = "clickfraud"
+    # Cryptocurrency mining malware.
+    cryptominer = "cryptominer"
+    # These programs conduct DoS (Denial of Service) attacks against a targeted web address.
+    # By sending multiple requests from your computer and several other infected computers,
+    # the attack can overwhelm the target address leading to a denial of service.
+    ddos = "ddos"
+    # Trojan Downloaders can download and install new versions of malicious programs in the target system.
+    downloader = "downloader"
+    # These programs are used by hackers in order to install malware or to prevent the detection of malicious programs.
+    dropper = "dropper"
+    # Exploit kits are programs that contain data or code that takes advantage of a vulnerability
+    # within an application that is running in the target system.
+    exploitkit = "exploitkit"
+    # Trojan FakeAV programs simulate the activity of antivirus software.
+    # They are designed to extort money in return for the detection and removal of threats, even though the
+    # threats that they report are actually non-existent.
+    fakeav = "fakeav"
+    # A type of tool that can be used to allow and maintain unauthorized access to your PC.
+    hacktool = "hacktool"
+    # A program that collects your personal information, such as your browsing history,
+    # and uses it without adequate consent.
+    infostealer = "infostealer"
+    # A keylogger monitors and logs every keystroke it can identify.
+    # Once installed, the virus either keeps track of all the keys and stores the information locally,
+    # after which the hacker needs physical access to the computer to retrieve the information,
+    # or the logs are sent over the internet back to the hacker.
+    keylogger = "keylogger"
+    # A program that loads another application / memory space.
+    loader = "loader"
+    # A type of malware that hides its code and purpose to make it more difficult for
+    # security software to detect or remove it.
+    obfuscator = "obfuscator"
+    # Point-of-sale malware is usually a type of malware that is used by cybercriminals to target point of sale (POS)
+    # and payment terminals with the intent to obtain credit card and debit card information.
+    pos = "pos"
+    # This type of trojan allows unauthorized parties to use the infected computer as a proxy server
+    # to access the Internet anonymously.
+    proxy = "proxy"
+    # A program that can be used by a remote hacker to gain access and control of an infected machine.
+    rat = "rat"
+    # This type of malware can modify data in the target computer so the operating system
+    # will stop running correctly or the data is no longer accessible.
+    # The criminal will only restore the computer state or data after a ransom is paid to them
+    # (mostly using cryptocurrency).
+    ransomware = "ransomware"
+    # A reverse proxy is a server that receives requests from the internet and forwards them to a small set of servers.
+    reverse_proxy = "reverse_proxy"
+    # Rootkits are designed to conceal certain objects or activities in the system.
+    # Often their main purpose is to prevent malicious programs being detected
+    # in order to extend the period in which programs can run on an infected computer.
+    rootkit = "rootkit"
+    # This type of malware scans the internet / network(s) / system(s) / service(s) to collect information.
+    # That information could be used later to perpetrate a cyber attack.
+    scanner = "scanner"
+    # Scareware is a form of malware which uses social engineering to cause shock, anxiety,
+    # or the perception of a threat in order to manipulate users into buying unwanted software.
+    scareware = "scareware"
+    # Malware that sends spam.
+    spammer = "spammer"
+    # Generic or Unknown Trojan
+    trojan = "trojan"
+    # A generic computer virus
+    virus = "virus"
+    # A type of malware that destroys data.
+    wiper = "wiper"
+    # A web shell is a script that can be uploaded to a web server to enable remote administration of the machine.
+    webshell = "webshell"
+    # A type of malware that spreads to other PCs.
+    worm = "worm"
+class ExtractorModel(ForbidModel):
+    r"""Captured config/iocs, unpacked binaries and other malware properties from a robo-analyst.
+    This model defines common fields for output of a script targeting a specific malware family.
+    Usage of this model will allow for easier sharing of scripts between different authors and systems.
+    The model will not define fields for all data that can be extracted from a binary, only the most common.
+    This is to make it easier for authors to understand and use the model.
+    This model can have new fields added in the future if they become more common,
+    but the intent is to avoid removing or modifying existing fields, for backwards compatibility.
+    Where data does not fit with the current model, the 'other' field should be used.
+    Content in this field is not defined by the model and verification/normalisation is up to
+    the author and whatever systems run the scripts.
+    If many decoders define similar data in the 'other' field, that data should be migrated to this model.
+    The model must be kept relatively flat, with nested lists of dictionaries to be avoided.
+    This is to make queries simpler to write in sql, elasticsearch and other storage systems.
+    Malware and systems that investigate malware can do pretty much anything.
+    This model needs to be simple and flexible to make sharing easy.
+    Some things should be out of scope for this model.
+    Responsibility for these things is up to authors and systems that use this model.
+    Out of scope
+    * Verifying anything in the 'other' dict, including that it is json-compatible.
+        * We don't know anything about the structure
+        * checking is json compatible requires dumping to json string, which can be slow
+    * Connecting specific config items to malware behaviour catalog
+        * i.e. "Persistence::Modify Registry" with 'registry' item from model (SYSTEM\ControlSet001\Services\)
+        * due to complexity and normalisation difficulties
+        * much malware behaviour is not related to specific config items
+    * Normalisation/verification of individual properties
+        * i.e. lowercase filepaths - some filesystems are case sensitive
+        * i.e. checking registry hives match known - not enough SME and too complex for a simple model
+        * generally, this quickly becomes complex (validating a fully defined http item)
+        * calling systems are probably performing their own validation anyway
+    * requiring specific properties to be set
+        * i.e. if http item is defined, requiring hostname to be set
+        * Some use cases always seem to exist where a property should not be set
+    """
+    family: Union[str, List[str]]  # family or families of malware that was detected
+    version: Optional[str] = None  # version/variant of malware
+    category: List[CategoryEnum] = []  # capability/purpose of the malware
+    attack: List[str] = []  # mitre att&ck reference ids, e.g. 'T1129'
+    #
+    # simple config properties
+    #
+    # capabilities of the malware enabled/disabled in config
+    # note these are probably malware-specific capabilities so no attempt to normalise has been made
+    # note - av/sandbox detection should be noted by 'detect_<product>'
+    capability_enabled: List[str] = []
+    capability_disabled: List[str] = []
+    campaign_id: List[str] = []  # Server/Campaign Id for malware
+    identifier: List[str] = []  # UUID/Identifiers for deployed instance
+    decoded_strings: List[str] = []  # decoded strings from within malware
+    password: List[str] = []  # Any password extracted from the binary
+    mutex: List[str] = []  # mutex to prevent multiple instances
+    pipe: List[str] = []  # pipe name used for communication
+    sleep_delay: Optional[int] = None  # time to sleep/delay execution (milliseconds)
+    # additional time applied to sleep_delay (milliseconds).
+    # Jitter implementations can vary but usually it is a value from which a random number is generated and
+    # added/subtracted to the sleep_delay to make behaviour more unpredictable
+    sleep_delay_jitter: Optional[int] = None
+    inject_exe: List[str] = []  # name of executable to inject into
+    # configuration or clustering/research data that doesn't fit the other fields
+    # * rarely used by decoders or specific to one decoder
+    # to prevent key explosion, the keys must not be dynamically generated
+    # e.g. api_imports, api_checksums, num_imports, import_hash + many more
+    # data stored here must always be JSON-serialisable
+    other: Dict[str, Any] = {}
+    #
+    # embedded binary data
+    #
+    class Binary(ForbidModel):
+        """Binary data extracted by decoder."""
+        class TypeEnum(str, Enum):
+            """Type of binary data."""
+            payload = "payload"  # contained within the original file
+            config = "config"  # sometimes malware uses json/formatted text for config
+            other = "other"
+        datatype: Optional[TypeEnum] = None  # what the binary data is used for
+        data: bytes  # binary data, not json compatible
+        # other information for the extracted binary rather than the config
+        # data stored here must always be JSON-serialisable
+        # e.g. filename, extension, relationship label
+        other: Dict[str, Any] = {}
+        # convenience for ret.encryption.append(ret.Encryption(*properties))
+        # Define as class as only way to allow for this to be accessed and not have pydantic try to parse it.
+        class Encryption(Encryption):
+            """Encryption usage."""
+            pass
+        encryption: Union[List[Encryption], Encryption, None] = None  # encryption information for the binary
+    binaries: List[Binary] = []
+    #
+    # communication protocols
+    #
+    class FTP(ForbidModel):
+        """Usage of FTP connection."""
+        username: Optional[str] = None
+        password: Optional[str] = None
+        hostname: Optional[str] = None
+        port: Optional[int] = None
+        path: Optional[str] = None
+        usage: Optional[ConnUsageEnum] = None
+    ftp: List[FTP] = []
+    class SMTP(ForbidModel):
+        """Usage of SMTP."""
+        # credentials and location of server
+        username: Optional[str] = None
+        password: Optional[str] = None
+        hostname: Optional[str] = None
+        port: Optional[int] = None
+        mail_to: List[str] = []  # receivers
+        mail_from: Optional[str] = None  # sender
+        subject: Optional[str] = None
+        usage: Optional[ConnUsageEnum] = None
+    smtp: List[SMTP] = []  # SMTP server for malware
+    class Http(ForbidModel):
+        """Usage of HTTP connection."""
+        # malware sometimes does weird stuff with uris so we don't want to force
+        # authors to break the uri into username, hostname, path, etc.
+        # as we lose that information.
+        # e.g. extra '?' or '/' when unnecessary.
+        # or something that is technically an invalid uri but still works
+        uri: Optional[str] = None
+        # on the other hand we might not have enough info to construct a uri
+        protocol: Optional[str] = None  # http,https
+        username: Optional[str] = None
+        password: Optional[str] = None
+        hostname: Optional[str] = None  # (A host/hostname can be an IP, domain or hostname)
+        port: Optional[int] = None
+        path: Optional[str] = None
+        query: Optional[str] = None
+        fragment: Optional[str] = None
+        user_agent: Optional[str] = None  # user agent sent by malware
+        method: Optional[str] = None  # get put delete etc
+        headers: Optional[Dict[str, str]] = None  # custom/additional HTTP headers
+        max_size: Optional[int] = None
+        usage: Optional[ConnUsageEnum] = None
+    http: List[Http] = []
+    class SSH(ForbidModel):
+        """Usage of ssh connection."""
+        username: Optional[str] = None
+        password: Optional[str] = None
+        hostname: Optional[str] = None
+        port: Optional[int] = None
+        usage: Optional[ConnUsageEnum] = None
+    ssh: List[SSH] = []
+    class Proxy(ForbidModel):
+        """Usage of proxy connection."""
+        protocol: Optional[str] = None  # socks5,http
+        username: Optional[str] = None
+        password: Optional[str] = None
+        hostname: Optional[str] = None
+        port: Optional[int] = None
+        usage: Optional[ConnUsageEnum] = None
+    proxy: List[Proxy] = []
+    class ICMP(ForbidModel):
+        """Usage of ICMP."""
+        type: Optional[int] = None
+        code: Optional[int] = None
+        header: Optional[str] = None  # Some malware uses non-standard header fields
+        hostname: Optional[str] = None
+        usage: Optional[ConnUsageEnum] = None
+    icmp: List[ICMP] = []
+    #
+    # inter process communication (IPC)
+    #
+    class IPC(ForbidModel):
+        """Usage of inter-process communication (files, sockets, pipes, message queues, shared memory)."""
+        # A record stored on disk, or a record synthesized on demand by a file
+        # server, which can be accessed by multiple processes.
+        file: Optional[List[str]] = None
+        # Data sent over a network interface, either to a different process on
+        # the same computer or to another computer on the network. Stream
+        # oriented (TCP; data written through a socket requires formatting to
+        # preserve message boundaries) or more rarely message-oriented (UDP,
+        # SCTP).
+        socket: Optional[List[str]] = None
+        # Similar to an internet socket, but all communication occurs within
+        # the kernel. Domain sockets use the file system as their address
+        # space. Processes reference a domain socket as an inode, and multiple
+        # processes can communicate with one socket.
+        unix_domain_socket: Optional[List[str]] = None
+        # A file mapped to RAM and can be modified by changing memory
+        # addresses directly instead of outputting to a stream. This shares
+        # the same benefits as a standard file.
+        memory_mapped_file: Optional[Union[bytes, List[str]]] = None
+        # A data stream similar to a socket, but which usually preserves
+        # message boundaries. Typically implemented by the operating system,
+        # they allow multiple processes to read and write to the message queue
+        # without being directly connected to each other.
+        message_queue: Optional[List[str]] = None
+        # A unidirectional data channel using standard input and output. Data
+        # written to the write-end of the pipe is buffered by the operating
+        # system until it is read from the read-end of the pipe. Two-way
+        # communication between processes can be achieved by using two pipes
+        # in opposite "directions".
+        anonymous_pipe: Optional[List[str]] = None
+        # A pipe that is treated like a file. Instead of using standard input
+        # and output as with an anonymous pipe, processes write to and read
+        # from a named pipe, as if it were a regular file.
+        named_pipe: Optional[List[str]] = None
+        # The process names involved in the IPC communication
+        process_names: Optional[List[str]] = None
+        # Multiple processes are given access to the same block of memory,
+        # which creates a shared buffer for the processes to communicate with
+        # each other.
+        shared_memory: Optional[bytes] = None
+        usage: Optional[ConnUsageEnum] = None
+    ipc: List[IPC] = []  # Inter-Process Communications (similar to 'pipe' but more detailed)
+    class DNS(ForbidModel):
+        """Direct usage of DNS."""
+        class RecordTypeEnum(str, Enum):
+            """DNS record types."""
+            A = "A"
+            AAAA = "AAAA"
+            AFSDB = "AFSDB"
+            APL = "APL"
+            CAA = "CAA"
+            CDNSKEY = "CDNSKEY"
+            CDS = "CDS"
+            CERT = "CERT"
+            CNAME = "CNAME"
+            CSYNC = "CSYNC"
+            DHCID = "DHCID"
+            DLV = "DLV"
+            DNAME = "DNAME"
+            DNSKEY = "DNSKEY"
+            DS = "DS"
+            EUI48 = "EUI48"
+            EUI64 = "EUI64"
+            HINFO = "HINFO"
+            HIP = "HIP"
+            HTTPS = "HTTPS"
+            IPSECKEY = "IPSECKEY"
+            KEY = "KEY"
+            KX = "KX"
+            LOC = "LOC"
+            MX = "MX"
+            NAPTR = "NAPTR"
+            NS = "NS"
+            NSEC = "NSEC"
+            NSEC3 = "NSEC3"
+            NSEC3PARAM = "NSEC3PARAM"
+            OPENPGPKEY = "OPENPGPKEY"
+            PTR = "PTR"
+            RRSIG = "RRSIG"
+            RP = "RP"
+            SIG = "SIG"
+            SMIMEA = "SMIMEA"
+            SOA = "SOA"
+            SRV = "SRV"
+            SSHFP = "SSHFP"
+            SVCB = "SVCB"
+            TA = "TA"
+            TKEY = "TKEY"
+            TLSA = "TLSA"
+            TSIG = "TSIG"
+            TXT = "TXT"
+            URI = "URI"
+            ZONEMD = "ZONEMD"
+        ip: Optional[str] = None
+        port: Optional[int] = None  # The default value is 53
+        hostname: Optional[str] = None  # This is the query hostname
+        record_type: Optional[RecordTypeEnum] = None  # The DNS record type that is queried
+        usage: Optional[ConnUsageEnum] = None
+    dns: List[DNS] = []  # custom DNS address to use for name resolution
+    class Connection(ForbidModel):
+        """Generic TCP/UDP usage."""
+        client_ip: Optional[str] = None
+        client_port: Optional[int] = None
+        server_ip: Optional[str] = None
+        server_domain: Optional[str] = None
+        server_port: Optional[int] = None
+        usage: Optional[ConnUsageEnum] = None
+    # the same Connection model describes both TCP and UDP usage
+    tcp: List[Connection] = []
+    udp: List[Connection] = []
+    #
+    # complex configuration properties
+    #
+    # convenience for ret.encryption.append(ret.Encryption(*properties))
+    # Define as class as only way to allow for this to be accessed and not have pydantic try to parse it.
+    class Encryption(Encryption):
+        """Encryption usage."""
+        pass
+    encryption: List[Encryption] = []
+    class Service(ForbidModel):
+        """OS service usage by malware."""
+        dll: Optional[str] = None  # dll that the service is loaded from
+        name: Optional[str] = None  # service/driver name for persistence
+        display_name: Optional[str] = None  # display name for service
+        description: Optional[str] = None  # description for service
+    service: List[Service] = []
+    class Cryptocurrency(ForbidModel):
+        """Cryptocoin usage (ransomware/miner)."""
+        class UsageEnum(str, Enum):
+            """Cryptocoin usage."""
+            ransomware = "ransomware"  # request money to unlock
+            miner = "miner"  # use gpu/cpu to mint coins
+            other = "other"
+        coin: Optional[str] = None  # BTC,ETH,USDT,BNB, etc
+        address: Optional[str] = None
+        ransom_amount: Optional[float] = None  # number of coins required (if hardcoded)
+        usage: UsageEnum  # required, unlike the optional usage fields of the other models
+    cryptocurrency: List[Cryptocurrency] = []
+    class Path(ForbidModel):
+        """Path used by malware."""
+        class UsageEnum(str, Enum):
+            """Purpose of the path."""
+            c2 = "c2"  # file/folder issues commands to malware
+            config = "config"  # config is loaded from this path
+            install = "install"  # install directory/filename for malware
+            plugins = "plugins"  # load new capability from this directory
+            logs = "logs"  # location to log activity
+            storage = "storage"  # location to store/backup copied files
+            other = "other"
+        # C:\User\tmp\whatever.txt or /some/unix/folder/path
+        path: str
+        usage: Optional[UsageEnum] = None
+    paths: List[Path] = []  # files/directories used by malware
+    class Registry(ForbidModel):
+        """Registry usage by malware."""
+        class UsageEnum(str, Enum):
+            """Registry usage."""
+            persistence = "persistence"  # stay alive
+            store_data = "store_data"  # generated encryption keys or config
+            store_payload = "store_payload"  # malware hidden in registry key
+            read = "read"  # read system registry keys
+            other = "other"
+        key: str
+        usage: Optional[UsageEnum] = None
+    registry: List[Registry] = []

maco/yara.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""yara-python facade that uses yara-x."""
+import re
+from collections import namedtuple
+from itertools import cycle
+from typing import Dict, List, Union
+import yara_x
+from maco.exceptions import SyntaxError
+RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
+# Create interfaces that resembles yara-python (but is running yara-x under the hood)
+class StringMatchInstance:
+    """Instance of a string match."""
+    def __init__(self, match: yara_x.Match, file_content: bytes):
+        """Initializes StringMatchInstance."""
+        self.matched_data = file_content[match.offset : match.offset + match.length]
+        self.matched_length = match.length
+        self.offset = match.offset
+        self.xor_key = match.xor_key
+    def plaintext(self) -> bytes:
+        """Plaintext of the matched data.
+        Returns:
+            (bytes): Plaintext of the matched cipher text
+        """
+        if not self.xor_key:
+            # No need to XOR the matched data
+            return self.matched_data
+        else:
+            return bytes(c ^ k for c, k in zip(self.matched_data, cycle(self.xor_key)))
+class StringMatch:
+    """String match."""
+    def __init__(self, pattern: yara_x.Pattern, file_content: bytes):
+        """Initializes StringMatch."""
+        self.identifier = pattern.identifier
+        self.instances = [StringMatchInstance(match, file_content) for match in pattern.matches]
+        self._is_xor = any([match.xor_key for match in pattern.matches])
+    def is_xor(self):
+        """Checks if string match is xor'd.
+        Returns:
+            (bool): True if match is xor'd
+        """
+        return self._is_xor
+class Match:
+    """Match."""
+    def __init__(self, rule: yara_x.Rule, file_content: bytes):
+        """Initializes Match."""
+        self.rule = rule.identifier
+        self.namespace = rule.namespace
+        self.tags = list(rule.tags) or []
+        self.meta = dict()
+        # Ensure metadata doesn't get overwritten
+        for k, v in rule.metadata:
+            self.meta.setdefault(k, []).append(v)
+        self.strings = [StringMatch(pattern, file_content) for pattern in rule.patterns]
+class Rules:
+    """Rules."""
+    def __init__(self, source: str = None, sources: Dict[str, str] = None):
+        """Initializes Rules.
+        Raises:
+            SyntaxError: Raised when there's a syntax error in the YARA rule.
+        """
+        Rule = namedtuple("Rule", "identifier namespace is_global")
+        if source:
+            sources = {"default": source}
+        try:
+            self._rules = []
+            compiler = yara_x.Compiler(relaxed_re_syntax=True)
+            for namespace, source in sources.items():
+                compiler.new_namespace(namespace)
+                for rule_type, id in RULE_ID_RE.findall(source):
+                    is_global = True if rule_type == "global" else False
+                    self._rules.append(Rule(namespace=namespace, identifier=id, is_global=is_global))
+                compiler.add_source(source)
+            self.scanner = yara_x.Scanner(compiler.build())
+        except yara_x.CompileError as e:
+            raise SyntaxError(e)
+    def __iter__(self):
+        """Iterate over rules.
+        Yields:
+            YARA rules
+        """
+        for rule in self._rules:
+            yield rule
+    def match(self, filepath: str = None, data: Union[bytes, bytearray] = None) -> List[Match]:
+        """Performs a scan to check for YARA rules matches based on the file, either given by path or buffer.
+        Returns:
+            (List[Match]): A list of YARA matches.
+        """
+        if filepath:
+            with open(filepath, "rb") as fp:
+                data = fp.read()
+        if isinstance(data, bytearray):
+            data = bytes(data)
+        return [Match(m, data) for m in self.scanner.scan(data).matching_rules]
+def compile(source: str = None, sources: Dict[str, str] = None) -> Rules:
+    """Compiles YARA rules from source or from sources.
+    Returns:
+        (Rules): a Rules object
+    """
+    return Rules(source, sources)

maco_extractor-1.2.18.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,283 @@
+Metadata-Version: 2.4
+Name: maco-extractor
+Version: 1.2.18
+Summary: This package contains the essentials for creating Maco extractors and using them at runtime.
+Author: sl-govau
+Maintainer: cccs-rs
+License: MIT License
+        Copyright (c) 2022 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment) and Government of Australia (Australian Cyber Security Centre / Australian Signals Directorate)
+        Copyright title to all 3rd party software distributed with maco is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets.
+        Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Project-URL: Repository, https://github.com/CybercentreCanada/Maco
+Project-URL: Issues, https://github.com/CybercentreCanada/Maco/issues
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE.md
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: yara-x
+Dynamic: license-file
+# Maco - Malware config extractor framework
+## Maco is a framework for <ins>ma</ins>lware <ins>co</ins>nfig extractors.
+It aims to solve two problems:
+- Define a standardized ontology (or model) for extractor output. This greatly helps for databasing extracted values.
+- Provide a standard way of identifying which parsers to run and how to execute them.
+## Maco components
+- `model.py`
+  - A data model for the common output of an extractor
+- `extractor.py`
+  - Base class for extractors to implement
+- `collector.py`
+  - Utilities for loading and running extractors
+- `cli.py`
+  - A CLI tool `maco` to assist with running your extractors locally
+- `base_test.py`
+  - Assist with writing unit tests for your extractors
+**Note: If you're interested in using only the model in your project, you can `pip install maco-model` which is a smaller package containing only the model definition**
+## Project Integrations 🛠️
+This framework is actively being used by:
+|                                                                                                              Project                                                                                                               | Description                                                                                                                                                                                                                                                   |                                                                                License                                                                                 |
+| :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+|        <a href="https://cybercentrecanada.github.io/assemblyline4_docs/"><img src="https://images.weserv.nl/?url=cybercentrecanada.github.io/assemblyline4_docs/images/crane.png?v=4&h=100&w=100&fit=cover&maxage=7d"></a>         | A malware analysis platform that uses the MACO model to export malware configuration extractions into a parseable, machine-friendly format                                                                                                                    |       [![License](https://img.shields.io/github/license/CybercentreCanada/assemblyline)](https://github.com/CybercentreCanada/assemblyline/blob/main/LICENSE.md)       |
+|                                                                           [configextractor-py](https://github.com/CybercentreCanada/configextractor-py)                                                                            | A tool designed to run extractors from multiple frameworks and uses the MACO model for output harmonization                                                                                                                                                   | [![License](https://img.shields.io/github/license/CybercentreCanada/configextractor-py)](https://github.com/CybercentreCanada/configextractor-py/blob/main/LICENSE.md) |
+| <a href="https://github.com/jeFF0Falltrades/rat_king_parser"><img src="https://images.weserv.nl/?url=raw.githubusercontent.com/jeFF0Falltrades/rat_king_parser/master/.github/logo.png?v=4&h=100&w=100&fit=cover&maxage=7d"/> </a> | A robust, multiprocessing-capable, multi-family RAT config parser/extractor that is compatible with MACO                                                                                                                                                      |      [![License](https://img.shields.io/github/license/jeFF0Falltrades/rat_king_parser)](https://github.com/jeFF0Falltrades/rat_king_parser/blob/master/LICENSE)       |
+|                           <a href="https://github.com/CAPESandbox/community"><img src="https://images.weserv.nl/?url=github.com/CAPESandbox.png?v=4&h=100&w=100&fit=cover&maxage=7d0&mask=circle"/> </a>                           | A parser/extractor repository containing MACO extractors that's authored by the CAPE community but is integrated in [CAPE](https://github.com/kevoreilly/CAPEv2) deployments.<br>**Note: These MACO extractors wrap and parse the original CAPE extractors.** |                  [![License](https://img.shields.io/badge/license-GPL--3.0-informational)](https://github.com/kevoreilly/CAPEv2/blob/master/LICENSE)                   |
+## Model Example
+See [the model definition](https://github.com/CybercentreCanada/Maco/blob/0f447a66de5e5ce8770ef3fe2325aec002842e63/maco/model.py#L127) for all the supported fields.
+You can use the model independently of the rest of the framework.
+This is still useful for compatibility between systems!
+```python
+from maco import model
+# 'family' is the only required property on the model
+output = model.ExtractorModel(family="wanabee")
+output.version = "2019"  # variant first found in 2019
+output.category.extend([model.CategoryEnum.cryptominer, model.CategoryEnum.clickfraud])
+output.http.append(model.ExtractorModel.Http(protocol="https",
+                                             uri="https://bad-domain.com/c2_payload",
+                                             usage="c2"))
+output.tcp.append(model.ExtractorModel.Connection(server_ip="127.0.0.1",
+                                           usage="ransom"))
+output.campaign_id.append("859186-3224-9284")
+output.inject_exe.append("explorer.exe")
+output.binaries.append(
+    output.Binary(
+        data=b"sam I am",
+        datatype=output.Binary.TypeEnum.config,
+        encryption=output.Binary.Encryption(
+            algorithm="rot26",
+            mode="block",
+        ),
+    )
+)
+# data about the malware that doesn't fit the model
+output.other["author_lunch"] = "green eggs and ham"
+output.other["author_lunch_time"] = "3pm"
+print(output.model_dump(exclude_defaults=True))
+# Generated model
+{
+    'family': 'wanabee',
+    'version': '2019',
+    'category': ['cryptominer', 'clickfraud'],
+    'campaign_id': ['859186-3224-9284'],
+    'inject_exe': ['explorer.exe'],
+    'other': {'author_lunch': 'green eggs and ham', 'author_lunch_time': '3pm'},
+    'http': [{'uri': 'https://bad-domain.com/c2_payload', 'usage': 'c2', 'protocol': 'https'}],
+    'tcp': [{'server_ip': '127.0.0.1', 'usage': 'ransom'}],
+    'binaries': [{
+        'datatype': 'config', 'data': b'sam I am',
+        'encryption': {'algorithm': 'rot26', 'mode': 'block'}
+    }]
+}
+```
+And you can create model instances from dictionaries:
+```python
+from maco import model
+output = {
+    "family": "wanabee2",
+    "version": "2022",
+    "ssh": [
+        {
+            "username": "wanna",
+            "password": "bee2",
+            "hostname": "10.1.10.100",
+        }
+    ],
+}
+print(model.ExtractorModel(**output))
+# Generated model
+family='wanabee2' version='2022' category=[] attack=[] capability_enabled=[]
+capability_disabled=[] campaign_id=[] identifier=[] decoded_strings=[]
+password=[] mutex=[] pipe=[] sleep_delay=None inject_exe=[] other={}
+binaries=[] ftp=[] smtp=[] http=[]
+ssh=[SSH(username='wanna', password='bee2', hostname='10.1.10.100', port=None, usage=None)]
+proxy=[] dns=[] tcp=[] udp=[] encryption=[] service=[] cryptocurrency=[]
+paths=[] registry=[]
+```
+## Extractor Example
+The following extractor will trigger on any file with more than 50 ELF sections,
+and set some properties in the model.
+Your extractors will do a better job of finding useful information than this one!
+```python
+class Elfy(extractor.Extractor):
+    """Check basic elf property."""
+    family = "elfy"
+    author = "blue"
+    last_modified = "2022-06-14"
+    yara_rule = """
+        import "elf"
+        rule Elfy
+        {
+            condition:
+                elf.number_of_sections > 50
+        }
+        """
+    def run(
+        self, stream: BytesIO, matches: List[yara.Match]
+    ) -> Optional[model.ExtractorModel]:
+        # return config model formatted results
+        ret = model.ExtractorModel(family=self.family)
+        # the list for campaign_id already exists and is empty, so we just add an item
+        ret.campaign_id.append(str(len(stream.read())))
+        return ret
+```
+## Writing Extractors
+There are several examples that use Maco in the '`demo_extractors`' folder.
+Some things to keep in mind:
+- The Yara rule names must be prefixed with the extractor class name.
+  - e.g. Class 'MyScript' has Yara rules named 'MyScriptDetect1' and 'MyScriptDetect2', not 'Detect1'
+- You can load other scripts contained within the same folder via a Python relative import
+  - See `complex.py` for details
+- You can standardise your usage of the '`other`' dict
+  - This is optional, see `limit_other.py` for details
+  - Consider instead making a PR with the properties you are frequently using
+# Requirements
+Python 3.8+.
+Install this package with `pip install maco`.
+All required Python packages are in the `requirements.txt`.
+# CLI Usage
+```bash
+> maco --help
+usage: maco [-h] [-v] [--pretty] [--base64] [--logfile LOGFILE] [--include INCLUDE] [--exclude EXCLUDE] [-f] [--create_venv] extractors samples
+Run extractors over samples.
+positional arguments:
+  extractors         path to extractors
+  samples            path to samples
+optional arguments:
+  -h, --help         show this help message and exit
+  -v, --verbose      print debug logging. -v extractor info, -vv extractor debug, -vvv cli debug
+  --pretty           pretty print json output
+  --base64           Include base64 encoded binary data in output (can be large, consider printing to file rather than console)
+  --logfile LOGFILE  file to log output
+  --include INCLUDE  comma separated extractors to run
+  --exclude EXCLUDE  comma separated extractors to not run
+  -f, --force        ignore yara rules and execute all extractors
+  --create_venv      Creates venvs for every requirements.txt found (only applies when extractor path is a directory)
+```
+## CLI output example
+The CLI is helpful for using your extractors in a standalone system, such as in a reverse engineering environment.
+```bash
+> maco demo_extractors/ /usr/lib --include Complex
+extractors loaded: ['Complex']
+complex by blue 2022-06-14 TLP:WHITE
+This script has multiple yara rules and coverage of the data model.
+path: /usr/lib/udev/hwdb.bin
+run Complex extractor from rules ['ComplexAlt']
+{"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
+"binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
+"encryption": {"algorithm": "something"}}],
+"http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/9956330", "usage": "c2"}],
+"encryption": [{"algorithm": "sha256"}]}
+path: /usr/lib/udev/hwdb.d/20-OUI.hwdb
+run Complex extractor from rules ['ComplexAlt']
+{"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
+"binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
+"encryption": {"algorithm": "something"}}],
+"http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1986908", "usage": "c2"}],
+"encryption": [{"algorithm": "sha256"}]}
+path: /usr/lib/udev/hwdb.d/20-usb-vendor-model.hwdb
+run Complex extractor from rules ['ComplexAlt']
+{"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
+"binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
+"encryption": {"algorithm": "something"}}],
+"http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1257481", "usage": "c2"}],
+"encryption": [{"algorithm": "sha256"}]}
+15884 analysed, 3 hits, 3 extracted
+```
+The demo extractors are designed to trigger when run over the '`demo_extractors`' folder.
+e.g. `maco demo_extractors demo_extractors`
+# Contributions
+Please use ruff to format and lint PRs. Unformatted or unlinted code may be the cause of PR test failures.
+Ruff will attempt to fix most issues, but some may require manual resolution.
+```
+pip install ruff
+ruff format
+ruff check --fix
+```

maco_extractor-1.2.18.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+maco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+maco/exceptions.py,sha256=XBHUrs1kr1ZayPI9B_W-WejKgVmC8sWL_o4RL0b4DQE,745
+maco/extractor.py,sha256=s36aGcsXSc-9iCik6iihVt5G1a1DZUA7TquvWYQNwdE,2912
+maco/yara.py,sha256=y141t8NqDDXHY23uE1d6BDPeNmSuUuohR6Yr_LKa7GI,4067
+maco/model/__init__.py,sha256=ULdyHx8R5D2ICHZo3VoCk1YTlewTok36TYIpwx__pNY,45
+maco/model/model.py,sha256=DBHTmZXMzjpVq0s2mzZv3VCzPhwPnv7sH6u_QZCTcA4,24484
+maco_extractor-1.2.18.dist-info/licenses/LICENSE.md,sha256=gMSjshPhXvV_F1qxmeNkKdBqGWkd__fEJf4glS504bM,1478
+maco_extractor-1.2.18.dist-info/METADATA,sha256=-Hfk91VNhYm_ZIbqs8ke1qffq5eX4_nPeSvw5COYLAo,15208
+maco_extractor-1.2.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+maco_extractor-1.2.18.dist-info/top_level.txt,sha256=JTYRldTIdoZJHXQU2LH0AKgD6Hm_azz5f_kOLuBorFU,5
+maco_extractor-1.2.18.dist-info/RECORD,,

maco_extractor-1.2.18.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

maco_extractor-1.2.18.dist-info/licenses/LICENSE.md ADDED Viewed

@@ -0,0 +1,11 @@
+MIT License
+Copyright (c) 2022 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment) and Government of Australia (Australian Cyber Security Centre / Australian Signals Directorate)
+Copyright title to all 3rd party software distributed with maco is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets.
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

maco_extractor-1.2.18.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ maco