reait 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +3 -0
- reait/api.py +429 -174
- reait/main.py +246 -130
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/METADATA +45 -21
- reait-0.0.20.dist-info/RECORD +9 -0
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/WHEEL +1 -1
- reait-0.0.18.dist-info/RECORD +0 -9
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/LICENSE +0 -0
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/entry_points.txt +0 -0
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/top_level.txt +0 -0
    
        reait/api.py
    CHANGED
    
    | @@ -1,307 +1,561 @@ | |
| 1 1 | 
             
            #!/usr/bin/env python
         | 
| 2 | 
            -
             | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            from __future__ import print_function, annotations
         | 
| 4 | 
            +
             | 
| 3 5 | 
             
            from hashlib import sha256
         | 
| 4 | 
            -
             | 
| 6 | 
            +
             | 
| 5 7 | 
             
            from sklearn.metrics.pairwise import cosine_similarity
         | 
| 6 | 
            -
            import  | 
| 7 | 
            -
            import  | 
| 8 | 
            -
            import argparse
         | 
| 8 | 
            +
            from os.path import basename, exists, expanduser
         | 
| 9 | 
            +
            from requests import request, Response, HTTPError
         | 
| 9 10 | 
             
            import requests
         | 
| 10 | 
            -
            from numpy import array, vstack,  | 
| 11 | 
            +
            from numpy import array, vstack, dot, arccos, pi
         | 
| 11 12 | 
             
            from pandas import DataFrame
         | 
| 12 13 | 
             
            import json
         | 
| 13 14 | 
             
            import tomli
         | 
| 14 | 
            -
             | 
| 15 | 
            -
            from  | 
| 16 | 
            -
            from IPython import embed
         | 
| 17 | 
            -
            import lief
         | 
| 15 | 
            +
            import logging
         | 
| 16 | 
            +
            from lief import parse, ELF, PE, MachO
         | 
| 18 17 |  | 
| 19 | 
            -
            __version__ = "0.0.18"
         | 
| 20 18 |  | 
# Client configuration shared by the request helpers in this module.
#   apikey - value sent as the Authorization header of every request
#   host   - base URL that endpoint paths are appended to
#   model  - presumably the default model name (not referenced in the visible part of this file)
re_conf = dict(
    apikey="l1br3",
    host="https://api.reveng.ai",
    model="binnet-0.2-x86",
)

# Single named logger used for all diagnostics emitted by this module.
logger = logging.getLogger("REAIT")
| 27 | 
            +
             | 
| 28 | 
            +
             | 
def reveng_req(r: request, end_point: str, data=None, ex_headers: dict = None, params=None,
               json_data: dict = None, timeout: int = 30) -> Response:
    """
    Construct and send a request to the configured API host.

    The API key from ``re_conf`` is always sent as the ``Authorization`` header; entries in
    ``ex_headers`` are merged on top of it. Request and response details are written to the
    module logger at DEBUG level. In that log output the ``Authorization`` value is masked and
    raw byte payloads are summarised by size, so debug logs contain neither the API key nor a
    dump of an uploaded binary.

    :param r: Callable that performs the request (e.g. ``requests.get``); it is invoked as
        ``r(url, headers=..., json=..., data=..., params=..., timeout=...)``
    :param end_point: Endpoint to add to the base URL
    :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
    :param ex_headers: Extended HTTP headers to add
    :param params: Dictionary, list of tuples or bytes to send in the query string
    :param json_data: A JSON serializable Python object to send in the body
    :param timeout: Number of seconds to stop waiting for a Response
    :return: The response object returned by ``r``
    """
    url = f"{re_conf['host']}/{end_point}"
    headers = {"Authorization": re_conf["apikey"]}

    if ex_headers:
        headers.update(ex_headers)

    # Never write the API key to the log: mask it in a copy, leaving the real headers untouched.
    loggable_headers = {name: "<redacted>" if str(name).lower() == "authorization" else value
                        for name, value in headers.items()}
    # Callers may pass a whole file as bytes; log its size rather than its contents.
    loggable_data = f"<{len(data)} bytes>" if isinstance(data, (bytes, bytearray)) else data

    logger.debug("Making request %s:\n  - headers: %s\n  - data: %s\n  - json_data: %s\n  - params: %s",
                 url, loggable_headers, loggable_data, json_data, params)

    response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout)

    logger.debug("Making response %s:\n  - headers: %s\n  - status_code: %d\n  - content: %s",
                 url, response.headers, response.status_code, response.text)

    return response
| 56 | 
            +
             | 
| 57 | 
            +
             | 
def re_hash_check(bin_id: str) -> bool:
    """
    Report whether the search endpoint returns any user-owned binary for a hash.

    :param bin_id: SHA-256 hash of the binary to look for
    :return: True when the search returned at least one binary, otherwise False
    :raises HTTPError: via ``raise_for_status`` when the server answers with an error status
    """
    res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All&user_owned=true")
    code = res.status_code

    if code != 200:
        # Log first, then let raise_for_status() turn error statuses into exceptions.
        if code == 400:
            logger.warning("Bad Request: %s", res.text)
        else:
            logger.error("Internal Server Error.")

        res.raise_for_status()
        return False

    matches = res.json()["binaries"]
    found = len(matches) > 0

    res.raise_for_status()
    return found
| 72 | 
            +
             | 
| 49 73 |  | 
# Terminology used in this module:
#   bin_id - the SHA-256 hash of a binary file
#   bid    - the ID of a binary as stored on the server (the "binary_id" field of a search result)
# This function expects a valid hash and returns the bid that belongs to it.
def re_bid_search(bin_id: str) -> int:
    """
    Resolve a binary hash to the ID of one of its server-side records.

    Queries the search endpoint for the hash. With exactly one match its ID is returned.
    With several matches each one is listed through the logger and the user is prompted
    (via ``input``) to choose one by its list index.

    :param bin_id: SHA-256 hash of the binary
    :return: The selected binary ID, or -1 when nothing matched or the entered option was invalid
    :raises HTTPError: via ``raise_for_status`` when the server answers with an error status
    """
    res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All")

    bid = -1

    # Valid request
    if res.status_code == 200:
        binaries_data = res.json()["binaries"]

        if len(binaries_data) > 1:
            logger.info("%d matches found for hash: %s.", len(binaries_data), bin_id)

            # Maps the index shown to the user onto the binary ID it stands for.
            options_dict = {}

            for idx, binary in enumerate(binaries_data):
                # One %-placeholder per argument; a stray "{}" here would make logging fail to format.
                logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Owner: %s, Status: %s",
                            idx, binary["binary_id"], binary["binary_name"], binary["creation"],
                            binary["model_name"], binary["owner"], binary["status"])

                options_dict[idx] = binary["binary_id"]

            user_input = input("[+] Please enter the option you want to use for this operation:")

            try:
                bid = options_dict.get(int(user_input), -1)
            except ValueError:
                # Non-numeric input is treated the same as an out-of-range index.
                bid = -1

            if bid == -1:
                logger.warning("Invalid option.")
        elif len(binaries_data) == 1:
            binary = binaries_data[0]
            bid = binary["binary_id"]

            logger.info("Only one record exists, selecting - ID: %d, Name: %s, "
                        "Creation: %s, Model: %s, Owner: %s, Status: %s",
                        bid, binary["binary_name"], binary["creation"],
                        binary["model_name"], binary["owner"], binary["status"])
        else:
            logger.warning("No matches found for hash: %s.", bin_id)
    elif res.status_code == 400:
        logger.warning("Bad Request: %s", res.text)
    else:
        logger.error("Internal Server Error.")

    res.raise_for_status()
    return bid
| 135 | 
            +
             | 
| 136 | 
            +
             | 
def RE_delete(fpath: str, binary_id: int = 0) -> Response:
    """
    Delete the analysis results of a binary from the server.

    :param fpath: Path of the binary file; the identifier derived from it by ``re_binary_id``
        is used for the lookup and in log messages
    :param binary_id: ID of the binary to delete; 0 means look the ID up from the file
    :return: The server response to the delete request
    :raises HTTPError: when no record matches the file, or via ``raise_for_status`` when the
        server answers with an error status
    """
    file_hash = re_binary_id(fpath)

    if binary_id == 0:
        target = re_bid_search(file_hash)
    else:
        target = binary_id

    if target == -1:
        raise HTTPError(f"No matches found for hash: {file_hash}")

    res = reveng_req(requests.delete, f"analyse/{target}")
    code = res.status_code

    if code == 200:
        logger.info("Securely deleted %s analysis.", file_hash)
    elif code == 404:
        logger.warning("Error analysis not found for %s.", file_hash)
    else:
        logger.error("Error deleting binary %s under. Server returned %d.", file_hash, code)

    res.raise_for_status()
    return res
| 160 | 
            +
             | 
| 161 | 
            +
             | 
def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None, platform_options: str = None,
               file_options: str = None, dynamic_execution: bool = False, command_line_args: str = None,
               scope: str = None, tags: list = None, priority: int = 0,
               duplicate: bool = False, symbols: dict = None) -> Response:
    """
    Start analysis job for binary file.

    The request body always carries the file name and hash. Each optional argument is added
    only when it is truthy, so ``None``, empty values, ``priority=0`` and
    ``dynamic_execution=False`` are left out of the request.

    :param fpath: File path for binary to analyse
    :param model_name: Binary model name
    :param isa_options: Executable ISA
    :param platform_options: OS platform
    :param file_options: File options
    :param dynamic_execution: Enable dynamic execution in sandbox during analysis
    :param command_line_args: Command line arguments to pass when running binary sample in the sandbox
    :param scope: Analysis visibility
    :param tags: Assign tags to an analysis
    :param priority: Priority to processing queue
    :param duplicate: Allow submitting a binary whose hash already exists; this flag only
        controls the local duplicate check and is not sent to the server
    :param symbols: List of functions
    :return: The server response to the analyse request
    :raises HTTPError: when the hash already exists and ``duplicate`` is not set, or via
        ``raise_for_status`` when the server answers with an error status
    """
    bin_id = re_binary_id(fpath)
    result = re_hash_check(bin_id)

    # Any falsy value (False, None, 0) means "do not allow duplicates".
    if result and not duplicate:
        logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
                     bin_id)
        raise HTTPError(f"Duplicate analysis for hash: {bin_id}")

    filename = basename(fpath)

    params = {"file_name": filename, "sha_256_hash": bin_id}

    # Explicit name -> value table instead of introspecting locals(); order matches the signature.
    optional = {
        "model_name": model_name,
        "isa_options": isa_options,
        "platform_options": platform_options,
        "file_options": file_options,
        "dynamic_execution": dynamic_execution,
        "command_line_args": command_line_args,
        "scope": scope,
        "tags": tags,
        "priority": priority,
        "symbols": symbols,
    }
    params.update({p_name: p_value for p_name, p_value in optional.items() if p_value})

    res = reveng_req(requests.post, "analyse", json_data=params)

    if res.status_code == 200:
        # Reuse the hash computed above rather than hashing the file a second time.
        logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
    elif res.status_code == 400:
        response = res.json()

        if "error" in response:
            logger.warning("Error analysing %s - %s", fpath, response["error"])

    res.raise_for_status()
    return res
| 75 212 |  | 
| 76 213 |  | 
def RE_upload(fpath: str) -> Response | bool:
    """
    Upload binary to Server.

    The upload is skipped when ``re_hash_check`` reports that the file's hash already exists.

    :param fpath: File path for binary to upload
    :return: ``True`` when the upload was skipped because the file already exists, otherwise
        the server response to the upload request
    :raises HTTPError: via ``raise_for_status`` when the server answers with an error status
    """
    bin_id = re_binary_id(fpath)
    result = re_hash_check(bin_id)

    if result:
        logger.info("File %s - %s already exists. Skipping upload...", basename(fpath), bin_id)
        return True

    # Read inside a context manager so the file handle is closed before the request is sent.
    with open(fpath, "rb") as fd:
        payload = fd.read()

    res = reveng_req(requests.post, "upload", data=payload)

    if res.status_code == 200:
        # bin_id was computed above; no need to hash the file again for the log line.
        logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
    elif res.status_code == 400:
        response = res.json()

        if "error" in response:
            logger.warning("Error uploading %s - %s", fpath, response["error"])
    elif res.status_code == 413:
        logger.warning("File too large. Please upload files under 100MB.")
    elif res.status_code == 500:
        logger.error("Internal Server Error. Please contact support. Skipping upload...")

    res.raise_for_status()
    return res
| 93 242 |  | 
| 94 243 |  | 
def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
    """
    Fetch the symbol embeddings of a binary.

    :param fpath: Path of the binary file; the identifier derived from it by ``re_binary_id``
        is used for the lookup and in log messages
    :param binary_id: ID of the binary; 0 means look the ID up from the file
    :return: The server response to the embeddings request
    :raises HTTPError: when no record matches the file, or via ``raise_for_status`` when the
        server answers with an error status
    """
    file_hash = re_binary_id(fpath)

    if binary_id == 0:
        target = re_bid_search(file_hash)
    else:
        target = binary_id

    if target == -1:
        raise HTTPError(f"No matches found for hash: {file_hash}")

    res = reveng_req(requests.get, f"embeddings/{target}")

    # This endpoint's "not ready yet" case is detected through status 400.
    still_running = res.status_code == 400
    if still_running:
        logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
                       file_hash)

    res.raise_for_status()
    return res
| 106 264 |  | 
| 107 265 |  | 
def RE_signature(fpath: str, binary_id: int = 0) -> Response:
    """
    Fetch the BinNet signature of a binary.

    :param fpath: Path of the binary file; the identifier derived from it by ``re_binary_id``
        is used for the lookup and in log messages
    :param binary_id: ID of the binary; 0 means look the ID up from the file
    :return: The server response to the signature request
    :raises HTTPError: when no record matches the file, or via ``raise_for_status`` when the
        server answers with an error status
    """
    file_hash = re_binary_id(fpath)

    if binary_id == 0:
        target = re_bid_search(file_hash)
    else:
        target = binary_id

    if target == -1:
        raise HTTPError(f"No matches found for hash: {file_hash}")

    res = reveng_req(requests.get, f"signature/{target}")

    # Status 425 is reported to the user as "analysis not finished yet".
    still_running = res.status_code == 425
    if still_running:
        logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
                       file_hash)

    res.raise_for_status()
    return res
| 119 286 |  | 
| 120 287 |  | 
def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None,
                 model: str = None) -> Response:
    """
    Fetch the embedding for a custom symbol range.

    Optional arguments are sent as query parameters only when they are truthy.

    :param fpath: File path for binary to analyse
    :param start_vaddr: Start virtual address of the function to extract embeddings
    :param end_vaddr: End virtual address of the function to extract embeddings
    :param base_vaddr: Base address of the binary
    :param model: Binary model name (sent under the ``models`` query key)
    :return: The server response to the embedding request
    :raises HTTPError: via ``raise_for_status`` when the server answers with an error status
    """
    # (query key, value) pairs in the order they are added to the request.
    candidates = (
        ("end_vaddr", end_vaddr),
        ("base_vaddr", base_vaddr),
        ("models", model),
    )
    params = {key: value for key, value in candidates if value}

    file_hash = re_binary_id(fpath)

    res = reveng_req(requests.get, f"embedding/{file_hash}/{start_vaddr}", params=params)

    # Status 425 is reported to the user as "analysis not finished yet".
    still_running = res.status_code == 425
    if still_running:
        logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
                       file_hash)

    res.raise_for_status()
    return res
| 141 317 |  | 
| 142 318 |  | 
def RE_logs(fpath: str, binary_id: int = 0, console: bool = True) -> Response:
    """
    Fetch the analysis logs of a binary.

    :param fpath: Path of the binary file; the identifier derived from it by ``re_binary_id``
        is used for the lookup and in log messages
    :param binary_id: ID of the binary; 0 means look the ID up from the file
    :param console: When true, a successful response body is also written to the module logger
    :return: The server response to the logs request
    :raises HTTPError: when no record matches the file, or via ``raise_for_status`` when the
        server answers with an error status
    """
    file_hash = re_binary_id(fpath)

    if binary_id == 0:
        target = re_bid_search(file_hash)
    else:
        target = binary_id

    if target == -1:
        raise HTTPError(f"No matches found for hash: {file_hash}")

    res = reveng_req(requests.get, f"logs/{target}")
    code = res.status_code

    if code == 404:
        logger.warning("Error, logs not found for %s.", file_hash)
    elif code == 200 and console:
        logger.info("Logs found for %s:\n%s", file_hash, res.text)

    res.raise_for_status()
    return res
| 158 341 |  | 
| 159 342 |  | 
| 160 | 
            -
            def RE_cves(fpath: str,  | 
| 343 | 
            +
            def RE_cves(fpath: str, binary_id: int = 0) -> Response:
                """
                Query the API for known CVEs in a binary and log the outcome.

                :param fpath: File path for binary to analyse
                :param binary_id: ID of binary, 0 to resolve it from the file hash
                :return: Response of the ``cves/<id>`` request
                :raises HTTPError: when the binary ID is -1 (no match for the hash), or
                    when ``raise_for_status`` rejects the response
                """
                sha_hash = re_binary_id(fpath)
                analysis_id = binary_id if binary_id != 0 else re_bid_search(sha_hash)

                if analysis_id == -1:
                    raise HTTPError(f"No matches found for hash: {sha_hash}")

                response = reveng_req(requests.get, f"cves/{analysis_id}")
                status = response.status_code

                if status == 200:
                    found = json.loads(response.text)
                    logger.info("Checking for known CVEs embedded inside %s", fpath)

                    if len(found) == 0:
                        logger.info("0 CVEs found.")
                    else:
                        # The raw response text is logged, not the decoded object.
                        logger.warning("Warning CVEs found!\n%s", response.text)
                elif status == 404:
                    logger.warning("Error, binary analysis not found for %s.", sha_hash)

                response.raise_for_status()
                return response
         | 
| 370 | 
            +
             | 
| 181 371 |  | 
| 182 | 
            -
            def RE_status(fpath: str,  | 
| 372 | 
            +
            def RE_status(fpath: str, binary_id: int = 0) -> Response:
                """
                Ask the API for the status of a binary analysis.

                :param fpath: File path for binary to analyse
                :param binary_id: ID of binary, 0 to resolve it from the file hash
                :return: Response of the ``analyse/status/<id>`` request
                :raises HTTPError: when the binary ID is -1 (no match for the hash), or
                    when ``raise_for_status`` rejects the response
                """
                sha_hash = re_binary_id(fpath)

                if binary_id == 0:
                    analysis_id = re_bid_search(sha_hash)
                else:
                    analysis_id = binary_id

                if analysis_id == -1:
                    raise HTTPError(f"No matches found for hash: {sha_hash}")

                response = reveng_req(requests.get, f"analyse/status/{analysis_id}")

                # NOTE(review): this tests for 400, not 404 - confirm against the API.
                if response.status_code == 400:
                    logger.warning(" Error, status not found for %s.", sha_hash)

                response.raise_for_status()
                return response
         | 
| 196 391 |  | 
| 197 392 |  | 
| 198 | 
            -
            def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5):
         | 
| 393 | 
            +
            def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list:
                """
                Rank symbol embeddings by cosine similarity to a source embedding.

                :param embedding: Embedding vector as python list
                :param embeddings: Symbol embeddings; each entry must provide the
                    "embedding", "vaddr", "name" and "size" fields read below
                :param nns: Number of nearest neighbors
                :return: Up to ``nns`` dicts, most similar first, with the keys
                    "similaritiy", "vaddr", "name" and "size"
                """
                df = DataFrame(data=embeddings)
                query = array(embedding).reshape(1, -1)
                candidates = vstack(df["embedding"].values)

                # A single similarity pass gives one score per candidate row; the
                # scores of the selected rows are reused instead of being recomputed.
                scores = cosine_similarity(candidates, query).ravel()
                closest = scores.argsort()[::-1][:nns]

                json_sims = []
                for position in closest:
                    row = df.iloc[position]
                    json_sims.append({
                        # Key spelling ("similaritiy") is kept as-is: callers read it.
                        "similaritiy": float(scores[position]),
                        "vaddr": int(row["vaddr"]),
                        "name": str(row["name"]),
                        "size": int(row["size"]),
                    })

                return json_sims
         | 
| 213 414 |  | 
| 214 415 |  | 
| 215 | 
            -
            def RE_nearest_symbols(embedding: list, model_name, nns: int = 5, | 
| 416 | 
            +
            def RE_nearest_symbols(embedding: list, model_name: str, nns: int = 5,
                                   collections: list = None, ignore_hashes: list = None,
                                   distance: float = 0.0, debug_enabled: bool = False) -> Response:
                """
                Request function name suggestions for an embedding (``ann/symbol``).

                :param embedding: Embedding vector as python list, sent as the JSON body
                :param model_name: Binary model name
                :param nns: Number of nearest neighbors
                :param collections: List of RevEng.AI collection names to search through
                :param ignore_hashes: List[str] SHA-256 hashes of binary files whose
                    symbols are ignored (usually the current binary)
                :param distance: How close we want the ANN search to filter for; only
                    sent when greater than 0.0
                :param debug_enabled: ANN Symbol Search, only perform ANN on debug
                    symbols if set
                :return: Response of the request
                :raises HTTPError: when ``raise_for_status`` rejects the response
                """
                query = {"nns": nns, "model_name": model_name, "debug_enabled": debug_enabled}

                # Optional filters are only sent when they carry a value.
                if collections:
                    # api param is collection, not collections
                    query["collection"] = "|".join(collections)

                if ignore_hashes:
                    query["ignore_hashes"] = ignore_hashes

                if distance > 0.0:
                    query["distance"] = distance

                payload = json.dumps(embedding)
                response = reveng_req(requests.post, "ann/symbol", data=payload, params=query)

                response.raise_for_status()
                return response
         | 
| 231 445 |  | 
| 232 446 |  | 
| 233 | 
            -
            def RE_nearest_binaries(embedding: list, model_name, nns: int = 5, | 
| 447 | 
            +
            def RE_nearest_binaries(embedding: list, model_name: str, nns: int = 5,
                                    collections: list = None, ignore_hashes: list = None) -> Response:
                """
                Request executable suggestions for a binary embedding (``ann/binary``).

                :param embedding: Embedding vector as python list, sent as the JSON body
                :param model_name: Binary model name
                :param nns: Number of nearest neighbors
                :param collections: List of RevEng.AI collection names to search through
                :param ignore_hashes: List[str] SHA-256 hashes of binary files whose
                    symbols are ignored (usually the current binary)
                :return: Response of the request
                :raises HTTPError: when ``raise_for_status`` rejects the response
                """
                query = {"nns": nns, "model_name": model_name}

                # Optional filters are only sent when they carry a value.
                if collections:
                    # api param is collection, not collections
                    query["collection"] = "|".join(collections)

                if ignore_hashes:
                    query["ignore_hashes"] = ignore_hashes

                payload = json.dumps(embedding)
                response = reveng_req(requests.post, "ann/binary", data=payload, params=query)

                response.raise_for_status()
                return response
         | 
| 249 470 |  | 
| 250 471 |  | 
| 251 | 
            -
            def RE_SBOM(fpath: str,  | 
| 472 | 
            +
            def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
                """
                Fetch the Software Bill Of Materials of a binary and log it.

                :param fpath: File path for binary to analyse
                :param binary_id: ID of binary, 0 to resolve it from the file hash
                :return: Response of the ``sboms/<id>`` request
                :raises HTTPError: when the binary ID is -1 (no match for the hash), or
                    when ``raise_for_status`` rejects the response
                """
                sha_hash = re_binary_id(fpath)
                analysis_id = binary_id if binary_id != 0 else re_bid_search(sha_hash)

                if analysis_id == -1:
                    raise HTTPError(f"No matches found for hash: {sha_hash}")

                response = reveng_req(requests.get, f"sboms/{analysis_id}")

                # The body is logged before the status check, whatever the status is.
                logger.info("SBOM for %s:\n%s", fpath, response.text)

                response.raise_for_status()
                return response
         | 
| 263 490 |  | 
| 264 491 |  | 
| 265 | 
            -
            def  | 
| 266 | 
            -
                """ | 
| 492 | 
            +
            def RE_functions_rename(function_id: int, new_name: str) -> Response:
                """
                Submit a new name for a function (``functions/rename/<id>``).

                :param function_id: ID of a function
                :param new_name: New function name
                :return: Response of the request
                :raises HTTPError: when ``raise_for_status`` rejects the response
                """
                endpoint = f"functions/rename/{function_id}"
                response = reveng_req(requests.post, endpoint, json_data={"new_name": new_name})

                if response.status_code != 200:
                    logger.warning("Error, cannot rename FunctionId %d. %s", function_id, response.text)
                else:
                    logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)

                response.raise_for_status()
                return response
         | 
| 507 | 
            +
             | 
| 508 | 
            +
             | 
| 509 | 
            +
            def re_binary_id(fpath: str) -> str:
                """
                Take the SHA-256 hash of binary file

                The file is hashed in fixed-size chunks so that large binaries are not
                loaded into memory in one piece.
                :param fpath: File path for binary to analyse
                :return: Hex digest of the file content, or the literal "undefined" when
                    ``fpath`` is empty/None or does not exist
                """
                if not fpath or not exists(fpath):
                    return "undefined"

                hf = sha256()

                with open(fpath, "rb") as f:
                    # iter() with a sentinel stops at the first empty read (EOF).
                    for chunk in iter(lambda: f.read(1 << 16), b""):
                        hf.update(chunk)

                return hf.hexdigest()
         | 
| 272 524 |  | 
| 273 525 |  | 
| 274 | 
            -
            def _binary_isa(lief_hdlr, exec_type):
         | 
| 526 | 
            +
            def _binary_isa(lief_hdlr, exec_type: str) -> str:
                """
                Map the header of a parsed binary to an ISA name.

                :param lief_hdlr: Parsed binary; only its ``header`` attribute is read
                :param exec_type: Executable format, one of "elf", "pe" or "macho"
                :return: "x86" or "x86_64"
                :raises RuntimeError: for any other format or machine type
                """
                header = lief_hdlr.header

                if exec_type == "elf":
                    arch = header.machine_type

                    if arch == ELF.ARCH.i386:
                        return "x86"
                    if arch == ELF.ARCH.x86_64:
                        return "x86_64"
                elif exec_type == "pe":
                    arch = header.machine

                    if arch == PE.MACHINE_TYPES.I386:
                        return "x86"
                    if arch == PE.MACHINE_TYPES.AMD64:
                        return "x86_64"
                elif exec_type == "macho":
                    arch = header.cpu_type

                    if arch == MachO.CPU_TYPES.x86:
                        return "x86"
                    if arch == MachO.CPU_TYPES.x86_64:
                        return "x86_64"

                # Reached for an unknown format and for a known format whose machine
                # type is neither of the two supported ones.
                logger.error("Error, failed to determine or unsupported ISA for exec_type: %s.", exec_type)
                raise RuntimeError(f"Error, failed to determine or unsupported ISA for exec_type:{exec_type}.")
         | 
| 301 554 |  | 
| 302 | 
            -
             | 
| 555 | 
            +
             | 
| 556 | 
            +
            def _binary_format(lief_hdlr) -> str:
         | 
| 303 557 | 
             
                """
         | 
| 304 | 
            -
             | 
| 558 | 
            +
                Get executable file format
         | 
| 305 559 | 
             
                """
         | 
| 306 560 | 
             
                if lief_hdlr.format == lief_hdlr.format.PE:
         | 
| 307 561 | 
             
                    return "pe"
         | 
| @@ -309,41 +563,42 @@ def _binary_format(lief_hdlr): | |
| 309 563 | 
             
                    return "elf"
         | 
| 310 564 | 
             
                if lief_hdlr.format == lief_hdlr.format.MACHO:
         | 
| 311 565 | 
             
                    return "macho"
         | 
| 312 | 
            -
                
         | 
| 313 | 
            -
                raise RuntimeError("Error, could not determine binary format")
         | 
| 314 566 |  | 
| 567 | 
            +
                logger.error("Error, could not determine binary format: %s.", lief_hdlr.format)
         | 
| 568 | 
            +
                raise RuntimeError("Error, could not determine binary format.")
         | 
| 315 569 |  | 
| 316 570 |  | 
| 317 | 
            -
            def file_type(fpath: str):
         | 
| 571 | 
            +
            def file_type(fpath: str) -> tuple[str, str]:
                """
                Determine the executable format and ISA of a binary.

                :param fpath: File path for binary to analyse
                :return: Tuple ``(file_format, isa)`` as produced by ``_binary_format``
                    and ``_binary_isa``
                """
                handle = parse(fpath)

                # The format is resolved first because the ISA lookup depends on it.
                detected_format = _binary_format(handle)
                return detected_format, _binary_isa(handle, detected_format)
         | 
| 327 583 |  | 
| 328 584 |  | 
| 329 | 
            -
            def parse_config():
         | 
| 585 | 
            +
            def parse_config() -> None:
                """
                Parse ~/.reait.toml config file

                Copies the "apikey", "host" and "model" entries, when present, into the
                module-level ``re_conf`` mapping. Other keys are ignored, and a missing
                file leaves ``re_conf`` untouched.
                """
                config_path = expanduser("~/.reait.toml")

                if not exists(config_path):
                    return

                # TOML documents are UTF-8 by specification, so the platform default
                # encoding must not be used to decode the file.
                with open(config_path, "r", encoding="utf-8") as file:
                    config = tomli.loads(file.read())

                for key in ("apikey", "host", "model"):
                    if key in config:
                        re_conf[key] = config[key]
         | 
| 341 596 |  | 
| 342 597 |  | 
| 343 | 
            -
            def angular_distance(x, y):
         | 
| 598 | 
            +
            def angular_distance(x, y) -> float:
                """
                Compute angular distance between two embedding vectors

                The result is ``1 - angle / pi``: 1.0 for vectors pointing the same way,
                0.5 for orthogonal vectors and 0.0 for opposite vectors.
                :param x: First embedding vector
                :param y: Second embedding vector
                :return: Value in [0.0, 1.0]; NaN if either vector has zero length
                """
                from numpy import clip

                cos = dot(x, y) / ((dot(x, x) * dot(y, y)) ** 0.5)
                # Rounding can leave cos a hair outside [-1, 1], where arccos is NaN;
                # clip keeps a genuine NaN (zero-length input) as NaN.
                cos = clip(cos, -1.0, 1.0)
                return 1.0 - arccos(cos) / pi
         |