PyPI - reait - Versions diffs - 0.0.19__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

reait 0.0.19__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

reait/__init__.py +2 -0
reait/api.py +523 -218
reait/main.py +265 -295
{reait-0.0.19.dist-info → reait-1.0.0.dist-info}/METADATA +40 -51
reait-1.0.0.dist-info/RECORD +9 -0
{reait-0.0.19.dist-info → reait-1.0.0.dist-info}/WHEEL +1 -1
reait-0.0.19.dist-info/RECORD +0 -9
{reait-0.0.19.dist-info → reait-1.0.0.dist-info}/LICENSE +0 -0
{reait-0.0.19.dist-info → reait-1.0.0.dist-info}/entry_points.txt +0 -0
{reait-0.0.19.dist-info → reait-1.0.0.dist-info}/top_level.txt +0 -0

reait/main.py CHANGED Viewed

@@ -1,75 +1,73 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 from __future__ import print_function
-from hashlib import sha256
-from rich import print_json, print as rich_print
+import logging
+from pathlib import Path
+from typing import Optional
+from rich import print_json
 from rich.progress import track
 from rich.console import Console
-from rich.table import Table
 import os
-import re
 import argparse
-import requests
-from numpy import array, vstack, mean, average
-from pandas import DataFrame
 import json
-import tomli
-from os.path import isfile, getsize
-from sys import exit
-from IPython import embed
-from reait import api
+from sys import exit, stdout, stderr
 from scipy.spatial import distance
-from scipy.special import expit
 from glob import iglob
 import numpy as np
-def version():
+import api
+rerr = Console(file=stderr, width=180)
+rout = Console(file=stdout, width=180)
+def version() -> int:
     """
         Display program version
     """
-    rich_print(f"[bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]")
+    rout.print(f"""[bold blue] ::::::::    ::::::::
+::  ::::    :::  :::
+::::::::::::::::::::
+:::::   :::   ::::::
+   ::::::::::::::
+   .::  :::  ::::
+::::::  :::  :::::::
+::  :::::::::::  :::
+::  :::::  ::::  :::
+::::::::    :::::::: [/bold blue]
+  [bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]
+""")
+    rout.print("[yellow]Config:[/yellow]")
     print_json(data=api.re_conf)
+    return 0
-def verify_binary(fpath_fmt: str):
-    fmt     = None
-    fpath   = fpath_fmt
-    if ':' in fpath_fmt:
-        fpath, fmt = fpath_fmt.split(':')
-    if not os.path.isfile(fpath):
-        raise RuntimeError(f"File path {fpath} is not a file")
+def verify_binary(fpath_fmt: str) -> tuple[str, str, str]:
+    fpath = fpath_fmt
-    if getsize(fpath) > 1024 * 1024 * 10:
-        raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
-    if not fmt:
-        exec_format, exec_isa = api.file_type(fpath)
-    else:
-        if '-' not in fmt:
-            raise RuntimeError('Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
-        exec_format, exec_isa = fmt.split('-')
+    exec_format, exec_isa = api.file_type(fpath)
     return fpath, exec_format, exec_isa
-def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.95, deviation: float = 0.1):
+def match(fpath: str, embeddings: list, confidence: float = 0.95, deviation: float = 0.1) -> None:
     """
     Match embeddings in fpath from a list of embeddings
     """
-    print(f"Matching symbols from {fpath} with confidence {confidence}")
-    sink_embed_mat = np.vstack(list(map(lambda x: x['embedding'], embeddings)))
-    b_embeds = api.RE_embeddings(fpath, model_name)
-    source_embed_mat = np.vstack(list(map(lambda x: x['embedding'], b_embeds)))
+    rout.print(f"Matching symbols from {fpath} with confidence {confidence}.")
+    sink_embed_mat = np.vstack(list(map(lambda x: x["embedding"], embeddings)))
+    b_embeds = api.RE_embeddings(fpath).json()["data"]
+    source_embed_mat = np.vstack(list(map(lambda x: x["embedding"], b_embeds)))
     # angular distance over cosine
-    #closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
+    # closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
     closest = distance.cdist(source_embed_mat, sink_embed_mat, api.angular_distance)
     # rescale to separate high end of (-1, 1.0)
-    closest = rescale_sim(closest)
+    # closest = rescale_sim(closest)
     i, j = closest.shape
-    for _i in track(range(i), description='Matching Symbols...'):
+    for _i in track(range(i), description="Matching Symbols..."):
         row = closest[_i, :]
         match_index, second_match = row.argsort()[::-1][:2]
         source_index = _i
@@ -78,321 +76,293 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
         sink_symb = embeddings[sink_index]
         m_confidence = row[match_index]
         s_confidence = row[second_match]
         if row[match_index] >= confidence:
-            rich_print(f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
+            rout.print(
+                f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t"
+                f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
+                f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
         elif (m_confidence - s_confidence) > deviation:
-            rich_print(f"[bold magenta]Possible match[/bold magenta][yellow]\tConfidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
+            rout.print(
+                f"[bold magenta]Possible match[/bold magenta][yellow]\t"
+                f"Confidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t"
+                f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
+                f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
         else:
-            #rich_print(f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
+            rerr.print(
+                f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t"
+                f"{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
             pass
-def rescale_sim(x):
+def match_for_each(fpath: str, confidence: float = 0.9, nns: int = 1) -> int:
     """
-        Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values seperated, map to hyperbolic space
+    Match embeddings in fpath from a list of embeddings
     """
-    return np.power(x, 5)
+    nns = max(nns, 1)
+    rout.print(f"Matching symbols from '{fpath}' with a confidence {confidence:.02f} and up to "
+               f"{nns} result{'' if nns == 1 else 's'} per function")
+    functions = api.RE_analyze_functions(fpath).json()["functions"]
+    function_matches = api.RE_nearest_functions(fpath, nns=nns, distance=1 - confidence).json()["function_matches"]
+    if len(function_matches) == 0:
+        rerr.print(f"[bold red]No matches found for a confidence of [/bold red] {confidence:.02f}")
+        return -1
+    else:
+        for function in functions:
+            matches = list(filter(lambda x: function["function_id"] == x["origin_function_id"], function_matches))
+            if len(matches):
+                rout.print(f"[bold green]Found {len(matches)} match{'' if len(matches) == 1 else 'es'} for "
+                           f"[/bold green][blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
-def binary_similarity(fpath: str, fpaths: list, model_name: str):
+                for match in matches:
+                    rout.print(f"\t[yellow]Confidence: {match['confidence']:.05f}[/yellow]"
+                               f"\t[blue]{match['nearest_neighbor_function_name']}"
+                               f" ({match['nearest_neighbor_binary_name']})[/blue]")
+            else:
+                rout.print(f"[bold red]No matches found for[/bold red] "
+                           f"[blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
+    return 0
+def parse_collections(collections: str) -> Optional[list[str]]:
     """
-    Compute binary similarity between source and list of binary files
+    Return collections as list from CSV
     """
-    console = Console()
+    if not collections:
+        return None
+    return collections.split(",")
-    table = Table(title=f"Binary Similarity to {fpath}")
-    table.add_column("Binary", justify="right", style="cyan", no_wrap=True)
-    table.add_column("SHA3-256", style="magenta", no_wrap=True)
-    table.add_column("Similarity", style="yellow", no_wrap=True)
-    b_embed = api.RE_signature(fpath, model_name)
+def rescale_sim(x):
+    """
+    Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low,
+    high values separated, map to hyperbolic space
+    """
+    return np.power(x, 5)
-    b_sums = []
-    for b in track(fpaths, description='Computing Binary Similarity...'):
-        try:
-            b_sum = api.RE_signature(b, model_name)
-            b_sums.append(b_sum)
-        except Exception as e:
-            console.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.binary_id(b)}[/green bold]")
-            console.print(e)
-    if len(b_sums) > 0:
-            #closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
-            closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
+def validate_file(arg):
+    file = Path(arg)
+    if file.is_file():
+        return file.absolute()
+    raise FileNotFoundError(f"File path {arg} does not exists.")
-            for binary, similarity in zip(fpaths, closest.tolist()[0]):
-                table.add_row(os.path.basename(binary), api.binary_id(binary), f"{rescale_sim(similarity):.05f}")
-    console.print(table)
+def validate_dir(arg):
+    dir = Path(arg)
+    if dir.is_dir():
+        return dir.absolute()
+    raise NotADirectoryError(f"Directory path {arg} does not exists.")
-def main() -> None:
+def main() -> int:
     """
     Tool entry
     """
     parser = argparse.ArgumentParser(add_help=False)
-    parser.add_argument("-b", "--binary", default="", help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
-    parser.add_argument("-D", "--dir", default="", help="Path of directory to recursively analyse")
-    parser.add_argument("-a", "--analyse", action='store_true', help="Perform a full analysis and generate embeddings for every symbol")
-    parser.add_argument("--no-embeddings", action='store_true', help="Only perform binary analysis. Do not generate embeddings for symbols")
+    parser.add_argument("-b", "--binary", type=validate_file,
+                        help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
+    parser.add_argument("-B", "--binary-hash", default="", help="Hex-encoded SHA-256 hash of the binary to use")
+    parser.add_argument("-D", "--dir", type=validate_dir, help="Path of directory to recursively analyse")
+    parser.add_argument("-a", "--analyse", action="store_true",
+                        help="Perform a full analysis and generate embeddings for every symbol")
     parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
-    parser.add_argument("-A", action='store_true', help="Upload and Analyse a new binary")
-    parser.add_argument("-u", "--upload", action='store_true', help="Upload a new binary to remote server")
-    parser.add_argument("-n", "--ann", action='store_true', help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
-    parser.add_argument("--embedding", help="Path of JSON file containing a BinNet embedding")
-    parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
-    parser.add_argument("--collections", default=None, help="Regex string to select RevEng.AI collections for filtering e.g., libc")
+    parser.add_argument("-A", action="store_true", help="Upload and Analyse a new binary")
+    parser.add_argument("-u", "--upload", action="store_true", help="Upload a new binary to remote server")
+    parser.add_argument("--duplicate", default=False, action="store_true", help="Duplicate an existing binary")
+    parser.add_argument("-e", "--embedding", help="Path of JSON file containing a BinNet embedding")
+    parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch", type=int)
+    parser.add_argument("--collections", default=None,
+                        help="Comma Seperated Value of collections to search from e.g. libxml2,libpcap. Used to select RevEng.AI collections for filtering search results")
     parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
-    parser.add_argument("--from-file", help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
+    parser.add_argument("--from-file",
+                        help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
     parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
-    parser.add_argument("-C", "--sca", action="store_true", help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
     parser.add_argument("--sbom", action="store_true", help="Generate SBOM for binary")
-    parser.add_argument("-m", "--model", default="binnet-0.1", help="AI model used to generate embeddings")
-    parser.add_argument("-x", "--extract", action='store_true', help="Fetch embeddings for binary")
-    parser.add_argument("--start-vaddr", help="Start virtual address of the function to extract embeddings")
-    parser.add_argument("--symbol", help="Name of the symbol to extract embeddings")
-    parser.add_argument("-s", "--signature", action='store_true', help="Generate a RevEng.AI binary signature")
-    parser.add_argument("-S", "--similarity", action='store_true', help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
-    parser.add_argument("-t", "--to", help="CSV list of executables to compute binary similarity against")
-    parser.add_argument("-M", "--match", action='store_true', help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
-    parser.add_argument("--confidence", default="high", help="Confidence threshold used to match symbols.")
-    parser.add_argument("--deviation", default=0.2, help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
-    parser.add_argument("-l", "--logs", action='store_true', help="Fetch analysis log file for binary")
-    parser.add_argument("-d", "--delete", action='store_true', help="Delete all metadata associated with binary")
-    parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
+    parser.add_argument("-m", "--model", default=None, help="AI model used to generate embeddings")
+    parser.add_argument("-x", "--extract", action="store_true", help="Fetch embeddings for binary")
+    parser.add_argument("-M", "--match", action="store_true",
+                        help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
+    parser.add_argument("--confidence", default="high", choices=["high", "medium", "low", "partial", "all"],
+                        help="Confidence threshold used to match symbols. Valid values are 'all', 'medium', 'low', 'partial' or 'high'[DEFAULT]")
+    parser.add_argument("--deviation", default=0.1, type=float,
+                        help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
+    parser.add_argument("-l", "--logs", action="store_true", help="Fetch analysis log file for binary")
+    parser.add_argument("-d", "--delete", action="store_true", help="Delete all metadata associated with binary")
+    parser.add_argument("-k", "--apikey", help="RevEng.AI Personal API key")
     parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
     parser.add_argument("-v", "--version", action="store_true", help="Display version information")
-    parser.add_argument("--help", action="help", default=argparse.SUPPRESS, help=argparse._('Show this help message and exit'))
+    parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
+                        help=argparse._("Show this help message and exit"))
     parser.add_argument("--isa", default=None, help="Override executable ISA. Valid values are x86, x86_64, ARMv7")
-    parser.add_argument("--exec-format", default=None, help="Override executable format. Valid values are pe, elf, macho, raw")
-    parser.add_argument("--platform", default=None, help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
-    parser.add_argument("--dynamic-execution", default=False, action='store_true', help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
-    parser.add_argument("--cmd-line-args", default="", help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
-    parser.add_argument("--scope", default="private", help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
-    parser.add_argument("--tags", default=None, help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3..")
+    parser.add_argument("--exec-format", default=None,
+                        help="Override executable format. Valid values are pe, elf, macho, raw")
+    parser.add_argument("--platform", default=None,
+                        help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
+    parser.add_argument("--dynamic-execution", default=False, action="store_true",
+                        help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
+    parser.add_argument("--cmd-line-args", default="",
+                        help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
+    parser.add_argument("--scope", default="private", choices=["public", "private"],
+                        help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
+    parser.add_argument("--tags", default=None, type=str,
+                        help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3.")
+    parser.add_argument("--priority", default=0, type=int, help="Add priority to processing queue.")
+    parser.add_argument("--verbose", default=False, action="store_true", help="Set verbose output.")
+    parser.add_argument("--debug", default=None, help="Debug file path to write pass with analysis")
+    parser.add_argument("-s", "--status", action="store_true", help="Ongoing status of the provided binary")
     args = parser.parse_args()
-    if args.apikey:
-        api.re_conf['apikey'] = args.apikey
-    if args.host:
-        api.re_conf['host'] = args.host
-    if args.model:
-        api.re_conf['model'] = args.model
+    # set re_conf args
+    for arg in ("apikey", "host", "model",):
+        if getattr(args, arg):
+            api.re_conf[arg] = getattr(args, arg)
+    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
     # display version and exit
     if args.version:
-        version()
-        exit(0)
-    exec_fmt = None
-    exec_isa = None
-    base_address = 0
-    if args.base_address:
-        if args.base_address.upper()[:2] == "0X":
-            base_address = int(args.base_address, 16)
-        else:
-            base_address = int(args.base_address)
+        return version()
+    # validate length of string tags
+    tags = None
+    if args.tags:
+        tags = parse_collections(args.tags)
-    if args.dir:
-        if not os.path.isdir(args.dir):
-            rich_print(f'Error, {args.dir} is not a valid directory path')
-            exit(-1)
+    collections = None
+    if args.collections:
+        collections = parse_collections(args.collections)
+    # auto analysis, uploads and starts analysis
+    if args.A:
+        args.upload = args.analyse = True
-        files = iglob(os.path.abspath(args.dir) + '/**/*', recursive=True)
+    if args.dir:
+        files = iglob(os.path.abspath(args.dir) + "/**/*", recursive=True)
         ## perform operation on all files inside directory
-        for file in track(files, description='Files in directory'):
+        for file in track(files, description="Files in directory"):
             if not os.path.isfile(file):
-                #rich_print(f'[blue]Skipping non-file[/blue] {file}')
+                rerr.print(f"[blue]Skipping non-file:[/blue] {file}")
                 continue
+            # upload binary
+            if args.upload:
+                api.RE_upload(file)
             if args.analyse:
                 try:
                     fpath, exec_fmt, exec_isa = verify_binary(file)
-                    rich_print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
-                    rich_print(f'[green bold]Analysing[/green bold] {file}')
-                    api.RE_analyse(file, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
+                    rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
                 except Exception as e:
-                    rich_print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
+                    rerr.print(f"[red bold][!] Error, binary exec type could not be verified:[/red bold] {file}")
+                    rerr.print(f"[yellow] {e} [/yellow]")
+                rout.print(f"[green bold]Analysing:[/green bold] {file}")
+                api.RE_analyse(file, model_name=api.re_conf["model"], isa_options=args.isa,
+                               platform_options=args.platform, dynamic_execution=args.dynamic_execution,
+                               command_line_args=args.cmd_line_args, file_options=args.exec_format,
+                               binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
+                               duplicate=args.duplicate, debug_fpath=args.debug)
-            elif args.delete:
+            if args.delete:
                 try:
-                    rich_print(f'[green bold]Deleting analyses for[/green bold] {file}')
-                    api.RE_delete(args.binary, args.model)
+                    rout.print(f"[green bold]Deleting analyses for:[/green bold] {file}")
+                    api.RE_delete(file)
                 except Exception as e:
-                    rich_print(f"[red bold][!] Error, could not delete analysis for [/red bold] {file}")
-            else:
-                rich_print(f'Error, -D only supports analyse or delete')
-                exit(-1)
-        exit(0)
-    if args.A or args.analyse or args.extract or args.logs or args.delete or args.signature or args.similarity or args.upload or args.match or args.sbom:
-        # verify binary is a file
+                    rerr.print(f"[red bold][!] Error, could not delete analysis for:[/red bold] {file}")
+                    rerr.print(f"[yellow] {e} [/yellow]")
+            if not (args.upload or args.analyse or args.delete):
+                rerr.print(f"Error, '-D' flag only supports upload, analyse, or delete.")
+                return -1
+    elif args.analyse or args.extract or args.logs or args.delete or \
+            args.upload or args.match or args.cves or args.sbom or args.status:
         try:
             fpath, exec_fmt, exec_isa = verify_binary(args.binary)
-            rich_print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
+            # keep stdout to data only
+            rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
             args.binary = fpath
+        except TypeError as e:
+            rerr.print("[bold red][!] Error, please supply a valid binary file using '-b' flag.[/bold red]")
+            rerr.print(f"[yellow] {e} [/yellow]")
+            return 0
         except Exception as e:
-            print("[!] Error, please supply a valid binary file using '-b'.")
-            parser.print_help()
-            exit(-1)
-    if args.upload:
-        # upload binary first, them carry out actions
-        print(f"[!] RE:upload not implemented. Use analyse.")
-        exit(-1)
-    if args.analyse:
-        api.RE_analyse(args.binary, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
+            rerr.print(f"[bold red][!] Error, binary exec type could not be verified:[/bold red] {args.binary}")
+            rerr.print(f"[yellow] {e} [/yellow]")
-    elif args.extract:
-        embeddings = api.RE_embeddings(args.binary, args.model)
-        print_json(data=embeddings)
+        if args.upload:
+            api.RE_upload(args.binary)
-    elif args.signature and not args.ann:
-        # Arithetic mean of symbol embeddings
-        b_embed = api.RE_signature(args.binary, args.model)
-        print_json(data=b_embed)
+            if not args.analyse:
+                return 0
-    elif args.similarity:
-        #compute binary similarity from list of executables
-        if args.from_file:
-            binaries = list(map(lambda x: x.strip(), open(args.from_file, 'r').readlines()))
-        else:
-            if not args.to:
-                print(f"Error, please specify --from-file or --to to compute binary similarity against")
-                exit(-1)
-            binaries = args.to.split(",")
-        # verify all binaries are valid files
-        for b in binaries:
-            verify_binary(b)
-        binary_similarity(args.binary, binaries, args.model)
-    elif args.ann:
-        source = None
-        # parse embedding json file
-        if args.embedding:
-            if not isfile(args.embedding):
-                print("[!] Error, please supply a valid embedding JSON file using '-e'")
-                parser.print_help()
-                exit(-1)
-            embedding = json.loads(open(args.embedding, 'r').read())
-        elif (args.symbol or args.start_vaddr) and args.binary:
-            if args.start_vaddr:
-                if args.start_vaddr.upper()[:2] == "0X":
-                    vaddr = int(args.start_vaddr, 16) + base_address
-                else:
-                    vaddr = int(args.start_vaddr) + base_address
-                print(f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
-                embeddings = api.RE_embeddings(args.binary, args.model)
-                matches = list(filter(lambda x: x['vaddr'] == vaddr, embeddings))
-                if len(matches) == 0:
-                    print(f"[!] Error, could not find symbol at {hex(vaddr)} in {args.binary}")
-                    exit(-1)
-                embedding = matches[0]['embedding']
-            else:
-                symb_name = args.symbol
-                print(f"[+] Using symbol {args.symbol} from {args.binary}")
-                embeddings = api.RE_embeddings(args.binary, args.model)
-                matches = list(filter(lambda x: x['name'] == args.symbol, embeddings))
-                if len(matches) == 0:
-                    print(f"[!] Error, could not find symbol at {args.symbol} in {args.binary}")
-                    exit(-1)
-                embedding = matches[0]['embedding']
-        elif args.binary and args.signature:
-            print(f"[+] Searching ANN for binary embeddings {args.binary}")
-            api.RE_nearest_binaries(api.RE_signature(args.binary, args.model), args.model, args.nns, args.collections)
-            exit(0)
-        else:
-            print("[!] Error, please supply a valid embedding JSON file using '-e', or select a function using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).")
-            parser.print_help()
-            exit(-1)
-        # check for valid regex
-        if args.collections:
-            try:
-                re.compile(args.collections)
-            except re.error as e:
-                print(f"[!] Error, invalid regex for collections - {args.collections}")
-                exit(-1)
-        if args.found_in:
-            if not os.path.isfile(args.found_in):
-                print("[!] Error, --found-in flag requires a path to a binary to search from")
-                exit(-1)
-            print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
-            embeddings = api.RE_embeddings(args.found_in, args.model)
-            res = api.RE_compute_distance(embedding, embeddings, int(args.nns))
-            print_json(data=res)
-        elif args.from_file:
-            if not os.path.isfile(args.from_file):
-                print("[!] Error, --from-file flag requires a path to a JSON embeddings file")
-                exit(-1)
-            print(f"[+] Searching for symbols similar to embedding in binary {args.from_file}")
-            res = api.RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
-            print_json(data=res)
-        else:
-            print(f"[+] Searching for similar symbols to embedding in {'all' if not args.collections else args.collections} collections.")
-            api.RE_nearest_symbols(embedding, args.model, int(args.nns), collections=args.collections)
-    elif args.match:
-        embeddings = None
-        if args.from_file:
-            embeddings = json.load(open(args.from_file, 'r'))
-        elif args.found_in:
-            if not os.path.isfile(args.found_in):
-                print("[!] Error, --found-in flag requires a path to a binary to search from")
-                exit(-1)
-            print(f"[+] Matching symbols between {args.binary} and {args.found_in}")
-            embeddings = api.RE_embeddings(args.found_in, args.model)
-        else:
-            print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
-            exit(-1)
-        confidence = 0.99
-        if args.confidence:
-            confidences = {
-                'high': 0.99,
-                'medium': 0.95,
-                'low': 0.9,
-                'all': 0.0
-            }
-            if args.confidence in confidences.keys():
-                confidence = confidences[args.confidence]
+        # upload binary first, them carry out actions
+        if args.analyse:
+            api.RE_analyse(args.binary, model_name=api.re_conf["model"], isa_options=args.isa,
+                           platform_options=args.platform, dynamic_execution=args.dynamic_execution,
+                           command_line_args=args.cmd_line_args, file_options=args.exec_format,
+                           binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
+                           duplicate=args.duplicate, debug_fpath=args.debug)
+        elif args.extract:
+            embeddings = api.RE_embeddings(args.binary).json()
+            print_json(data=embeddings)
+        elif args.match:
+            # parse confidences
+            confidence: float = 0.90
+            if args.confidence:
+                confidences = {
+                    "high": 0.95,
+                    "medium": 0.9,
+                    "low": 0.7,
+                    "partial": 0.5,
+                    "all": 0.0
+                }
+                if args.confidence in confidences.keys():
+                    confidence = confidences[args.confidence]
+            if args.from_file:
+                if not os.path.isfile(args.from_file) and not os.access(args.from_file, os.R_OK):
+                    rerr.print("[bold red][!] Error, '--from-file' flag requires a path to a JSON embeddings file.[/bold red]")
+                    return -1
+                rout.print(f"[+] Searching for symbols similar to embedding in binary: {args.from_file}")
+                embeddings = json.load(open(args.from_file))
+            elif args.found_in:
+                if not os.path.isfile(args.found_in) and not os.access(args.found_in, os.R_OK):
+                    rerr.print("[bold red][!] Error, '--found-in' flag requires a path to a binary to search from.[/bold red]")
+                    return -1
+                rout.print(f"[+] Matching symbols between {args.binary} and {args.found_in}.")
+                embeddings = api.RE_embeddings(args.found_in).json()["data"]["embedding"]
             else:
-                confidence = float(args.confidence)
-        match(args.binary, args.model, embeddings, confidence=confidence, deviation=float(args.deviation))
+                return match_for_each(args.binary, confidence, args.nns)
+            match(args.binary, embeddings, confidence=confidence, deviation=float(args.deviation))
+        elif args.logs:
+            api.RE_logs(args.binary)
-    elif args.sca:
-        api.RE_sca(args.binary)
+        elif args.delete:
+            api.RE_delete(args.binary)
-    elif args.logs:
-        api.RE_logs(args.binary, args.model)
+        elif args.sbom:
+            api.RE_SBOM(args.binary)
-    elif args.delete:
-        api.RE_delete(args.binary, args.model)
+        elif args.cves:
+            api.RE_cves(args.binary)
-    elif args.sbom:
-        api.RE_SBOM(args.binary, args.model)
+        elif args.status:
+            api.RE_status(args.binary, console=True)
-    elif args.cves:
-        api.RE_cves(args.binary, args.model)
     else:
-        print("[!] Error, please supply an action command")
+        rerr.print("[bold red][!] Error, please supply an action command.[/bold red]")
         parser.print_help()
+    return 0
-if __name__ == '__main__':
-    main()
+if __name__ == "__main__":
+    exit(main())

reait 0.0.19__py3-none-any.whl → 1.0.0__py3-none-any.whl

reait 0.0.19__py3-none-any.whl → 1.0.0__py3-none-any.whl