PyPI - reait - Versions diffs - 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl - Mend

reait 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

reait/__init__.py +3 -0
reait/api.py +429 -174
reait/main.py +246 -130
{reait-0.0.18.dist-info → reait-0.0.20.dist-info}/METADATA +45 -21
reait-0.0.20.dist-info/RECORD +9 -0
{reait-0.0.18.dist-info → reait-0.0.20.dist-info}/WHEEL +1 -1
reait-0.0.18.dist-info/RECORD +0 -9
{reait-0.0.18.dist-info → reait-0.0.20.dist-info}/LICENSE +0 -0
{reait-0.0.18.dist-info → reait-0.0.20.dist-info}/entry_points.txt +0 -0
{reait-0.0.18.dist-info → reait-0.0.20.dist-info}/top_level.txt +0 -0

reait/main.py CHANGED Viewed

@@ -1,53 +1,67 @@
 #!/usr/bin/env python
 from __future__ import print_function
-from hashlib import sha256
-from rich import print_json, print as rich_print
+import logging
+from rich import print_json
 from rich.progress import track
 from rich.console import Console
 from rich.table import Table
 import os
-import re
 import argparse
-import requests
-from numpy import array, vstack, mean, average
-from pandas import DataFrame
 import json
-import tomli
-from os.path import isfile, getsize
-from sys import exit
-from IPython import embed
-from reait import api
+from os.path import isfile
+from sys import exit, stdout, stderr
+from reait import api, __version__
 from scipy.spatial import distance
-from scipy.special import expit
 from glob import iglob
 import numpy as np
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from multiprocessing import cpu_count
+rerr = Console(file=stderr)
+rout = Console(file=stdout)
 def version():
     """
         Display program version
     """
-    rich_print(f"[bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]")
+    rout.print(f"""[bold blue] ::::::::    ::::::::
+::  ::::    :::  :::
+::::::::::::::::::::
+:::::   :::   ::::::
+   ::::::::::::::
+   .::  :::  ::::
+::::::  :::  :::::::
+::  :::::::::::  :::
+::  :::::  ::::  :::
+::::::::    :::::::: [/bold blue]
+  [bold red]reait[/bold red] [bold bright_green]v{__version__}[/bold bright_green]
+""")
+    rout.print("[yellow]Config:[/yellow]")
     print_json(data=api.re_conf)
 def verify_binary(fpath_fmt: str):
-    fmt     = None
-    fpath   = fpath_fmt
+    fmt = None
+    fpath = fpath_fmt
-    if ':' in fpath_fmt:
-        fpath, fmt = fpath_fmt.split(':')
+    # if ':' in fpath_fmt:
+    #    fpath, fmt = fpath_fmt.split(':')
     if not os.path.isfile(fpath):
         raise RuntimeError(f"File path {fpath} is not a file")
-    if getsize(fpath) > 1024 * 1024 * 10:
-        raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
+    # if getsize(fpath) > 1024 * 1024 * 10:
+    #    raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
     if not fmt:
         exec_format, exec_isa = api.file_type(fpath)
     else:
         if '-' not in fmt:
-            raise RuntimeError('Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
+            raise RuntimeError(
+                'Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
         exec_format, exec_isa = fmt.split('-')
@@ -60,13 +74,13 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
     """
     print(f"Matching symbols from {fpath} with confidence {confidence}")
     sink_embed_mat = np.vstack(list(map(lambda x: x['embedding'], embeddings)))
-    b_embeds = api.RE_embeddings(fpath, model_name)
+    b_embeds = api.RE_embeddings(fpath).json()
     source_embed_mat = np.vstack(list(map(lambda x: x['embedding'], b_embeds)))
     # angular distance over cosine
-    #closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
+    # closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
     closest = distance.cdist(source_embed_mat, sink_embed_mat, api.angular_distance)
     # rescale to separate high end of (-1, 1.0)
-    closest = rescale_sim(closest)
+    # closest = rescale_sim(closest)
     i, j = closest.shape
     for _i in track(range(i), description='Matching Symbols...'):
@@ -78,22 +92,74 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
         sink_symb = embeddings[sink_index]
         m_confidence = row[match_index]
         s_confidence = row[second_match]
         if row[match_index] >= confidence:
-            rich_print(f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
+            rout.print(
+                f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
         elif (m_confidence - s_confidence) > deviation:
-            rich_print(f"[bold magenta]Possible match[/bold magenta][yellow]\tConfidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
+            rout.print(
+                f"[bold magenta]Possible match[/bold magenta][yellow]\tConfidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
         else:
-            #rich_print(f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
+            rerr.print(
+                f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
             pass
+def match_for_each(fpath: str, model_name: str, confidence: float = 0.95, collections=None):
+    """
+    Match embeddings in fpath from a list of embeddings
+    """
+    if collections is None:
+        collections = []
+    print(f"Matching symbols from {fpath} with confidence {confidence}")
+    b_embeds = api.RE_embeddings(fpath).json()
+    b_hash = api.re_binary_id(fpath)
+    with ThreadPoolExecutor(max_workers=cpu_count()) as p:
+        # print(f"Collections: {collections}")
+        partial = lambda x: api.RE_nearest_symbols(x['embedding'], model_name, 1, collections=collections,
+                                                   ignore_hashes=[b_hash]).json()
+        res = {p.submit(partial, embed): embed for embed in b_embeds}
+        for future in track(as_completed(res), description='Matching Symbols...'):
+            # get result from future
+            symbol = res[future]
+            embedding = symbol['embedding']
+            # do ANN call to match symbols, ignore functions from current file
+            f_suggestions = api.RE_nearest_symbols(embedding, model_name, 1, collections=collections,
+                                                   ignore_hashes=[api.re_binary_id(fpath)]).json()
+            if len(f_suggestions) == 0:
+                # no match
+                rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
+                continue
+            matched = f_suggestions[0]
+            if matched['distance'] >= confidence:
+                rout.print(
+                    f"\t[bold green]Found match![/bold green][yellow]\tConfidence: {matched['distance']:.05f}[/yellow]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]\t->\t[blue]{matched['name']}:{matched['sha_256_hash']}")
+                continue
+            rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
+def parse_collections(collections: str):
+    """
+        Return collections as list from CSV
+    """
+    if not collections:
+        return None
+    return collections.split(',')
 def rescale_sim(x):
     """
-        Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values seperated, map to hyperbolic space
+        Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values separated, map to hyperbolic space
     """
     return np.power(x, 5)
 def binary_similarity(fpath: str, fpaths: list, model_name: str):
     """
     Compute binary similarity between source and list of binary files
@@ -105,25 +171,26 @@ def binary_similarity(fpath: str, fpaths: list, model_name: str):
     table.add_column("SHA3-256", style="magenta", no_wrap=True)
     table.add_column("Similarity", style="yellow", no_wrap=True)
-    b_embed = api.RE_signature(fpath, model_name)
+    b_embed = api.RE_signature(fpath).json()
     b_sums = []
     for b in track(fpaths, description='Computing Binary Similarity...'):
         try:
-            b_sum = api.RE_signature(b, model_name)
+            b_sum = api.RE_signature(b).json()
             b_sums.append(b_sum)
         except Exception as e:
-            console.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.binary_id(b)}[/green bold]")
-            console.print(e)
+            rerr.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.re_binary_id(b)}[/green bold]")
+            rerr.print(e)
     if len(b_sums) > 0:
-            #closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
-            closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
+        # closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
+        closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
-            for binary, similarity in zip(fpaths, closest.tolist()[0]):
-                table.add_row(os.path.basename(binary), api.binary_id(binary), f"{rescale_sim(similarity):.05f}")
+        for binary, similarity in zip(fpaths, closest.tolist()[0]):
+            # table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{rescale_sim(similarity):.05f}")
+            table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{similarity:.05f}")
-    console.print(table)
+    rout.print(table)
 def main() -> None:
@@ -131,53 +198,84 @@ def main() -> None:
     Tool entry
     """
     parser = argparse.ArgumentParser(add_help=False)
-    parser.add_argument("-b", "--binary", default="", help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
+    parser.add_argument("-b", "--binary", default="",
+                        help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
+    parser.add_argument("-B", "--binary-hash", default="", help="Hex-encoded SHA-256 hash of the binary to use")
     parser.add_argument("-D", "--dir", default="", help="Path of directory to recursively analyse")
-    parser.add_argument("-a", "--analyse", action='store_true', help="Perform a full analysis and generate embeddings for every symbol")
-    parser.add_argument("--no-embeddings", action='store_true', help="Only perform binary analysis. Do not generate embeddings for symbols")
+    parser.add_argument("-a", "--analyse", action='store_true',
+                        help="Perform a full analysis and generate embeddings for every symbol")
+    parser.add_argument("--no-embeddings", action='store_true',
+                        help="Only perform binary analysis. Do not generate embeddings for symbols")
     parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
     parser.add_argument("-A", action='store_true', help="Upload and Analyse a new binary")
     parser.add_argument("-u", "--upload", action='store_true', help="Upload a new binary to remote server")
-    parser.add_argument("-n", "--ann", action='store_true', help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
-    parser.add_argument("--embedding", help="Path of JSON file containing a BinNet embedding")
+    parser.add_argument("--duplicate", default=False, action='store_true', help="Duplicate an existing binary")
+    parser.add_argument("-n", "--ann", action='store_true',
+                        help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
+    parser.add_argument("-e", "--embedding", help="Path of JSON file containing a BinNet embedding")
     parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
-    parser.add_argument("--collections", default=None, help="Regex string to select RevEng.AI collections for filtering e.g., libc")
+    parser.add_argument("--collections", default=None,
+                        help="Comma Seperated Value of collections to search from e.g. libxml2,libpcap. Used to select RevEng.AI collections for filtering search results")
     parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
-    parser.add_argument("--from-file", help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
+    parser.add_argument("--from-file",
+                        help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
     parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
-    parser.add_argument("-C", "--sca", action="store_true", help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
+    # parser.add_argument("-C", "--sca", action="store_true",
+    #                     help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
     parser.add_argument("--sbom", action="store_true", help="Generate SBOM for binary")
-    parser.add_argument("-m", "--model", default="binnet-0.1", help="AI model used to generate embeddings")
+    parser.add_argument("-m", "--model", default=None, help="AI model used to generate embeddings")
     parser.add_argument("-x", "--extract", action='store_true', help="Fetch embeddings for binary")
     parser.add_argument("--start-vaddr", help="Start virtual address of the function to extract embeddings")
     parser.add_argument("--symbol", help="Name of the symbol to extract embeddings")
     parser.add_argument("-s", "--signature", action='store_true', help="Generate a RevEng.AI binary signature")
-    parser.add_argument("-S", "--similarity", action='store_true', help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
+    parser.add_argument("-S", "--similarity", action='store_true',
+                        help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
     parser.add_argument("-t", "--to", help="CSV list of executables to compute binary similarity against")
-    parser.add_argument("-M", "--match", action='store_true', help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
+    parser.add_argument("-M", "--match", action='store_true',
+                        help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
     parser.add_argument("--confidence", default="high", help="Confidence threshold used to match symbols.")
-    parser.add_argument("--deviation", default=0.2, help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
+    parser.add_argument("--deviation", default=0.2,
+                        help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
     parser.add_argument("-l", "--logs", action='store_true', help="Fetch analysis log file for binary")
     parser.add_argument("-d", "--delete", action='store_true', help="Delete all metadata associated with binary")
     parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
     parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
     parser.add_argument("-v", "--version", action="store_true", help="Display version information")
-    parser.add_argument("--help", action="help", default=argparse.SUPPRESS, help=argparse._('Show this help message and exit'))
+    parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
+                        help=argparse._('Show this help message and exit'))
     parser.add_argument("--isa", default=None, help="Override executable ISA. Valid values are x86, x86_64, ARMv7")
-    parser.add_argument("--exec-format", default=None, help="Override executable format. Valid values are pe, elf, macho, raw")
-    parser.add_argument("--platform", default=None, help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
-    parser.add_argument("--dynamic-execution", default=False, action='store_true', help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
-    parser.add_argument("--cmd-line-args", default="", help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
-    parser.add_argument("--scope", default="private", help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
-    parser.add_argument("--tags", default=None, help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3..")
+    parser.add_argument("--exec-format", default=None,
+                        help="Override executable format. Valid values are pe, elf, macho, raw")
+    parser.add_argument("--platform", default=None,
+                        help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
+    parser.add_argument("--dynamic-execution", default=False, action='store_true',
+                        help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
+    parser.add_argument("--cmd-line-args", default="",
+                        help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
+    parser.add_argument("--scope", default="private", choices=["public", "private"],
+                        help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
+    parser.add_argument("--tags", default=None, type=str,
+                        help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3..")
+    parser.add_argument("--priority", default=0, type=int, help="Add priority to processing queue.")
+    parser.add_argument("--verbose", default=False, action='store_true', help="Set verbose output.")
     args = parser.parse_args()
-    if args.apikey:
-        api.re_conf['apikey'] = args.apikey
-    if args.host:
-        api.re_conf['host'] = args.host
-    if args.model:
-        api.re_conf['model'] = args.model
+    # set re_conf args
+    for arg in ('apikey', 'host', 'model'):
+        if getattr(args, arg):
+            api.re_conf[arg] = getattr(args, arg)
+    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+    # validate length of string tags
+    if args.tags:
+        # don't add non-content as tags
+        if len(args.tags.strip()) == 0:
+            args.tags = None
+        else:
+            # convert to list
+            args.tags = args.tags.split(',')
     # display version and exit
     if args.version:
@@ -193,36 +291,52 @@ def main() -> None:
         else:
             base_address = int(args.base_address)
+    collections = None
+    if args.collections:
+        collections = parse_collections(args.collections)
+    # auto analysis, uploads and starts analysis
+    if args.A:
+        args.upload = True
+        args.analyse = True
     if args.dir:
         if not os.path.isdir(args.dir):
-            rich_print(f'Error, {args.dir} is not a valid directory path')
+            rerr.print(f'Error, {args.dir} is not a valid directory path')
             exit(-1)
         files = iglob(os.path.abspath(args.dir) + '/**/*', recursive=True)
         ## perform operation on all files inside directory
         for file in track(files, description='Files in directory'):
             if not os.path.isfile(file):
-                #rich_print(f'[blue]Skipping non-file[/blue] {file}')
+                rerr.print(f'[blue]Skipping non-file[/blue] {file}')
                 continue
+            # upload binary
+            if args.upload:
+                api.RE_upload(file)
             if args.analyse:
                 try:
                     fpath, exec_fmt, exec_isa = verify_binary(file)
-                    rich_print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
-                    rich_print(f'[green bold]Analysing[/green bold] {file}')
-                    api.RE_analyse(file, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
+                    rout.print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
+                    rout.print(f'[green bold]Analysing[/green bold] {file}')
+                    api.RE_analyse(file, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
+                                   dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
+                                   file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags,
+                                   priority=args.priority, duplicate=args.duplicate)
                 except Exception as e:
-                    rich_print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
+                    rerr.print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
-            elif args.delete:
+            if args.delete:
                 try:
-                    rich_print(f'[green bold]Deleting analyses for[/green bold] {file}')
-                    api.RE_delete(args.binary, args.model)
+                    rout.print(f'[green bold]Deleting analyses for[/green bold] {file}')
+                    api.RE_delete(file)
                 except Exception as e:
-                    rich_print(f"[red bold][!] Error, could not delete analysis for [/red bold] {file}")
-            else:
-                rich_print(f'Error, -D only supports analyse or delete')
+                    rerr.print(f"[red bold][!] Error, could not delete analysis for [/red bold] {file}")
+                    rerr.print(f"[yellow] {e} [/yellow]")
+            if not (args.upload or args.analyse or args.delete):
+                rerr.print(f'Error, -D only supports upload, analyse, or delete')
                 exit(-1)
         exit(0)
@@ -231,32 +345,40 @@ def main() -> None:
         # verify binary is a file
         try:
             fpath, exec_fmt, exec_isa = verify_binary(args.binary)
-            rich_print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
+            # keep stdout to data only
+            rerr.print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
             args.binary = fpath
         except Exception as e:
-            print("[!] Error, please supply a valid binary file using '-b'.")
-            parser.print_help()
+            rerr.print(f"[bold red]{str(e)}[/bold red]")
+            rerr.print("[bold red][!] Error, please supply a valid binary file using '-b'.[/bold red]")
+            # parser.print_help()
             exit(-1)
     if args.upload:
+        api.RE_upload(args.binary)
+        if not args.analyse:
+            exit(0)
         # upload binary first, them carry out actions
-        print(f"[!] RE:upload not implemented. Use analyse.")
-        exit(-1)
     if args.analyse:
-        api.RE_analyse(args.binary, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
+        api.RE_analyse(args.binary, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
+                       dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
+                       file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags, priority=args.priority,
+                       duplicate=args.duplicate)
     elif args.extract:
-        embeddings = api.RE_embeddings(args.binary, args.model)
+        embeddings = api.RE_embeddings(args.binary).json()
         print_json(data=embeddings)
     elif args.signature and not args.ann:
         # Arithetic mean of symbol embeddings
-        b_embed = api.RE_signature(args.binary, args.model)
+        b_embed = api.RE_signature(args.binary).json()
         print_json(data=b_embed)
     elif args.similarity:
-        #compute binary similarity from list of executables
+        # compute binary similarity from list of executables
         if args.from_file:
             binaries = list(map(lambda x: x.strip(), open(args.from_file, 'r').readlines()))
         else:
@@ -272,9 +394,7 @@ def main() -> None:
         binary_similarity(args.binary, binaries, args.model)
     elif args.ann:
-        source = None
         # parse embedding json file
         if args.embedding:
             if not isfile(args.embedding):
                 print("[!] Error, please supply a valid embedding JSON file using '-e'")
@@ -282,7 +402,6 @@ def main() -> None:
                 exit(-1)
             embedding = json.loads(open(args.embedding, 'r').read())
         elif (args.symbol or args.start_vaddr) and args.binary:
             if args.start_vaddr:
                 if args.start_vaddr.upper()[:2] == "0X":
@@ -290,18 +409,18 @@ def main() -> None:
                 else:
                     vaddr = int(args.start_vaddr) + base_address
-                print(f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
-                embeddings = api.RE_embeddings(args.binary, args.model)
+                print(
+                    f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
+                embeddings = api.RE_embeddings(args.binary).json()
                 matches = list(filter(lambda x: x['vaddr'] == vaddr, embeddings))
                 if len(matches) == 0:
                     print(f"[!] Error, could not find symbol at {hex(vaddr)} in {args.binary}")
                     exit(-1)
                 embedding = matches[0]['embedding']
             else:
-                symb_name = args.symbol
                 print(f"[+] Using symbol {args.symbol} from {args.binary}")
-                embeddings = api.RE_embeddings(args.binary, args.model)
+                embeddings = api.RE_embeddings(args.binary).json()
                 matches = list(filter(lambda x: x['name'] == args.symbol, embeddings))
                 if len(matches) == 0:
                     print(f"[!] Error, could not find symbol at {args.symbol} in {args.binary}")
@@ -309,28 +428,22 @@ def main() -> None:
                 embedding = matches[0]['embedding']
         elif args.binary and args.signature:
             print(f"[+] Searching ANN for binary embeddings {args.binary}")
-            api.RE_nearest_binaries(api.RE_signature(args.binary, args.model), args.model, args.nns, args.collections)
+            b_suggestions = api.RE_nearest_binaries(api.RE_signature(args.binary).json(), args.model, args.nns,
+                                                    collections, ignore_hashes=[api.re_binary_id(args.binary)])
+            print_json(data=b_suggestions)
             exit(0)
         else:
-            print("[!] Error, please supply a valid embedding JSON file using '-e', or select a function using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).")
-            parser.print_help()
+            rerr.print("[bold red][!] Error, please supply a valid embedding JSON file using '-e', or select a function"
+                       " using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).[/bold red]")
+            # parser.print_help()
             exit(-1)
-        # check for valid regex
-        if args.collections:
-            try:
-                re.compile(args.collections)
-            except re.error as e:
-                print(f"[!] Error, invalid regex for collections - {args.collections}")
-                exit(-1)
         if args.found_in:
             if not os.path.isfile(args.found_in):
                 print("[!] Error, --found-in flag requires a path to a binary to search from")
                 exit(-1)
             print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
-            embeddings = api.RE_embeddings(args.found_in, args.model)
+            embeddings = api.RE_embeddings(args.found_in).json()
             res = api.RE_compute_distance(embedding, embeddings, int(args.nns))
             print_json(data=res)
         elif args.from_file:
@@ -341,12 +454,28 @@ def main() -> None:
             res = api.RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
             print_json(data=res)
         else:
-            print(f"[+] Searching for similar symbols to embedding in {'all' if not args.collections else args.collections} collections.")
-            api.RE_nearest_symbols(embedding, args.model, int(args.nns), collections=args.collections)
+            print(f"[+] Searching for similar symbols to embedding in "
+                  f"{'all' if not args.collections else args.collections} collections.")
+            f_suggestions = api.RE_nearest_symbols(embedding["embedding"], args.model, int(args.nns),
+                                                   collections=collections).json()
+            print_json(data=f_suggestions)
     elif args.match:
-        embeddings = None
+        # parse confidences
+        confidence = 0.90
+        if args.confidence:
+            confidences = {
+                'high': 0.95,
+                'medium': 0.9,
+                'low': 0.7,
+                'partial': 0.5,
+                'all': 0.0
+            }
+            if args.confidence in confidences.keys():
+                confidence = confidences[args.confidence]
+            else:
+                confidence = float(args.confidence)
         if args.from_file:
             embeddings = json.load(open(args.from_file, 'r'))
         elif args.found_in:
@@ -354,41 +483,28 @@ def main() -> None:
                 print("[!] Error, --found-in flag requires a path to a binary to search from")
                 exit(-1)
             print(f"[+] Matching symbols between {args.binary} and {args.found_in}")
-            embeddings = api.RE_embeddings(args.found_in, args.model)
+            embeddings = api.RE_embeddings(args.found_in).json()
         else:
-            print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
+            # print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
+            match_for_each(args.binary, args.model, confidence, collections)
             exit(-1)
-        confidence = 0.99
-        if args.confidence:
-            confidences = {
-                'high': 0.99,
-                'medium': 0.95,
-                'low': 0.9,
-                'all': 0.0
-            }
-            if args.confidence in confidences.keys():
-                confidence = confidences[args.confidence]
-            else:
-                confidence = float(args.confidence)
         match(args.binary, args.model, embeddings, confidence=confidence, deviation=float(args.deviation))
-    elif args.sca:
-        api.RE_sca(args.binary)
+    # elif args.sca:
+    #     api.RE_sca(args.binary)
     elif args.logs:
-        api.RE_logs(args.binary, args.model)
+        api.RE_logs(args.binary)
     elif args.delete:
-        api.RE_delete(args.binary, args.model)
+        api.RE_delete(args.binary)
     elif args.sbom:
-        api.RE_SBOM(args.binary, args.model)
+        api.RE_SBOM(args.binary)
     elif args.cves:
-        api.RE_cves(args.binary, args.model)
+        api.RE_cves(args.binary)
     else:
         print("[!] Error, please supply an action command")
         parser.print_help()

reait 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

reait 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl