reait 0.0.19__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reait/main.py CHANGED
@@ -1,75 +1,73 @@
1
- #!/usr/bin/env python
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
2
3
  from __future__ import print_function
3
- from hashlib import sha256
4
- from rich import print_json, print as rich_print
4
+
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from rich import print_json
5
10
  from rich.progress import track
6
11
  from rich.console import Console
7
- from rich.table import Table
8
12
  import os
9
- import re
10
13
  import argparse
11
- import requests
12
- from numpy import array, vstack, mean, average
13
- from pandas import DataFrame
14
14
  import json
15
- import tomli
16
- from os.path import isfile, getsize
17
- from sys import exit
18
- from IPython import embed
19
- from reait import api
15
+ from sys import exit, stdout, stderr
20
16
  from scipy.spatial import distance
21
- from scipy.special import expit
22
17
  from glob import iglob
23
18
  import numpy as np
24
19
 
25
- def version():
20
+ import api
21
+
22
+ rerr = Console(file=stderr, width=180)
23
+ rout = Console(file=stdout, width=180)
24
+
25
+
26
+ def version() -> int:
26
27
  """
27
28
  Display program version
28
29
  """
29
- rich_print(f"[bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]")
30
+ rout.print(f"""[bold blue] :::::::: ::::::::
31
+ :: :::: ::: :::
32
+ ::::::::::::::::::::
33
+ ::::: ::: ::::::
34
+ ::::::::::::::
35
+ .:: ::: ::::
36
+ :::::: ::: :::::::
37
+ :: ::::::::::: :::
38
+ :: ::::: :::: :::
39
+ :::::::: :::::::: [/bold blue]
40
+ [bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]
41
+ """)
42
+ rout.print("[yellow]Config:[/yellow]")
30
43
  print_json(data=api.re_conf)
44
+ return 0
31
45
 
32
46
 
33
- def verify_binary(fpath_fmt: str):
34
- fmt = None
35
- fpath = fpath_fmt
36
-
37
- if ':' in fpath_fmt:
38
- fpath, fmt = fpath_fmt.split(':')
39
-
40
- if not os.path.isfile(fpath):
41
- raise RuntimeError(f"File path {fpath} is not a file")
47
+ def verify_binary(fpath_fmt: str) -> tuple[str, str, str]:
48
+ fpath = fpath_fmt
42
49
 
43
- if getsize(fpath) > 1024 * 1024 * 10:
44
- raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
45
-
46
- if not fmt:
47
- exec_format, exec_isa = api.file_type(fpath)
48
- else:
49
- if '-' not in fmt:
50
- raise RuntimeError('Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
51
-
52
- exec_format, exec_isa = fmt.split('-')
50
+ exec_format, exec_isa = api.file_type(fpath)
53
51
 
54
52
  return fpath, exec_format, exec_isa
55
53
 
56
54
 
57
- def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.95, deviation: float = 0.1):
55
+ def match(fpath: str, embeddings: list, confidence: float = 0.95, deviation: float = 0.1) -> None:
58
56
  """
59
57
  Match embeddings in fpath from a list of embeddings
60
58
  """
61
- print(f"Matching symbols from {fpath} with confidence {confidence}")
62
- sink_embed_mat = np.vstack(list(map(lambda x: x['embedding'], embeddings)))
63
- b_embeds = api.RE_embeddings(fpath, model_name)
64
- source_embed_mat = np.vstack(list(map(lambda x: x['embedding'], b_embeds)))
59
+ rout.print(f"Matching symbols from {fpath} with confidence {confidence}.")
60
+ sink_embed_mat = np.vstack(list(map(lambda x: x["embedding"], embeddings)))
61
+ b_embeds = api.RE_embeddings(fpath).json()["data"]
62
+ source_embed_mat = np.vstack(list(map(lambda x: x["embedding"], b_embeds)))
65
63
  # angular distance over cosine
66
- #closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
64
+ # closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
67
65
  closest = distance.cdist(source_embed_mat, sink_embed_mat, api.angular_distance)
68
66
  # rescale to separate high end of (-1, 1.0)
69
- closest = rescale_sim(closest)
67
+ # closest = rescale_sim(closest)
70
68
  i, j = closest.shape
71
69
 
72
- for _i in track(range(i), description='Matching Symbols...'):
70
+ for _i in track(range(i), description="Matching Symbols..."):
73
71
  row = closest[_i, :]
74
72
  match_index, second_match = row.argsort()[::-1][:2]
75
73
  source_index = _i
@@ -78,321 +76,293 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
78
76
  sink_symb = embeddings[sink_index]
79
77
  m_confidence = row[match_index]
80
78
  s_confidence = row[second_match]
81
-
79
+
82
80
  if row[match_index] >= confidence:
83
- rich_print(f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
81
+ rout.print(
82
+ f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t"
83
+ f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
84
+ f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
84
85
  elif (m_confidence - s_confidence) > deviation:
85
- rich_print(f"[bold magenta]Possible match[/bold magenta][yellow]\tConfidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
86
+ rout.print(
87
+ f"[bold magenta]Possible match[/bold magenta][yellow]\t"
88
+ f"Confidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t"
89
+ f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
90
+ f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
86
91
  else:
87
- #rich_print(f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
92
+ rerr.print(
93
+ f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t"
94
+ f"{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
88
95
  pass
89
96
 
90
-
91
- def rescale_sim(x):
97
+
98
+ def match_for_each(fpath: str, confidence: float = 0.9, nns: int = 1) -> int:
92
99
  """
93
- Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values seperated, map to hyperbolic space
100
+ Match embeddings in fpath from a list of embeddings
94
101
  """
95
- return np.power(x, 5)
102
+ nns = max(nns, 1)
103
+
104
+ rout.print(f"Matching symbols from '{fpath}' with a confidence {confidence:.02f} and up to "
105
+ f"{nns} result{'' if nns == 1 else 's'} per function")
106
+ functions = api.RE_analyze_functions(fpath).json()["functions"]
107
+ function_matches = api.RE_nearest_functions(fpath, nns=nns, distance=1 - confidence).json()["function_matches"]
108
+
109
+ if len(function_matches) == 0:
110
+ rerr.print(f"[bold red]No matches found for a confidence of [/bold red] {confidence:.02f}")
111
+ return -1
112
+ else:
113
+ for function in functions:
114
+ matches = list(filter(lambda x: function["function_id"] == x["origin_function_id"], function_matches))
115
+
116
+ if len(matches):
117
+ rout.print(f"[bold green]Found {len(matches)} match{'' if len(matches) == 1 else 'es'} for "
118
+ f"[/bold green][blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
96
119
 
97
- def binary_similarity(fpath: str, fpaths: list, model_name: str):
120
+ for match in matches:
121
+ rout.print(f"\t[yellow]Confidence: {match['confidence']:.05f}[/yellow]"
122
+ f"\t[blue]{match['nearest_neighbor_function_name']}"
123
+ f" ({match['nearest_neighbor_binary_name']})[/blue]")
124
+ else:
125
+ rout.print(f"[bold red]No matches found for[/bold red] "
126
+ f"[blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
127
+ return 0
128
+
129
+
130
+ def parse_collections(collections: str) -> Optional[list[str]]:
98
131
  """
99
- Compute binary similarity between source and list of binary files
132
+ Return collections as list from CSV
100
133
  """
101
- console = Console()
134
+ if not collections:
135
+ return None
136
+ return collections.split(",")
102
137
 
103
- table = Table(title=f"Binary Similarity to {fpath}")
104
- table.add_column("Binary", justify="right", style="cyan", no_wrap=True)
105
- table.add_column("SHA3-256", style="magenta", no_wrap=True)
106
- table.add_column("Similarity", style="yellow", no_wrap=True)
107
138
 
108
- b_embed = api.RE_signature(fpath, model_name)
139
+ def rescale_sim(x):
140
+ """
141
+ Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low,
142
+ high values separated, map to hyperbolic space
143
+ """
144
+ return np.power(x, 5)
109
145
 
110
- b_sums = []
111
- for b in track(fpaths, description='Computing Binary Similarity...'):
112
- try:
113
- b_sum = api.RE_signature(b, model_name)
114
- b_sums.append(b_sum)
115
- except Exception as e:
116
- console.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.binary_id(b)}[/green bold]")
117
- console.print(e)
118
146
 
119
- if len(b_sums) > 0:
120
- #closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
121
- closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
147
+ def validate_file(arg):
148
+ file = Path(arg)
149
+ if file.is_file():
150
+ return file.absolute()
151
+ raise FileNotFoundError(f"File path {arg} does not exists.")
122
152
 
123
- for binary, similarity in zip(fpaths, closest.tolist()[0]):
124
- table.add_row(os.path.basename(binary), api.binary_id(binary), f"{rescale_sim(similarity):.05f}")
125
153
 
126
- console.print(table)
154
+ def validate_dir(arg):
155
+ dir = Path(arg)
156
+ if dir.is_dir():
157
+ return dir.absolute()
158
+ raise NotADirectoryError(f"Directory path {arg} does not exists.")
127
159
 
128
160
 
129
- def main() -> None:
161
+ def main() -> int:
130
162
  """
131
163
  Tool entry
132
164
  """
133
165
  parser = argparse.ArgumentParser(add_help=False)
134
- parser.add_argument("-b", "--binary", default="", help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
135
- parser.add_argument("-D", "--dir", default="", help="Path of directory to recursively analyse")
136
- parser.add_argument("-a", "--analyse", action='store_true', help="Perform a full analysis and generate embeddings for every symbol")
137
- parser.add_argument("--no-embeddings", action='store_true', help="Only perform binary analysis. Do not generate embeddings for symbols")
166
+ parser.add_argument("-b", "--binary", type=validate_file,
167
+ help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
168
+ parser.add_argument("-B", "--binary-hash", default="", help="Hex-encoded SHA-256 hash of the binary to use")
169
+ parser.add_argument("-D", "--dir", type=validate_dir, help="Path of directory to recursively analyse")
170
+ parser.add_argument("-a", "--analyse", action="store_true",
171
+ help="Perform a full analysis and generate embeddings for every symbol")
138
172
  parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
139
- parser.add_argument("-A", action='store_true', help="Upload and Analyse a new binary")
140
- parser.add_argument("-u", "--upload", action='store_true', help="Upload a new binary to remote server")
141
- parser.add_argument("-n", "--ann", action='store_true', help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
142
- parser.add_argument("--embedding", help="Path of JSON file containing a BinNet embedding")
143
- parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
144
- parser.add_argument("--collections", default=None, help="Regex string to select RevEng.AI collections for filtering e.g., libc")
173
+ parser.add_argument("-A", action="store_true", help="Upload and Analyse a new binary")
174
+ parser.add_argument("-u", "--upload", action="store_true", help="Upload a new binary to remote server")
175
+ parser.add_argument("--duplicate", default=False, action="store_true", help="Duplicate an existing binary")
176
+ parser.add_argument("-e", "--embedding", help="Path of JSON file containing a BinNet embedding")
177
+ parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch", type=int)
178
+ parser.add_argument("--collections", default=None,
179
+ help="Comma Seperated Value of collections to search from e.g. libxml2,libpcap. Used to select RevEng.AI collections for filtering search results")
145
180
  parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
146
- parser.add_argument("--from-file", help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
181
+ parser.add_argument("--from-file",
182
+ help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
147
183
  parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
148
- parser.add_argument("-C", "--sca", action="store_true", help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
149
184
  parser.add_argument("--sbom", action="store_true", help="Generate SBOM for binary")
150
- parser.add_argument("-m", "--model", default="binnet-0.1", help="AI model used to generate embeddings")
151
- parser.add_argument("-x", "--extract", action='store_true', help="Fetch embeddings for binary")
152
- parser.add_argument("--start-vaddr", help="Start virtual address of the function to extract embeddings")
153
- parser.add_argument("--symbol", help="Name of the symbol to extract embeddings")
154
- parser.add_argument("-s", "--signature", action='store_true', help="Generate a RevEng.AI binary signature")
155
- parser.add_argument("-S", "--similarity", action='store_true', help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
156
- parser.add_argument("-t", "--to", help="CSV list of executables to compute binary similarity against")
157
- parser.add_argument("-M", "--match", action='store_true', help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
158
- parser.add_argument("--confidence", default="high", help="Confidence threshold used to match symbols.")
159
- parser.add_argument("--deviation", default=0.2, help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
160
- parser.add_argument("-l", "--logs", action='store_true', help="Fetch analysis log file for binary")
161
- parser.add_argument("-d", "--delete", action='store_true', help="Delete all metadata associated with binary")
162
- parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
185
+ parser.add_argument("-m", "--model", default=None, help="AI model used to generate embeddings")
186
+ parser.add_argument("-x", "--extract", action="store_true", help="Fetch embeddings for binary")
187
+ parser.add_argument("-M", "--match", action="store_true",
188
+ help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
189
+ parser.add_argument("--confidence", default="high", choices=["high", "medium", "low", "partial", "all"],
190
+ help="Confidence threshold used to match symbols. Valid values are 'all', 'medium', 'low', 'partial' or 'high'[DEFAULT]")
191
+ parser.add_argument("--deviation", default=0.1, type=float,
192
+ help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
193
+ parser.add_argument("-l", "--logs", action="store_true", help="Fetch analysis log file for binary")
194
+ parser.add_argument("-d", "--delete", action="store_true", help="Delete all metadata associated with binary")
195
+ parser.add_argument("-k", "--apikey", help="RevEng.AI Personal API key")
163
196
  parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
164
197
  parser.add_argument("-v", "--version", action="store_true", help="Display version information")
165
- parser.add_argument("--help", action="help", default=argparse.SUPPRESS, help=argparse._('Show this help message and exit'))
198
+ parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
199
+ help=argparse._("Show this help message and exit"))
166
200
  parser.add_argument("--isa", default=None, help="Override executable ISA. Valid values are x86, x86_64, ARMv7")
167
- parser.add_argument("--exec-format", default=None, help="Override executable format. Valid values are pe, elf, macho, raw")
168
- parser.add_argument("--platform", default=None, help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
169
- parser.add_argument("--dynamic-execution", default=False, action='store_true', help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
170
- parser.add_argument("--cmd-line-args", default="", help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
171
- parser.add_argument("--scope", default="private", help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
172
- parser.add_argument("--tags", default=None, help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3..")
201
+ parser.add_argument("--exec-format", default=None,
202
+ help="Override executable format. Valid values are pe, elf, macho, raw")
203
+ parser.add_argument("--platform", default=None,
204
+ help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
205
+ parser.add_argument("--dynamic-execution", default=False, action="store_true",
206
+ help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
207
+ parser.add_argument("--cmd-line-args", default="",
208
+ help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
209
+ parser.add_argument("--scope", default="private", choices=["public", "private"],
210
+ help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
211
+ parser.add_argument("--tags", default=None, type=str,
212
+ help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3.")
213
+ parser.add_argument("--priority", default=0, type=int, help="Add priority to processing queue.")
214
+ parser.add_argument("--verbose", default=False, action="store_true", help="Set verbose output.")
215
+ parser.add_argument("--debug", default=None, help="Debug file path to write pass with analysis")
216
+ parser.add_argument("-s", "--status", action="store_true", help="Ongoing status of the provided binary")
217
+
173
218
  args = parser.parse_args()
174
219
 
175
- if args.apikey:
176
- api.re_conf['apikey'] = args.apikey
177
- if args.host:
178
- api.re_conf['host'] = args.host
179
- if args.model:
180
- api.re_conf['model'] = args.model
220
+ # set re_conf args
221
+ for arg in ("apikey", "host", "model",):
222
+ if getattr(args, arg):
223
+ api.re_conf[arg] = getattr(args, arg)
224
+
225
+ logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
181
226
 
182
227
  # display version and exit
183
228
  if args.version:
184
- version()
185
- exit(0)
186
-
187
- exec_fmt = None
188
- exec_isa = None
189
- base_address = 0
190
- if args.base_address:
191
- if args.base_address.upper()[:2] == "0X":
192
- base_address = int(args.base_address, 16)
193
- else:
194
- base_address = int(args.base_address)
229
+ return version()
195
230
 
231
+ # validate length of string tags
232
+ tags = None
233
+ if args.tags:
234
+ tags = parse_collections(args.tags)
196
235
 
197
- if args.dir:
198
- if not os.path.isdir(args.dir):
199
- rich_print(f'Error, {args.dir} is not a valid directory path')
200
- exit(-1)
236
+ collections = None
237
+ if args.collections:
238
+ collections = parse_collections(args.collections)
239
+
240
+ # auto analysis, uploads and starts analysis
241
+ if args.A:
242
+ args.upload = args.analyse = True
201
243
 
202
- files = iglob(os.path.abspath(args.dir) + '/**/*', recursive=True)
244
+ if args.dir:
245
+ files = iglob(os.path.abspath(args.dir) + "/**/*", recursive=True)
203
246
  ## perform operation on all files inside directory
204
- for file in track(files, description='Files in directory'):
247
+ for file in track(files, description="Files in directory"):
205
248
  if not os.path.isfile(file):
206
- #rich_print(f'[blue]Skipping non-file[/blue] {file}')
249
+ rerr.print(f"[blue]Skipping non-file:[/blue] {file}")
207
250
  continue
208
251
 
252
+ # upload binary
253
+ if args.upload:
254
+ api.RE_upload(file)
255
+
209
256
  if args.analyse:
210
257
  try:
211
258
  fpath, exec_fmt, exec_isa = verify_binary(file)
212
- rich_print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
213
- rich_print(f'[green bold]Analysing[/green bold] {file}')
214
- api.RE_analyse(file, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
259
+ rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
215
260
  except Exception as e:
216
- rich_print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
261
+ rerr.print(f"[red bold][!] Error, binary exec type could not be verified:[/red bold] {file}")
262
+ rerr.print(f"[yellow] {e} [/yellow]")
263
+
264
+ rout.print(f"[green bold]Analysing:[/green bold] {file}")
265
+ api.RE_analyse(file, model_name=api.re_conf["model"], isa_options=args.isa,
266
+ platform_options=args.platform, dynamic_execution=args.dynamic_execution,
267
+ command_line_args=args.cmd_line_args, file_options=args.exec_format,
268
+ binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
269
+ duplicate=args.duplicate, debug_fpath=args.debug)
217
270
 
218
- elif args.delete:
271
+ if args.delete:
219
272
  try:
220
- rich_print(f'[green bold]Deleting analyses for[/green bold] {file}')
221
- api.RE_delete(args.binary, args.model)
273
+ rout.print(f"[green bold]Deleting analyses for:[/green bold] {file}")
274
+ api.RE_delete(file)
222
275
  except Exception as e:
223
- rich_print(f"[red bold][!] Error, could not delete analysis for [/red bold] {file}")
224
- else:
225
- rich_print(f'Error, -D only supports analyse or delete')
226
- exit(-1)
227
-
228
- exit(0)
229
-
230
- if args.A or args.analyse or args.extract or args.logs or args.delete or args.signature or args.similarity or args.upload or args.match or args.sbom:
231
- # verify binary is a file
276
+ rerr.print(f"[red bold][!] Error, could not delete analysis for:[/red bold] {file}")
277
+ rerr.print(f"[yellow] {e} [/yellow]")
278
+
279
+ if not (args.upload or args.analyse or args.delete):
280
+ rerr.print(f"Error, '-D' flag only supports upload, analyse, or delete.")
281
+ return -1
282
+ elif args.analyse or args.extract or args.logs or args.delete or \
283
+ args.upload or args.match or args.cves or args.sbom or args.status:
232
284
  try:
233
285
  fpath, exec_fmt, exec_isa = verify_binary(args.binary)
234
- rich_print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
286
+ # keep stdout to data only
287
+ rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
235
288
  args.binary = fpath
289
+ except TypeError as e:
290
+ rerr.print("[bold red][!] Error, please supply a valid binary file using '-b' flag.[/bold red]")
291
+ rerr.print(f"[yellow] {e} [/yellow]")
292
+ return 0
236
293
  except Exception as e:
237
- print("[!] Error, please supply a valid binary file using '-b'.")
238
- parser.print_help()
239
- exit(-1)
240
-
241
- if args.upload:
242
- # upload binary first, them carry out actions
243
- print(f"[!] RE:upload not implemented. Use analyse.")
244
- exit(-1)
245
-
246
- if args.analyse:
247
- api.RE_analyse(args.binary, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
294
+ rerr.print(f"[bold red][!] Error, binary exec type could not be verified:[/bold red] {args.binary}")
295
+ rerr.print(f"[yellow] {e} [/yellow]")
248
296
 
249
- elif args.extract:
250
- embeddings = api.RE_embeddings(args.binary, args.model)
251
- print_json(data=embeddings)
297
+ if args.upload:
298
+ api.RE_upload(args.binary)
252
299
 
253
- elif args.signature and not args.ann:
254
- # Arithetic mean of symbol embeddings
255
- b_embed = api.RE_signature(args.binary, args.model)
256
- print_json(data=b_embed)
300
+ if not args.analyse:
301
+ return 0
257
302
 
258
- elif args.similarity:
259
- #compute binary similarity from list of executables
260
- if args.from_file:
261
- binaries = list(map(lambda x: x.strip(), open(args.from_file, 'r').readlines()))
262
- else:
263
- if not args.to:
264
- print(f"Error, please specify --from-file or --to to compute binary similarity against")
265
- exit(-1)
266
- binaries = args.to.split(",")
267
-
268
- # verify all binaries are valid files
269
- for b in binaries:
270
- verify_binary(b)
271
-
272
- binary_similarity(args.binary, binaries, args.model)
273
-
274
- elif args.ann:
275
- source = None
276
- # parse embedding json file
277
-
278
- if args.embedding:
279
- if not isfile(args.embedding):
280
- print("[!] Error, please supply a valid embedding JSON file using '-e'")
281
- parser.print_help()
282
- exit(-1)
283
-
284
- embedding = json.loads(open(args.embedding, 'r').read())
285
-
286
- elif (args.symbol or args.start_vaddr) and args.binary:
287
- if args.start_vaddr:
288
- if args.start_vaddr.upper()[:2] == "0X":
289
- vaddr = int(args.start_vaddr, 16) + base_address
290
- else:
291
- vaddr = int(args.start_vaddr) + base_address
292
-
293
- print(f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
294
- embeddings = api.RE_embeddings(args.binary, args.model)
295
- matches = list(filter(lambda x: x['vaddr'] == vaddr, embeddings))
296
- if len(matches) == 0:
297
- print(f"[!] Error, could not find symbol at {hex(vaddr)} in {args.binary}")
298
- exit(-1)
299
- embedding = matches[0]['embedding']
300
- else:
301
- symb_name = args.symbol
302
- print(f"[+] Using symbol {args.symbol} from {args.binary}")
303
-
304
- embeddings = api.RE_embeddings(args.binary, args.model)
305
- matches = list(filter(lambda x: x['name'] == args.symbol, embeddings))
306
- if len(matches) == 0:
307
- print(f"[!] Error, could not find symbol at {args.symbol} in {args.binary}")
308
- exit(-1)
309
- embedding = matches[0]['embedding']
310
- elif args.binary and args.signature:
311
- print(f"[+] Searching ANN for binary embeddings {args.binary}")
312
- api.RE_nearest_binaries(api.RE_signature(args.binary, args.model), args.model, args.nns, args.collections)
313
- exit(0)
314
- else:
315
- print("[!] Error, please supply a valid embedding JSON file using '-e', or select a function using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).")
316
- parser.print_help()
317
- exit(-1)
318
-
319
-
320
- # check for valid regex
321
- if args.collections:
322
- try:
323
- re.compile(args.collections)
324
- except re.error as e:
325
- print(f"[!] Error, invalid regex for collections - {args.collections}")
326
- exit(-1)
327
-
328
- if args.found_in:
329
- if not os.path.isfile(args.found_in):
330
- print("[!] Error, --found-in flag requires a path to a binary to search from")
331
- exit(-1)
332
- print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
333
- embeddings = api.RE_embeddings(args.found_in, args.model)
334
- res = api.RE_compute_distance(embedding, embeddings, int(args.nns))
335
- print_json(data=res)
336
- elif args.from_file:
337
- if not os.path.isfile(args.from_file):
338
- print("[!] Error, --from-file flag requires a path to a JSON embeddings file")
339
- exit(-1)
340
- print(f"[+] Searching for symbols similar to embedding in binary {args.from_file}")
341
- res = api.RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
342
- print_json(data=res)
343
- else:
344
- print(f"[+] Searching for similar symbols to embedding in {'all' if not args.collections else args.collections} collections.")
345
- api.RE_nearest_symbols(embedding, args.model, int(args.nns), collections=args.collections)
346
-
347
-
348
- elif args.match:
349
- embeddings = None
350
- if args.from_file:
351
- embeddings = json.load(open(args.from_file, 'r'))
352
- elif args.found_in:
353
- if not os.path.isfile(args.found_in):
354
- print("[!] Error, --found-in flag requires a path to a binary to search from")
355
- exit(-1)
356
- print(f"[+] Matching symbols between {args.binary} and {args.found_in}")
357
- embeddings = api.RE_embeddings(args.found_in, args.model)
358
- else:
359
- print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
360
- exit(-1)
361
-
362
- confidence = 0.99
363
- if args.confidence:
364
- confidences = {
365
- 'high': 0.99,
366
- 'medium': 0.95,
367
- 'low': 0.9,
368
- 'all': 0.0
369
- }
370
- if args.confidence in confidences.keys():
371
- confidence = confidences[args.confidence]
303
+ # upload binary first, them carry out actions
304
+ if args.analyse:
305
+ api.RE_analyse(args.binary, model_name=api.re_conf["model"], isa_options=args.isa,
306
+ platform_options=args.platform, dynamic_execution=args.dynamic_execution,
307
+ command_line_args=args.cmd_line_args, file_options=args.exec_format,
308
+ binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
309
+ duplicate=args.duplicate, debug_fpath=args.debug)
310
+
311
+ elif args.extract:
312
+ embeddings = api.RE_embeddings(args.binary).json()
313
+ print_json(data=embeddings)
314
+
315
+ elif args.match:
316
+ # parse confidences
317
+ confidence: float = 0.90
318
+ if args.confidence:
319
+ confidences = {
320
+ "high": 0.95,
321
+ "medium": 0.9,
322
+ "low": 0.7,
323
+ "partial": 0.5,
324
+ "all": 0.0
325
+ }
326
+ if args.confidence in confidences.keys():
327
+ confidence = confidences[args.confidence]
328
+
329
+ if args.from_file:
330
+ if not os.path.isfile(args.from_file) and not os.access(args.from_file, os.R_OK):
331
+ rerr.print("[bold red][!] Error, '--from-file' flag requires a path to a JSON embeddings file.[/bold red]")
332
+ return -1
333
+ rout.print(f"[+] Searching for symbols similar to embedding in binary: {args.from_file}")
334
+ embeddings = json.load(open(args.from_file))
335
+ elif args.found_in:
336
+ if not os.path.isfile(args.found_in) and not os.access(args.found_in, os.R_OK):
337
+ rerr.print("[bold red][!] Error, '--found-in' flag requires a path to a binary to search from.[/bold red]")
338
+ return -1
339
+ rout.print(f"[+] Matching symbols between {args.binary} and {args.found_in}.")
340
+ embeddings = api.RE_embeddings(args.found_in).json()["data"]["embedding"]
372
341
  else:
373
- confidence = float(args.confidence)
374
-
375
- match(args.binary, args.model, embeddings, confidence=confidence, deviation=float(args.deviation))
342
+ return match_for_each(args.binary, confidence, args.nns)
343
+
344
+ match(args.binary, embeddings, confidence=confidence, deviation=float(args.deviation))
376
345
 
346
+ elif args.logs:
347
+ api.RE_logs(args.binary)
377
348
 
378
- elif args.sca:
379
- api.RE_sca(args.binary)
349
+ elif args.delete:
350
+ api.RE_delete(args.binary)
380
351
 
381
- elif args.logs:
382
- api.RE_logs(args.binary, args.model)
352
+ elif args.sbom:
353
+ api.RE_SBOM(args.binary)
383
354
 
384
- elif args.delete:
385
- api.RE_delete(args.binary, args.model)
355
+ elif args.cves:
356
+ api.RE_cves(args.binary)
386
357
 
387
- elif args.sbom:
388
- api.RE_SBOM(args.binary, args.model)
358
+ elif args.status:
359
+ api.RE_status(args.binary, console=True)
389
360
 
390
- elif args.cves:
391
- api.RE_cves(args.binary, args.model)
392
361
  else:
393
- print("[!] Error, please supply an action command")
362
+ rerr.print("[bold red][!] Error, please supply an action command.[/bold red]")
394
363
  parser.print_help()
364
+ return 0
395
365
 
396
366
 
397
- if __name__ == '__main__':
398
- main()
367
+ if __name__ == "__main__":
368
+ exit(main())