reait 0.0.20__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reait/main.py CHANGED
@@ -1,29 +1,29 @@
1
- #!/usr/bin/env python
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
2
3
  from __future__ import print_function
3
4
 
4
5
  import logging
6
+ from pathlib import Path
7
+ from typing import Optional
5
8
 
6
9
  from rich import print_json
7
10
  from rich.progress import track
8
11
  from rich.console import Console
9
- from rich.table import Table
10
12
  import os
11
13
  import argparse
12
14
  import json
13
- from os.path import isfile
14
15
  from sys import exit, stdout, stderr
15
- from reait import api, __version__
16
16
  from scipy.spatial import distance
17
17
  from glob import iglob
18
18
  import numpy as np
19
- from concurrent.futures import ThreadPoolExecutor, as_completed
20
- from multiprocessing import cpu_count
21
19
 
22
- rerr = Console(file=stderr)
23
- rout = Console(file=stdout)
20
+ import api
24
21
 
22
+ rerr = Console(file=stderr, width=180)
23
+ rout = Console(file=stdout, width=180)
25
24
 
26
- def version():
25
+
26
+ def version() -> int:
27
27
  """
28
28
  Display program version
29
29
  """
@@ -37,45 +37,29 @@ def version():
37
37
  :: ::::::::::: :::
38
38
  :: ::::: :::: :::
39
39
  :::::::: :::::::: [/bold blue]
40
- [bold red]reait[/bold red] [bold bright_green]v{__version__}[/bold bright_green]
40
+ [bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]
41
41
  """)
42
42
  rout.print("[yellow]Config:[/yellow]")
43
43
  print_json(data=api.re_conf)
44
+ return 0
44
45
 
45
46
 
46
- def verify_binary(fpath_fmt: str):
47
- fmt = None
47
+ def verify_binary(fpath_fmt: str) -> tuple[str, str, str]:
48
48
  fpath = fpath_fmt
49
49
 
50
- # if ':' in fpath_fmt:
51
- # fpath, fmt = fpath_fmt.split(':')
52
-
53
- if not os.path.isfile(fpath):
54
- raise RuntimeError(f"File path {fpath} is not a file")
55
-
56
- # if getsize(fpath) > 1024 * 1024 * 10:
57
- # raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
58
-
59
- if not fmt:
60
- exec_format, exec_isa = api.file_type(fpath)
61
- else:
62
- if '-' not in fmt:
63
- raise RuntimeError(
64
- 'Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
65
-
66
- exec_format, exec_isa = fmt.split('-')
50
+ exec_format, exec_isa = api.file_type(fpath)
67
51
 
68
52
  return fpath, exec_format, exec_isa
69
53
 
70
54
 
71
- def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.95, deviation: float = 0.1):
55
+ def match(fpath: str, embeddings: list, confidence: float = 0.95, deviation: float = 0.1) -> None:
72
56
  """
73
57
  Match embeddings in fpath from a list of embeddings
74
58
  """
75
- print(f"Matching symbols from {fpath} with confidence {confidence}")
76
- sink_embed_mat = np.vstack(list(map(lambda x: x['embedding'], embeddings)))
77
- b_embeds = api.RE_embeddings(fpath).json()
78
- source_embed_mat = np.vstack(list(map(lambda x: x['embedding'], b_embeds)))
59
+ rout.print(f"Matching symbols from {fpath} with confidence {confidence}.")
60
+ sink_embed_mat = np.vstack(list(map(lambda x: x["embedding"], embeddings)))
61
+ b_embeds = api.RE_embeddings(fpath).json()["data"]
62
+ source_embed_mat = np.vstack(list(map(lambda x: x["embedding"], b_embeds)))
79
63
  # angular distance over cosine
80
64
  # closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
81
65
  closest = distance.cdist(source_embed_mat, sink_embed_mat, api.angular_distance)
@@ -83,7 +67,7 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
83
67
  # closest = rescale_sim(closest)
84
68
  i, j = closest.shape
85
69
 
86
- for _i in track(range(i), description='Matching Symbols...'):
70
+ for _i in track(range(i), description="Matching Symbols..."):
87
71
  row = closest[_i, :]
88
72
  match_index, second_match = row.argsort()[::-1][:2]
89
73
  source_index = _i
@@ -95,201 +79,159 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
95
79
 
96
80
  if row[match_index] >= confidence:
97
81
  rout.print(
98
- f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
82
+ f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t"
83
+ f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
84
+ f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
99
85
  elif (m_confidence - s_confidence) > deviation:
100
86
  rout.print(
101
- f"[bold magenta]Possible match[/bold magenta][yellow]\tConfidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
87
+ f"[bold magenta]Possible match[/bold magenta][yellow]\t"
88
+ f"Confidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t"
89
+ f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
90
+ f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
102
91
  else:
103
92
  rerr.print(
104
- f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
93
+ f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t"
94
+ f"{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
105
95
  pass
106
96
 
107
97
 
108
- def match_for_each(fpath: str, model_name: str, confidence: float = 0.95, collections=None):
98
+ def match_for_each(fpath: str, confidence: float = 0.9, nns: int = 1) -> int:
109
99
  """
110
100
  Match embeddings in fpath from a list of embeddings
111
101
  """
112
- if collections is None:
113
- collections = []
114
- print(f"Matching symbols from {fpath} with confidence {confidence}")
115
- b_embeds = api.RE_embeddings(fpath).json()
116
- b_hash = api.re_binary_id(fpath)
117
-
118
- with ThreadPoolExecutor(max_workers=cpu_count()) as p:
119
- # print(f"Collections: {collections}")
120
- partial = lambda x: api.RE_nearest_symbols(x['embedding'], model_name, 1, collections=collections,
121
- ignore_hashes=[b_hash]).json()
122
- res = {p.submit(partial, embed): embed for embed in b_embeds}
123
-
124
- for future in track(as_completed(res), description='Matching Symbols...'):
125
- # get result from future
126
- symbol = res[future]
127
-
128
- embedding = symbol['embedding']
129
- # do ANN call to match symbols, ignore functions from current file
130
- f_suggestions = api.RE_nearest_symbols(embedding, model_name, 1, collections=collections,
131
- ignore_hashes=[api.re_binary_id(fpath)]).json()
132
-
133
- if len(f_suggestions) == 0:
134
- # no match
135
- rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
136
- continue
102
+ nns = max(nns, 1)
137
103
 
138
- matched = f_suggestions[0]
139
- if matched['distance'] >= confidence:
140
- rout.print(
141
- f"\t[bold green]Found match![/bold green][yellow]\tConfidence: {matched['distance']:.05f}[/yellow]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]\t->\t[blue]{matched['name']}:{matched['sha_256_hash']}")
142
- continue
104
+ rout.print(f"Matching symbols from '{fpath}' with a confidence {confidence:.02f} and up to "
105
+ f"{nns} result{'' if nns == 1 else 's'} per function")
106
+ functions = api.RE_analyze_functions(fpath).json()["functions"]
107
+ function_matches = api.RE_nearest_functions(fpath, nns=nns, distance=1 - confidence).json()["function_matches"]
108
+
109
+ if len(function_matches) == 0:
110
+ rerr.print(f"[bold red]No matches found for a confidence of [/bold red] {confidence:.02f}")
111
+ return -1
112
+ else:
113
+ for function in functions:
114
+ matches = list(filter(lambda x: function["function_id"] == x["origin_function_id"], function_matches))
115
+
116
+ if len(matches):
117
+ rout.print(f"[bold green]Found {len(matches)} match{'' if len(matches) == 1 else 'es'} for "
118
+ f"[/bold green][blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
143
119
 
144
- rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
120
+ for match in matches:
121
+ rout.print(f"\t[yellow]Confidence: {match['confidence']:.05f}[/yellow]"
122
+ f"\t[blue]{match['nearest_neighbor_function_name']}"
123
+ f" ({match['nearest_neighbor_binary_name']})[/blue]")
124
+ else:
125
+ rout.print(f"[bold red]No matches found for[/bold red] "
126
+ f"[blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
127
+ return 0
145
128
 
146
129
 
147
- def parse_collections(collections: str):
130
+ def parse_collections(collections: str) -> Optional[list[str]]:
148
131
  """
149
- Return collections as list from CSV
132
+ Return collections as list from CSV
150
133
  """
151
134
  if not collections:
152
135
  return None
153
- return collections.split(',')
136
+ return collections.split(",")
154
137
 
155
138
 
156
139
  def rescale_sim(x):
157
140
  """
158
- Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values separated, map to hyperbolic space
141
+ Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low,
142
+ high values separated, map to hyperbolic space
159
143
  """
160
144
  return np.power(x, 5)
161
145
 
162
146
 
163
- def binary_similarity(fpath: str, fpaths: list, model_name: str):
164
- """
165
- Compute binary similarity between source and list of binary files
166
- """
167
- console = Console()
168
-
169
- table = Table(title=f"Binary Similarity to {fpath}")
170
- table.add_column("Binary", justify="right", style="cyan", no_wrap=True)
171
- table.add_column("SHA3-256", style="magenta", no_wrap=True)
172
- table.add_column("Similarity", style="yellow", no_wrap=True)
147
+ def validate_file(arg):
148
+ file = Path(arg)
149
+ if file.is_file():
150
+ return file.absolute()
151
+ raise FileNotFoundError(f"File path {arg} does not exists.")
173
152
 
174
- b_embed = api.RE_signature(fpath).json()
175
153
 
176
- b_sums = []
177
- for b in track(fpaths, description='Computing Binary Similarity...'):
178
- try:
179
- b_sum = api.RE_signature(b).json()
180
- b_sums.append(b_sum)
181
- except Exception as e:
182
- rerr.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.re_binary_id(b)}[/green bold]")
183
- rerr.print(e)
154
+ def validate_dir(arg):
155
+ dir = Path(arg)
156
+ if dir.is_dir():
157
+ return dir.absolute()
158
+ raise NotADirectoryError(f"Directory path {arg} does not exists.")
184
159
 
185
- if len(b_sums) > 0:
186
- # closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
187
- closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
188
160
 
189
- for binary, similarity in zip(fpaths, closest.tolist()[0]):
190
- # table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{rescale_sim(similarity):.05f}")
191
- table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{similarity:.05f}")
192
-
193
- rout.print(table)
194
-
195
-
196
- def main() -> None:
161
+ def main() -> int:
197
162
  """
198
163
  Tool entry
199
164
  """
200
165
  parser = argparse.ArgumentParser(add_help=False)
201
- parser.add_argument("-b", "--binary", default="",
166
+ parser.add_argument("-b", "--binary", type=validate_file,
202
167
  help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
203
168
  parser.add_argument("-B", "--binary-hash", default="", help="Hex-encoded SHA-256 hash of the binary to use")
204
- parser.add_argument("-D", "--dir", default="", help="Path of directory to recursively analyse")
205
- parser.add_argument("-a", "--analyse", action='store_true',
169
+ parser.add_argument("-D", "--dir", type=validate_dir, help="Path of directory to recursively analyse")
170
+ parser.add_argument("-a", "--analyse", action="store_true",
206
171
  help="Perform a full analysis and generate embeddings for every symbol")
207
- parser.add_argument("--no-embeddings", action='store_true',
208
- help="Only perform binary analysis. Do not generate embeddings for symbols")
209
172
  parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
210
- parser.add_argument("-A", action='store_true', help="Upload and Analyse a new binary")
211
- parser.add_argument("-u", "--upload", action='store_true', help="Upload a new binary to remote server")
212
- parser.add_argument("--duplicate", default=False, action='store_true', help="Duplicate an existing binary")
213
- parser.add_argument("-n", "--ann", action='store_true',
214
- help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
173
+ parser.add_argument("-A", action="store_true", help="Upload and Analyse a new binary")
174
+ parser.add_argument("-u", "--upload", action="store_true", help="Upload a new binary to remote server")
175
+ parser.add_argument("--duplicate", default=False, action="store_true", help="Duplicate an existing binary")
215
176
  parser.add_argument("-e", "--embedding", help="Path of JSON file containing a BinNet embedding")
216
- parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
177
+ parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch", type=int)
217
178
  parser.add_argument("--collections", default=None,
218
179
  help="Comma Seperated Value of collections to search from e.g. libxml2,libpcap. Used to select RevEng.AI collections for filtering search results")
219
180
  parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
220
181
  parser.add_argument("--from-file",
221
182
  help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
222
183
  parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
223
- # parser.add_argument("-C", "--sca", action="store_true",
224
- # help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
225
184
  parser.add_argument("--sbom", action="store_true", help="Generate SBOM for binary")
226
185
  parser.add_argument("-m", "--model", default=None, help="AI model used to generate embeddings")
227
- parser.add_argument("-x", "--extract", action='store_true', help="Fetch embeddings for binary")
228
- parser.add_argument("--start-vaddr", help="Start virtual address of the function to extract embeddings")
229
- parser.add_argument("--symbol", help="Name of the symbol to extract embeddings")
230
- parser.add_argument("-s", "--signature", action='store_true', help="Generate a RevEng.AI binary signature")
231
- parser.add_argument("-S", "--similarity", action='store_true',
232
- help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
233
- parser.add_argument("-t", "--to", help="CSV list of executables to compute binary similarity against")
234
- parser.add_argument("-M", "--match", action='store_true',
186
+ parser.add_argument("-x", "--extract", action="store_true", help="Fetch embeddings for binary")
187
+ parser.add_argument("-M", "--match", action="store_true",
235
188
  help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
236
- parser.add_argument("--confidence", default="high", help="Confidence threshold used to match symbols.")
237
- parser.add_argument("--deviation", default=0.2,
189
+ parser.add_argument("--confidence", default="high", choices=["high", "medium", "low", "partial", "all"],
190
+ help="Confidence threshold used to match symbols. Valid values are 'all', 'medium', 'low', 'partial' or 'high'[DEFAULT]")
191
+ parser.add_argument("--deviation", default=0.1, type=float,
238
192
  help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
239
- parser.add_argument("-l", "--logs", action='store_true', help="Fetch analysis log file for binary")
240
- parser.add_argument("-d", "--delete", action='store_true', help="Delete all metadata associated with binary")
241
- parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
193
+ parser.add_argument("-l", "--logs", action="store_true", help="Fetch analysis log file for binary")
194
+ parser.add_argument("-d", "--delete", action="store_true", help="Delete all metadata associated with binary")
195
+ parser.add_argument("-k", "--apikey", help="RevEng.AI Personal API key")
242
196
  parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
243
197
  parser.add_argument("-v", "--version", action="store_true", help="Display version information")
244
198
  parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
245
- help=argparse._('Show this help message and exit'))
199
+ help=argparse._("Show this help message and exit"))
246
200
  parser.add_argument("--isa", default=None, help="Override executable ISA. Valid values are x86, x86_64, ARMv7")
247
201
  parser.add_argument("--exec-format", default=None,
248
202
  help="Override executable format. Valid values are pe, elf, macho, raw")
249
203
  parser.add_argument("--platform", default=None,
250
204
  help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
251
- parser.add_argument("--dynamic-execution", default=False, action='store_true',
205
+ parser.add_argument("--dynamic-execution", default=False, action="store_true",
252
206
  help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
253
207
  parser.add_argument("--cmd-line-args", default="",
254
208
  help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
255
209
  parser.add_argument("--scope", default="private", choices=["public", "private"],
256
210
  help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
257
211
  parser.add_argument("--tags", default=None, type=str,
258
- help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3..")
212
+ help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3.")
259
213
  parser.add_argument("--priority", default=0, type=int, help="Add priority to processing queue.")
260
- parser.add_argument("--verbose", default=False, action='store_true', help="Set verbose output.")
214
+ parser.add_argument("--verbose", default=False, action="store_true", help="Set verbose output.")
215
+ parser.add_argument("--debug", default=None, help="Debug file path to write pass with analysis")
216
+ parser.add_argument("-s", "--status", action="store_true", help="Ongoing status of the provided binary")
217
+
261
218
  args = parser.parse_args()
262
219
 
263
220
  # set re_conf args
264
- for arg in ('apikey', 'host', 'model'):
221
+ for arg in ("apikey", "host", "model",):
265
222
  if getattr(args, arg):
266
223
  api.re_conf[arg] = getattr(args, arg)
267
224
 
268
225
  logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
269
226
 
270
- # validate length of string tags
271
- if args.tags:
272
- # don't add non-content as tags
273
- if len(args.tags.strip()) == 0:
274
- args.tags = None
275
-
276
- else:
277
- # convert to list
278
- args.tags = args.tags.split(',')
279
-
280
227
  # display version and exit
281
228
  if args.version:
282
- version()
283
- exit(0)
284
-
285
- exec_fmt = None
286
- exec_isa = None
287
- base_address = 0
288
- if args.base_address:
289
- if args.base_address.upper()[:2] == "0X":
290
- base_address = int(args.base_address, 16)
291
- else:
292
- base_address = int(args.base_address)
229
+ return version()
230
+
231
+ # validate length of string tags
232
+ tags = None
233
+ if args.tags:
234
+ tags = parse_collections(args.tags)
293
235
 
294
236
  collections = None
295
237
  if args.collections:
@@ -297,19 +239,14 @@ def main() -> None:
297
239
 
298
240
  # auto analysis, uploads and starts analysis
299
241
  if args.A:
300
- args.upload = True
301
- args.analyse = True
242
+ args.upload = args.analyse = True
302
243
 
303
244
  if args.dir:
304
- if not os.path.isdir(args.dir):
305
- rerr.print(f'Error, {args.dir} is not a valid directory path')
306
- exit(-1)
307
-
308
- files = iglob(os.path.abspath(args.dir) + '/**/*', recursive=True)
245
+ files = iglob(os.path.abspath(args.dir) + "/**/*", recursive=True)
309
246
  ## perform operation on all files inside directory
310
- for file in track(files, description='Files in directory'):
247
+ for file in track(files, description="Files in directory"):
311
248
  if not os.path.isfile(file):
312
- rerr.print(f'[blue]Skipping non-file[/blue] {file}')
249
+ rerr.print(f"[blue]Skipping non-file:[/blue] {file}")
313
250
  continue
314
251
 
315
252
  # upload binary
@@ -319,196 +256,113 @@ def main() -> None:
319
256
  if args.analyse:
320
257
  try:
321
258
  fpath, exec_fmt, exec_isa = verify_binary(file)
322
- rout.print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
323
- rout.print(f'[green bold]Analysing[/green bold] {file}')
324
- api.RE_analyse(file, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
325
- dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
326
- file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags,
327
- priority=args.priority, duplicate=args.duplicate)
259
+ rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
328
260
  except Exception as e:
329
- rerr.print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
261
+ rerr.print(f"[red bold][!] Error, binary exec type could not be verified:[/red bold] {file}")
262
+ rerr.print(f"[yellow] {e} [/yellow]")
263
+
264
+ rout.print(f"[green bold]Analysing:[/green bold] {file}")
265
+ api.RE_analyse(file, model_name=api.re_conf["model"], isa_options=args.isa,
266
+ platform_options=args.platform, dynamic_execution=args.dynamic_execution,
267
+ command_line_args=args.cmd_line_args, file_options=args.exec_format,
268
+ binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
269
+ duplicate=args.duplicate, debug_fpath=args.debug)
330
270
 
331
271
  if args.delete:
332
272
  try:
333
- rout.print(f'[green bold]Deleting analyses for[/green bold] {file}')
273
+ rout.print(f"[green bold]Deleting analyses for:[/green bold] {file}")
334
274
  api.RE_delete(file)
335
275
  except Exception as e:
336
- rerr.print(f"[red bold][!] Error, could not delete analysis for [/red bold] {file}")
276
+ rerr.print(f"[red bold][!] Error, could not delete analysis for:[/red bold] {file}")
337
277
  rerr.print(f"[yellow] {e} [/yellow]")
338
- if not (args.upload or args.analyse or args.delete):
339
- rerr.print(f'Error, -D only supports upload, analyse, or delete')
340
- exit(-1)
341
-
342
- exit(0)
343
278
 
344
- if args.A or args.analyse or args.extract or args.logs or args.delete or args.signature or args.similarity or args.upload or args.match or args.sbom:
345
- # verify binary is a file
279
+ if not (args.upload or args.analyse or args.delete):
280
+ rerr.print(f"Error, '-D' flag only supports upload, analyse, or delete.")
281
+ return -1
282
+ elif args.analyse or args.extract or args.logs or args.delete or \
283
+ args.upload or args.match or args.cves or args.sbom or args.status:
346
284
  try:
347
285
  fpath, exec_fmt, exec_isa = verify_binary(args.binary)
348
286
  # keep stdout to data only
349
- rerr.print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
287
+ rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
350
288
  args.binary = fpath
289
+ except TypeError as e:
290
+ rerr.print("[bold red][!] Error, please supply a valid binary file using '-b' flag.[/bold red]")
291
+ rerr.print(f"[yellow] {e} [/yellow]")
292
+ return 0
351
293
  except Exception as e:
352
- rerr.print(f"[bold red]{str(e)}[/bold red]")
353
- rerr.print("[bold red][!] Error, please supply a valid binary file using '-b'.[/bold red]")
354
- # parser.print_help()
355
- exit(-1)
294
+ rerr.print(f"[bold red][!] Error, binary exec type could not be verified:[/bold red] {args.binary}")
295
+ rerr.print(f"[yellow] {e} [/yellow]")
356
296
 
357
- if args.upload:
297
+ if args.upload:
298
+ api.RE_upload(args.binary)
358
299
 
359
- api.RE_upload(args.binary)
300
+ if not args.analyse:
301
+ return 0
360
302
 
361
- if not args.analyse:
362
- exit(0)
363
303
  # upload binary first, them carry out actions
364
-
365
- if args.analyse:
366
- api.RE_analyse(args.binary, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
367
- dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
368
- file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags, priority=args.priority,
369
- duplicate=args.duplicate)
370
-
371
- elif args.extract:
372
- embeddings = api.RE_embeddings(args.binary).json()
373
- print_json(data=embeddings)
374
-
375
- elif args.signature and not args.ann:
376
- # Arithetic mean of symbol embeddings
377
- b_embed = api.RE_signature(args.binary).json()
378
- print_json(data=b_embed)
379
-
380
- elif args.similarity:
381
- # compute binary similarity from list of executables
382
- if args.from_file:
383
- binaries = list(map(lambda x: x.strip(), open(args.from_file, 'r').readlines()))
384
- else:
385
- if not args.to:
386
- print(f"Error, please specify --from-file or --to to compute binary similarity against")
387
- exit(-1)
388
- binaries = args.to.split(",")
389
-
390
- # verify all binaries are valid files
391
- for b in binaries:
392
- verify_binary(b)
393
-
394
- binary_similarity(args.binary, binaries, args.model)
395
-
396
- elif args.ann:
397
- # parse embedding json file
398
- if args.embedding:
399
- if not isfile(args.embedding):
400
- print("[!] Error, please supply a valid embedding JSON file using '-e'")
401
- parser.print_help()
402
- exit(-1)
403
-
404
- embedding = json.loads(open(args.embedding, 'r').read())
405
- elif (args.symbol or args.start_vaddr) and args.binary:
406
- if args.start_vaddr:
407
- if args.start_vaddr.upper()[:2] == "0X":
408
- vaddr = int(args.start_vaddr, 16) + base_address
409
- else:
410
- vaddr = int(args.start_vaddr) + base_address
411
-
412
- print(
413
- f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
414
- embeddings = api.RE_embeddings(args.binary).json()
415
- matches = list(filter(lambda x: x['vaddr'] == vaddr, embeddings))
416
- if len(matches) == 0:
417
- print(f"[!] Error, could not find symbol at {hex(vaddr)} in {args.binary}")
418
- exit(-1)
419
- embedding = matches[0]['embedding']
304
+ if args.analyse:
305
+ api.RE_analyse(args.binary, model_name=api.re_conf["model"], isa_options=args.isa,
306
+ platform_options=args.platform, dynamic_execution=args.dynamic_execution,
307
+ command_line_args=args.cmd_line_args, file_options=args.exec_format,
308
+ binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
309
+ duplicate=args.duplicate, debug_fpath=args.debug)
310
+
311
+ elif args.extract:
312
+ embeddings = api.RE_embeddings(args.binary).json()
313
+ print_json(data=embeddings)
314
+
315
+ elif args.match:
316
+ # parse confidences
317
+ confidence: float = 0.90
318
+ if args.confidence:
319
+ confidences = {
320
+ "high": 0.95,
321
+ "medium": 0.9,
322
+ "low": 0.7,
323
+ "partial": 0.5,
324
+ "all": 0.0
325
+ }
326
+ if args.confidence in confidences.keys():
327
+ confidence = confidences[args.confidence]
328
+
329
+ if args.from_file:
330
+ if not os.path.isfile(args.from_file) and not os.access(args.from_file, os.R_OK):
331
+ rerr.print("[bold red][!] Error, '--from-file' flag requires a path to a JSON embeddings file.[/bold red]")
332
+ return -1
333
+ rout.print(f"[+] Searching for symbols similar to embedding in binary: {args.from_file}")
334
+ embeddings = json.load(open(args.from_file))
335
+ elif args.found_in:
336
+ if not os.path.isfile(args.found_in) and not os.access(args.found_in, os.R_OK):
337
+ rerr.print("[bold red][!] Error, '--found-in' flag requires a path to a binary to search from.[/bold red]")
338
+ return -1
339
+ rout.print(f"[+] Matching symbols between {args.binary} and {args.found_in}.")
340
+ embeddings = api.RE_embeddings(args.found_in).json()["data"]["embedding"]
420
341
  else:
421
- print(f"[+] Using symbol {args.symbol} from {args.binary}")
422
-
423
- embeddings = api.RE_embeddings(args.binary).json()
424
- matches = list(filter(lambda x: x['name'] == args.symbol, embeddings))
425
- if len(matches) == 0:
426
- print(f"[!] Error, could not find symbol at {args.symbol} in {args.binary}")
427
- exit(-1)
428
- embedding = matches[0]['embedding']
429
- elif args.binary and args.signature:
430
- print(f"[+] Searching ANN for binary embeddings {args.binary}")
431
- b_suggestions = api.RE_nearest_binaries(api.RE_signature(args.binary).json(), args.model, args.nns,
432
- collections, ignore_hashes=[api.re_binary_id(args.binary)])
433
- print_json(data=b_suggestions)
434
- exit(0)
435
- else:
436
- rerr.print("[bold red][!] Error, please supply a valid embedding JSON file using '-e', or select a function"
437
- " using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).[/bold red]")
438
- # parser.print_help()
439
- exit(-1)
440
-
441
- if args.found_in:
442
- if not os.path.isfile(args.found_in):
443
- print("[!] Error, --found-in flag requires a path to a binary to search from")
444
- exit(-1)
445
- print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
446
- embeddings = api.RE_embeddings(args.found_in).json()
447
- res = api.RE_compute_distance(embedding, embeddings, int(args.nns))
448
- print_json(data=res)
449
- elif args.from_file:
450
- if not os.path.isfile(args.from_file):
451
- print("[!] Error, --from-file flag requires a path to a JSON embeddings file")
452
- exit(-1)
453
- print(f"[+] Searching for symbols similar to embedding in binary {args.from_file}")
454
- res = api.RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
455
- print_json(data=res)
456
- else:
457
- print(f"[+] Searching for similar symbols to embedding in "
458
- f"{'all' if not args.collections else args.collections} collections.")
459
- f_suggestions = api.RE_nearest_symbols(embedding["embedding"], args.model, int(args.nns),
460
- collections=collections).json()
461
- print_json(data=f_suggestions)
462
-
463
- elif args.match:
464
- # parse confidences
465
- confidence = 0.90
466
- if args.confidence:
467
- confidences = {
468
- 'high': 0.95,
469
- 'medium': 0.9,
470
- 'low': 0.7,
471
- 'partial': 0.5,
472
- 'all': 0.0
473
- }
474
- if args.confidence in confidences.keys():
475
- confidence = confidences[args.confidence]
476
- else:
477
- confidence = float(args.confidence)
478
-
479
- if args.from_file:
480
- embeddings = json.load(open(args.from_file, 'r'))
481
- elif args.found_in:
482
- if not os.path.isfile(args.found_in):
483
- print("[!] Error, --found-in flag requires a path to a binary to search from")
484
- exit(-1)
485
- print(f"[+] Matching symbols between {args.binary} and {args.found_in}")
486
- embeddings = api.RE_embeddings(args.found_in).json()
487
- else:
488
- # print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
489
- match_for_each(args.binary, args.model, confidence, collections)
490
- exit(-1)
342
+ return match_for_each(args.binary, confidence, args.nns)
343
+
344
+ match(args.binary, embeddings, confidence=confidence, deviation=float(args.deviation))
491
345
 
492
- match(args.binary, args.model, embeddings, confidence=confidence, deviation=float(args.deviation))
346
+ elif args.logs:
347
+ api.RE_logs(args.binary)
493
348
 
494
- # elif args.sca:
495
- # api.RE_sca(args.binary)
349
+ elif args.delete:
350
+ api.RE_delete(args.binary)
496
351
 
497
- elif args.logs:
498
- api.RE_logs(args.binary)
352
+ elif args.sbom:
353
+ api.RE_SBOM(args.binary)
499
354
 
500
- elif args.delete:
501
- api.RE_delete(args.binary)
355
+ elif args.cves:
356
+ api.RE_cves(args.binary)
502
357
 
503
- elif args.sbom:
504
- api.RE_SBOM(args.binary)
358
+ elif args.status:
359
+ api.RE_status(args.binary, console=True)
505
360
 
506
- elif args.cves:
507
- api.RE_cves(args.binary)
508
361
  else:
509
- print("[!] Error, please supply an action command")
362
+ rerr.print("[bold red][!] Error, please supply an action command.[/bold red]")
510
363
  parser.print_help()
364
+ return 0
511
365
 
512
366
 
513
- if __name__ == '__main__':
514
- main()
367
+ if __name__ == "__main__":
368
+ exit(main())