reait 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
reait/main.py CHANGED
@@ -1,53 +1,67 @@
1
1
  #!/usr/bin/env python
2
2
  from __future__ import print_function
3
- from hashlib import sha256
4
- from rich import print_json, print as rich_print
3
+
4
+ import logging
5
+
6
+ from rich import print_json
5
7
  from rich.progress import track
6
8
  from rich.console import Console
7
9
  from rich.table import Table
8
10
  import os
9
- import re
10
11
  import argparse
11
- import requests
12
- from numpy import array, vstack, mean, average
13
- from pandas import DataFrame
14
12
  import json
15
- import tomli
16
- from os.path import isfile, getsize
17
- from sys import exit
18
- from IPython import embed
19
- from reait import api
13
+ from os.path import isfile
14
+ from sys import exit, stdout, stderr
15
+ from reait import api, __version__
20
16
  from scipy.spatial import distance
21
- from scipy.special import expit
22
17
  from glob import iglob
23
18
  import numpy as np
19
+ from concurrent.futures import ThreadPoolExecutor, as_completed
20
+ from multiprocessing import cpu_count
21
+
22
+ rerr = Console(file=stderr)
23
+ rout = Console(file=stdout)
24
+
24
25
 
25
26
  def version():
26
27
  """
27
28
  Display program version
28
29
  """
29
- rich_print(f"[bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]")
30
+ rout.print(f"""[bold blue] :::::::: ::::::::
31
+ :: :::: ::: :::
32
+ ::::::::::::::::::::
33
+ ::::: ::: ::::::
34
+ ::::::::::::::
35
+ .:: ::: ::::
36
+ :::::: ::: :::::::
37
+ :: ::::::::::: :::
38
+ :: ::::: :::: :::
39
+ :::::::: :::::::: [/bold blue]
40
+ [bold red]reait[/bold red] [bold bright_green]v{__version__}[/bold bright_green]
41
+ """)
42
+ rout.print("[yellow]Config:[/yellow]")
30
43
  print_json(data=api.re_conf)
31
44
 
32
45
 
33
46
  def verify_binary(fpath_fmt: str):
34
- fmt = None
35
- fpath = fpath_fmt
47
+ fmt = None
48
+ fpath = fpath_fmt
36
49
 
37
- if ':' in fpath_fmt:
38
- fpath, fmt = fpath_fmt.split(':')
50
+ # if ':' in fpath_fmt:
51
+ # fpath, fmt = fpath_fmt.split(':')
39
52
 
40
53
  if not os.path.isfile(fpath):
41
54
  raise RuntimeError(f"File path {fpath} is not a file")
42
55
 
43
- if getsize(fpath) > 1024 * 1024 * 10:
44
- raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
56
+ # if getsize(fpath) > 1024 * 1024 * 10:
57
+ # raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
45
58
 
46
59
  if not fmt:
47
60
  exec_format, exec_isa = api.file_type(fpath)
48
61
  else:
49
62
  if '-' not in fmt:
50
- raise RuntimeError('Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
63
+ raise RuntimeError(
64
+ 'Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
51
65
 
52
66
  exec_format, exec_isa = fmt.split('-')
53
67
 
@@ -60,13 +74,13 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
60
74
  """
61
75
  print(f"Matching symbols from {fpath} with confidence {confidence}")
62
76
  sink_embed_mat = np.vstack(list(map(lambda x: x['embedding'], embeddings)))
63
- b_embeds = api.RE_embeddings(fpath, model_name)
77
+ b_embeds = api.RE_embeddings(fpath).json()
64
78
  source_embed_mat = np.vstack(list(map(lambda x: x['embedding'], b_embeds)))
65
79
  # angular distance over cosine
66
- #closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
80
+ # closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
67
81
  closest = distance.cdist(source_embed_mat, sink_embed_mat, api.angular_distance)
68
82
  # rescale to separate high end of (-1, 1.0)
69
- closest = rescale_sim(closest)
83
+ # closest = rescale_sim(closest)
70
84
  i, j = closest.shape
71
85
 
72
86
  for _i in track(range(i), description='Matching Symbols...'):
@@ -78,22 +92,74 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
78
92
  sink_symb = embeddings[sink_index]
79
93
  m_confidence = row[match_index]
80
94
  s_confidence = row[second_match]
81
-
95
+
82
96
  if row[match_index] >= confidence:
83
- rich_print(f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
97
+ rout.print(
98
+ f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
84
99
  elif (m_confidence - s_confidence) > deviation:
85
- rich_print(f"[bold magenta]Possible match[/bold magenta][yellow]\tConfidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
100
+ rout.print(
101
+ f"[bold magenta]Possible match[/bold magenta][yellow]\tConfidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
86
102
  else:
87
- #rich_print(f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
103
+ rerr.print(
104
+ f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
88
105
  pass
89
106
 
90
-
107
+
108
+ def match_for_each(fpath: str, model_name: str, confidence: float = 0.95, collections=None):
109
+ """
110
+ Match embeddings in fpath from a list of embeddings
111
+ """
112
+ if collections is None:
113
+ collections = []
114
+ print(f"Matching symbols from {fpath} with confidence {confidence}")
115
+ b_embeds = api.RE_embeddings(fpath).json()
116
+ b_hash = api.re_binary_id(fpath)
117
+
118
+ with ThreadPoolExecutor(max_workers=cpu_count()) as p:
119
+ # print(f"Collections: {collections}")
120
+ partial = lambda x: api.RE_nearest_symbols(x['embedding'], model_name, 1, collections=collections,
121
+ ignore_hashes=[b_hash]).json()
122
+ res = {p.submit(partial, embed): embed for embed in b_embeds}
123
+
124
+ for future in track(as_completed(res), description='Matching Symbols...'):
125
+ # get result from future
126
+ symbol = res[future]
127
+
128
+ embedding = symbol['embedding']
129
+ # do ANN call to match symbols, ignore functions from current file
130
+ f_suggestions = api.RE_nearest_symbols(embedding, model_name, 1, collections=collections,
131
+ ignore_hashes=[api.re_binary_id(fpath)]).json()
132
+
133
+ if len(f_suggestions) == 0:
134
+ # no match
135
+ rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
136
+ continue
137
+
138
+ matched = f_suggestions[0]
139
+ if matched['distance'] >= confidence:
140
+ rout.print(
141
+ f"\t[bold green]Found match![/bold green][yellow]\tConfidence: {matched['distance']:.05f}[/yellow]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]\t->\t[blue]{matched['name']}:{matched['sha_256_hash']}")
142
+ continue
143
+
144
+ rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
145
+
146
+
147
+ def parse_collections(collections: str):
148
+ """
149
+ Return collections as list from CSV
150
+ """
151
+ if not collections:
152
+ return None
153
+ return collections.split(',')
154
+
155
+
91
156
  def rescale_sim(x):
92
157
  """
93
- Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values seperated, map to hyperbolic space
158
+ Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values separated, map to hyperbolic space
94
159
  """
95
160
  return np.power(x, 5)
96
161
 
162
+
97
163
  def binary_similarity(fpath: str, fpaths: list, model_name: str):
98
164
  """
99
165
  Compute binary similarity between source and list of binary files
@@ -105,25 +171,26 @@ def binary_similarity(fpath: str, fpaths: list, model_name: str):
105
171
  table.add_column("SHA3-256", style="magenta", no_wrap=True)
106
172
  table.add_column("Similarity", style="yellow", no_wrap=True)
107
173
 
108
- b_embed = api.RE_signature(fpath, model_name)
174
+ b_embed = api.RE_signature(fpath).json()
109
175
 
110
176
  b_sums = []
111
177
  for b in track(fpaths, description='Computing Binary Similarity...'):
112
178
  try:
113
- b_sum = api.RE_signature(b, model_name)
179
+ b_sum = api.RE_signature(b).json()
114
180
  b_sums.append(b_sum)
115
181
  except Exception as e:
116
- console.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.binary_id(b)}[/green bold]")
117
- console.print(e)
182
+ rerr.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.re_binary_id(b)}[/green bold]")
183
+ rerr.print(e)
118
184
 
119
185
  if len(b_sums) > 0:
120
- #closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
121
- closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
186
+ # closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
187
+ closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
122
188
 
123
- for binary, similarity in zip(fpaths, closest.tolist()[0]):
124
- table.add_row(os.path.basename(binary), api.binary_id(binary), f"{rescale_sim(similarity):.05f}")
189
+ for binary, similarity in zip(fpaths, closest.tolist()[0]):
190
+ # table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{rescale_sim(similarity):.05f}")
191
+ table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{similarity:.05f}")
125
192
 
126
- console.print(table)
193
+ rout.print(table)
127
194
 
128
195
 
129
196
  def main() -> None:
@@ -131,53 +198,84 @@ def main() -> None:
131
198
  Tool entry
132
199
  """
133
200
  parser = argparse.ArgumentParser(add_help=False)
134
- parser.add_argument("-b", "--binary", default="", help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
201
+ parser.add_argument("-b", "--binary", default="",
202
+ help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
203
+ parser.add_argument("-B", "--binary-hash", default="", help="Hex-encoded SHA-256 hash of the binary to use")
135
204
  parser.add_argument("-D", "--dir", default="", help="Path of directory to recursively analyse")
136
- parser.add_argument("-a", "--analyse", action='store_true', help="Perform a full analysis and generate embeddings for every symbol")
137
- parser.add_argument("--no-embeddings", action='store_true', help="Only perform binary analysis. Do not generate embeddings for symbols")
205
+ parser.add_argument("-a", "--analyse", action='store_true',
206
+ help="Perform a full analysis and generate embeddings for every symbol")
207
+ parser.add_argument("--no-embeddings", action='store_true',
208
+ help="Only perform binary analysis. Do not generate embeddings for symbols")
138
209
  parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
139
210
  parser.add_argument("-A", action='store_true', help="Upload and Analyse a new binary")
140
211
  parser.add_argument("-u", "--upload", action='store_true', help="Upload a new binary to remote server")
141
- parser.add_argument("-n", "--ann", action='store_true', help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
142
- parser.add_argument("--embedding", help="Path of JSON file containing a BinNet embedding")
212
+ parser.add_argument("--duplicate", default=False, action='store_true', help="Duplicate an existing binary")
213
+ parser.add_argument("-n", "--ann", action='store_true',
214
+ help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
215
+ parser.add_argument("-e", "--embedding", help="Path of JSON file containing a BinNet embedding")
143
216
  parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
144
- parser.add_argument("--collections", default=None, help="Regex string to select RevEng.AI collections for filtering e.g., libc")
217
+ parser.add_argument("--collections", default=None,
218
+ help="Comma Seperated Value of collections to search from e.g. libxml2,libpcap. Used to select RevEng.AI collections for filtering search results")
145
219
  parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
146
- parser.add_argument("--from-file", help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
220
+ parser.add_argument("--from-file",
221
+ help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
147
222
  parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
148
- parser.add_argument("-C", "--sca", action="store_true", help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
223
+ # parser.add_argument("-C", "--sca", action="store_true",
224
+ # help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
149
225
  parser.add_argument("--sbom", action="store_true", help="Generate SBOM for binary")
150
- parser.add_argument("-m", "--model", default="binnet-0.1", help="AI model used to generate embeddings")
226
+ parser.add_argument("-m", "--model", default=None, help="AI model used to generate embeddings")
151
227
  parser.add_argument("-x", "--extract", action='store_true', help="Fetch embeddings for binary")
152
228
  parser.add_argument("--start-vaddr", help="Start virtual address of the function to extract embeddings")
153
229
  parser.add_argument("--symbol", help="Name of the symbol to extract embeddings")
154
230
  parser.add_argument("-s", "--signature", action='store_true', help="Generate a RevEng.AI binary signature")
155
- parser.add_argument("-S", "--similarity", action='store_true', help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
231
+ parser.add_argument("-S", "--similarity", action='store_true',
232
+ help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
156
233
  parser.add_argument("-t", "--to", help="CSV list of executables to compute binary similarity against")
157
- parser.add_argument("-M", "--match", action='store_true', help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
234
+ parser.add_argument("-M", "--match", action='store_true',
235
+ help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
158
236
  parser.add_argument("--confidence", default="high", help="Confidence threshold used to match symbols.")
159
- parser.add_argument("--deviation", default=0.2, help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
237
+ parser.add_argument("--deviation", default=0.2,
238
+ help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
160
239
  parser.add_argument("-l", "--logs", action='store_true', help="Fetch analysis log file for binary")
161
240
  parser.add_argument("-d", "--delete", action='store_true', help="Delete all metadata associated with binary")
162
241
  parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
163
242
  parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
164
243
  parser.add_argument("-v", "--version", action="store_true", help="Display version information")
165
- parser.add_argument("--help", action="help", default=argparse.SUPPRESS, help=argparse._('Show this help message and exit'))
244
+ parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
245
+ help=argparse._('Show this help message and exit'))
166
246
  parser.add_argument("--isa", default=None, help="Override executable ISA. Valid values are x86, x86_64, ARMv7")
167
- parser.add_argument("--exec-format", default=None, help="Override executable format. Valid values are pe, elf, macho, raw")
168
- parser.add_argument("--platform", default=None, help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
169
- parser.add_argument("--dynamic-execution", default=False, action='store_true', help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
170
- parser.add_argument("--cmd-line-args", default="", help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
171
- parser.add_argument("--scope", default="private", help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
172
- parser.add_argument("--tags", default=None, help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3..")
247
+ parser.add_argument("--exec-format", default=None,
248
+ help="Override executable format. Valid values are pe, elf, macho, raw")
249
+ parser.add_argument("--platform", default=None,
250
+ help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
251
+ parser.add_argument("--dynamic-execution", default=False, action='store_true',
252
+ help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
253
+ parser.add_argument("--cmd-line-args", default="",
254
+ help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
255
+ parser.add_argument("--scope", default="private", choices=["public", "private"],
256
+ help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
257
+ parser.add_argument("--tags", default=None, type=str,
258
+ help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3..")
259
+ parser.add_argument("--priority", default=0, type=int, help="Add priority to processing queue.")
260
+ parser.add_argument("--verbose", default=False, action='store_true', help="Set verbose output.")
173
261
  args = parser.parse_args()
174
262
 
175
- if args.apikey:
176
- api.re_conf['apikey'] = args.apikey
177
- if args.host:
178
- api.re_conf['host'] = args.host
179
- if args.model:
180
- api.re_conf['model'] = args.model
263
+ # set re_conf args
264
+ for arg in ('apikey', 'host', 'model'):
265
+ if getattr(args, arg):
266
+ api.re_conf[arg] = getattr(args, arg)
267
+
268
+ logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
269
+
270
+ # validate length of string tags
271
+ if args.tags:
272
+ # don't add non-content as tags
273
+ if len(args.tags.strip()) == 0:
274
+ args.tags = None
275
+
276
+ else:
277
+ # convert to list
278
+ args.tags = args.tags.split(',')
181
279
 
182
280
  # display version and exit
183
281
  if args.version:
@@ -193,36 +291,52 @@ def main() -> None:
193
291
  else:
194
292
  base_address = int(args.base_address)
195
293
 
294
+ collections = None
295
+ if args.collections:
296
+ collections = parse_collections(args.collections)
297
+
298
+ # auto analysis, uploads and starts analysis
299
+ if args.A:
300
+ args.upload = True
301
+ args.analyse = True
196
302
 
197
303
  if args.dir:
198
304
  if not os.path.isdir(args.dir):
199
- rich_print(f'Error, {args.dir} is not a valid directory path')
305
+ rerr.print(f'Error, {args.dir} is not a valid directory path')
200
306
  exit(-1)
201
307
 
202
308
  files = iglob(os.path.abspath(args.dir) + '/**/*', recursive=True)
203
309
  ## perform operation on all files inside directory
204
310
  for file in track(files, description='Files in directory'):
205
311
  if not os.path.isfile(file):
206
- #rich_print(f'[blue]Skipping non-file[/blue] {file}')
312
+ rerr.print(f'[blue]Skipping non-file[/blue] {file}')
207
313
  continue
208
314
 
315
+ # upload binary
316
+ if args.upload:
317
+ api.RE_upload(file)
318
+
209
319
  if args.analyse:
210
320
  try:
211
321
  fpath, exec_fmt, exec_isa = verify_binary(file)
212
- rich_print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
213
- rich_print(f'[green bold]Analysing[/green bold] {file}')
214
- api.RE_analyse(file, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
322
+ rout.print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
323
+ rout.print(f'[green bold]Analysing[/green bold] {file}')
324
+ api.RE_analyse(file, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
325
+ dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
326
+ file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags,
327
+ priority=args.priority, duplicate=args.duplicate)
215
328
  except Exception as e:
216
- rich_print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
329
+ rerr.print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
217
330
 
218
- elif args.delete:
331
+ if args.delete:
219
332
  try:
220
- rich_print(f'[green bold]Deleting analyses for[/green bold] {file}')
221
- api.RE_delete(args.binary, args.model)
333
+ rout.print(f'[green bold]Deleting analyses for[/green bold] {file}')
334
+ api.RE_delete(file)
222
335
  except Exception as e:
223
- rich_print(f"[red bold][!] Error, could not delete analysis for [/red bold] {file}")
224
- else:
225
- rich_print(f'Error, -D only supports analyse or delete')
336
+ rerr.print(f"[red bold][!] Error, could not delete analysis for [/red bold] {file}")
337
+ rerr.print(f"[yellow] {e} [/yellow]")
338
+ if not (args.upload or args.analyse or args.delete):
339
+ rerr.print(f'Error, -D only supports upload, analyse, or delete')
226
340
  exit(-1)
227
341
 
228
342
  exit(0)
@@ -231,32 +345,40 @@ def main() -> None:
231
345
  # verify binary is a file
232
346
  try:
233
347
  fpath, exec_fmt, exec_isa = verify_binary(args.binary)
234
- rich_print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
348
+ # keep stdout to data only
349
+ rerr.print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
235
350
  args.binary = fpath
236
351
  except Exception as e:
237
- print("[!] Error, please supply a valid binary file using '-b'.")
238
- parser.print_help()
352
+ rerr.print(f"[bold red]{str(e)}[/bold red]")
353
+ rerr.print("[bold red][!] Error, please supply a valid binary file using '-b'.[/bold red]")
354
+ # parser.print_help()
239
355
  exit(-1)
240
356
 
241
357
  if args.upload:
358
+
359
+ api.RE_upload(args.binary)
360
+
361
+ if not args.analyse:
362
+ exit(0)
242
363
  # upload binary first, them carry out actions
243
- print(f"[!] RE:upload not implemented. Use analyse.")
244
- exit(-1)
245
364
 
246
365
  if args.analyse:
247
- api.RE_analyse(args.binary, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
366
+ api.RE_analyse(args.binary, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
367
+ dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
368
+ file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags, priority=args.priority,
369
+ duplicate=args.duplicate)
248
370
 
249
371
  elif args.extract:
250
- embeddings = api.RE_embeddings(args.binary, args.model)
372
+ embeddings = api.RE_embeddings(args.binary).json()
251
373
  print_json(data=embeddings)
252
374
 
253
375
  elif args.signature and not args.ann:
254
376
  # Arithetic mean of symbol embeddings
255
- b_embed = api.RE_signature(args.binary, args.model)
377
+ b_embed = api.RE_signature(args.binary).json()
256
378
  print_json(data=b_embed)
257
379
 
258
380
  elif args.similarity:
259
- #compute binary similarity from list of executables
381
+ # compute binary similarity from list of executables
260
382
  if args.from_file:
261
383
  binaries = list(map(lambda x: x.strip(), open(args.from_file, 'r').readlines()))
262
384
  else:
@@ -272,9 +394,7 @@ def main() -> None:
272
394
  binary_similarity(args.binary, binaries, args.model)
273
395
 
274
396
  elif args.ann:
275
- source = None
276
397
  # parse embedding json file
277
-
278
398
  if args.embedding:
279
399
  if not isfile(args.embedding):
280
400
  print("[!] Error, please supply a valid embedding JSON file using '-e'")
@@ -282,7 +402,6 @@ def main() -> None:
282
402
  exit(-1)
283
403
 
284
404
  embedding = json.loads(open(args.embedding, 'r').read())
285
-
286
405
  elif (args.symbol or args.start_vaddr) and args.binary:
287
406
  if args.start_vaddr:
288
407
  if args.start_vaddr.upper()[:2] == "0X":
@@ -290,18 +409,18 @@ def main() -> None:
290
409
  else:
291
410
  vaddr = int(args.start_vaddr) + base_address
292
411
 
293
- print(f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
294
- embeddings = api.RE_embeddings(args.binary, args.model)
412
+ print(
413
+ f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
414
+ embeddings = api.RE_embeddings(args.binary).json()
295
415
  matches = list(filter(lambda x: x['vaddr'] == vaddr, embeddings))
296
416
  if len(matches) == 0:
297
417
  print(f"[!] Error, could not find symbol at {hex(vaddr)} in {args.binary}")
298
418
  exit(-1)
299
419
  embedding = matches[0]['embedding']
300
420
  else:
301
- symb_name = args.symbol
302
421
  print(f"[+] Using symbol {args.symbol} from {args.binary}")
303
422
 
304
- embeddings = api.RE_embeddings(args.binary, args.model)
423
+ embeddings = api.RE_embeddings(args.binary).json()
305
424
  matches = list(filter(lambda x: x['name'] == args.symbol, embeddings))
306
425
  if len(matches) == 0:
307
426
  print(f"[!] Error, could not find symbol at {args.symbol} in {args.binary}")
@@ -309,28 +428,22 @@ def main() -> None:
309
428
  embedding = matches[0]['embedding']
310
429
  elif args.binary and args.signature:
311
430
  print(f"[+] Searching ANN for binary embeddings {args.binary}")
312
- api.RE_nearest_binaries(api.RE_signature(args.binary, args.model), args.model, args.nns, args.collections)
431
+ b_suggestions = api.RE_nearest_binaries(api.RE_signature(args.binary).json(), args.model, args.nns,
432
+ collections, ignore_hashes=[api.re_binary_id(args.binary)])
433
+ print_json(data=b_suggestions)
313
434
  exit(0)
314
435
  else:
315
- print("[!] Error, please supply a valid embedding JSON file using '-e', or select a function using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).")
316
- parser.print_help()
436
+ rerr.print("[bold red][!] Error, please supply a valid embedding JSON file using '-e', or select a function"
437
+ " using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).[/bold red]")
438
+ # parser.print_help()
317
439
  exit(-1)
318
440
 
319
-
320
- # check for valid regex
321
- if args.collections:
322
- try:
323
- re.compile(args.collections)
324
- except re.error as e:
325
- print(f"[!] Error, invalid regex for collections - {args.collections}")
326
- exit(-1)
327
-
328
441
  if args.found_in:
329
442
  if not os.path.isfile(args.found_in):
330
443
  print("[!] Error, --found-in flag requires a path to a binary to search from")
331
444
  exit(-1)
332
445
  print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
333
- embeddings = api.RE_embeddings(args.found_in, args.model)
446
+ embeddings = api.RE_embeddings(args.found_in).json()
334
447
  res = api.RE_compute_distance(embedding, embeddings, int(args.nns))
335
448
  print_json(data=res)
336
449
  elif args.from_file:
@@ -341,12 +454,28 @@ def main() -> None:
341
454
  res = api.RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
342
455
  print_json(data=res)
343
456
  else:
344
- print(f"[+] Searching for similar symbols to embedding in {'all' if not args.collections else args.collections} collections.")
345
- api.RE_nearest_symbols(embedding, args.model, int(args.nns), collections=args.collections)
346
-
457
+ print(f"[+] Searching for similar symbols to embedding in "
458
+ f"{'all' if not args.collections else args.collections} collections.")
459
+ f_suggestions = api.RE_nearest_symbols(embedding["embedding"], args.model, int(args.nns),
460
+ collections=collections).json()
461
+ print_json(data=f_suggestions)
347
462
 
348
463
  elif args.match:
349
- embeddings = None
464
+ # parse confidences
465
+ confidence = 0.90
466
+ if args.confidence:
467
+ confidences = {
468
+ 'high': 0.95,
469
+ 'medium': 0.9,
470
+ 'low': 0.7,
471
+ 'partial': 0.5,
472
+ 'all': 0.0
473
+ }
474
+ if args.confidence in confidences.keys():
475
+ confidence = confidences[args.confidence]
476
+ else:
477
+ confidence = float(args.confidence)
478
+
350
479
  if args.from_file:
351
480
  embeddings = json.load(open(args.from_file, 'r'))
352
481
  elif args.found_in:
@@ -354,41 +483,28 @@ def main() -> None:
354
483
  print("[!] Error, --found-in flag requires a path to a binary to search from")
355
484
  exit(-1)
356
485
  print(f"[+] Matching symbols between {args.binary} and {args.found_in}")
357
- embeddings = api.RE_embeddings(args.found_in, args.model)
486
+ embeddings = api.RE_embeddings(args.found_in).json()
358
487
  else:
359
- print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
488
+ # print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
489
+ match_for_each(args.binary, args.model, confidence, collections)
360
490
  exit(-1)
361
491
 
362
- confidence = 0.99
363
- if args.confidence:
364
- confidences = {
365
- 'high': 0.99,
366
- 'medium': 0.95,
367
- 'low': 0.9,
368
- 'all': 0.0
369
- }
370
- if args.confidence in confidences.keys():
371
- confidence = confidences[args.confidence]
372
- else:
373
- confidence = float(args.confidence)
374
-
375
492
  match(args.binary, args.model, embeddings, confidence=confidence, deviation=float(args.deviation))
376
493
 
377
-
378
- elif args.sca:
379
- api.RE_sca(args.binary)
494
+ # elif args.sca:
495
+ # api.RE_sca(args.binary)
380
496
 
381
497
  elif args.logs:
382
- api.RE_logs(args.binary, args.model)
498
+ api.RE_logs(args.binary)
383
499
 
384
500
  elif args.delete:
385
- api.RE_delete(args.binary, args.model)
501
+ api.RE_delete(args.binary)
386
502
 
387
503
  elif args.sbom:
388
- api.RE_SBOM(args.binary, args.model)
504
+ api.RE_SBOM(args.binary)
389
505
 
390
506
  elif args.cves:
391
- api.RE_cves(args.binary, args.model)
507
+ api.RE_cves(args.binary)
392
508
  else:
393
509
  print("[!] Error, please supply an action command")
394
510
  parser.print_help()