reait 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +3 -0
- reait/api.py +429 -174
- reait/main.py +246 -130
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/METADATA +45 -21
- reait-0.0.20.dist-info/RECORD +9 -0
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/WHEEL +1 -1
- reait-0.0.18.dist-info/RECORD +0 -9
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/LICENSE +0 -0
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/entry_points.txt +0 -0
- {reait-0.0.18.dist-info → reait-0.0.20.dist-info}/top_level.txt +0 -0
reait/main.py
CHANGED
@@ -1,53 +1,67 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
from __future__ import print_function
|
3
|
-
|
4
|
-
|
3
|
+
|
4
|
+
import logging
|
5
|
+
|
6
|
+
from rich import print_json
|
5
7
|
from rich.progress import track
|
6
8
|
from rich.console import Console
|
7
9
|
from rich.table import Table
|
8
10
|
import os
|
9
|
-
import re
|
10
11
|
import argparse
|
11
|
-
import requests
|
12
|
-
from numpy import array, vstack, mean, average
|
13
|
-
from pandas import DataFrame
|
14
12
|
import json
|
15
|
-
import
|
16
|
-
from
|
17
|
-
from
|
18
|
-
from IPython import embed
|
19
|
-
from reait import api
|
13
|
+
from os.path import isfile
|
14
|
+
from sys import exit, stdout, stderr
|
15
|
+
from reait import api, __version__
|
20
16
|
from scipy.spatial import distance
|
21
|
-
from scipy.special import expit
|
22
17
|
from glob import iglob
|
23
18
|
import numpy as np
|
19
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
20
|
+
from multiprocessing import cpu_count
|
21
|
+
|
22
|
+
rerr = Console(file=stderr)
|
23
|
+
rout = Console(file=stdout)
|
24
|
+
|
24
25
|
|
25
26
|
def version():
|
26
27
|
"""
|
27
28
|
Display program version
|
28
29
|
"""
|
29
|
-
|
30
|
+
rout.print(f"""[bold blue] :::::::: ::::::::
|
31
|
+
:: :::: ::: :::
|
32
|
+
::::::::::::::::::::
|
33
|
+
::::: ::: ::::::
|
34
|
+
::::::::::::::
|
35
|
+
.:: ::: ::::
|
36
|
+
:::::: ::: :::::::
|
37
|
+
:: ::::::::::: :::
|
38
|
+
:: ::::: :::: :::
|
39
|
+
:::::::: :::::::: [/bold blue]
|
40
|
+
[bold red]reait[/bold red] [bold bright_green]v{__version__}[/bold bright_green]
|
41
|
+
""")
|
42
|
+
rout.print("[yellow]Config:[/yellow]")
|
30
43
|
print_json(data=api.re_conf)
|
31
44
|
|
32
45
|
|
33
46
|
def verify_binary(fpath_fmt: str):
|
34
|
-
fmt
|
35
|
-
fpath
|
47
|
+
fmt = None
|
48
|
+
fpath = fpath_fmt
|
36
49
|
|
37
|
-
if ':' in fpath_fmt:
|
38
|
-
|
50
|
+
# if ':' in fpath_fmt:
|
51
|
+
# fpath, fmt = fpath_fmt.split(':')
|
39
52
|
|
40
53
|
if not os.path.isfile(fpath):
|
41
54
|
raise RuntimeError(f"File path {fpath} is not a file")
|
42
55
|
|
43
|
-
if getsize(fpath) > 1024 * 1024 * 10:
|
44
|
-
|
56
|
+
# if getsize(fpath) > 1024 * 1024 * 10:
|
57
|
+
# raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
|
45
58
|
|
46
59
|
if not fmt:
|
47
60
|
exec_format, exec_isa = api.file_type(fpath)
|
48
61
|
else:
|
49
62
|
if '-' not in fmt:
|
50
|
-
raise RuntimeError(
|
63
|
+
raise RuntimeError(
|
64
|
+
'Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
|
51
65
|
|
52
66
|
exec_format, exec_isa = fmt.split('-')
|
53
67
|
|
@@ -60,13 +74,13 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
|
|
60
74
|
"""
|
61
75
|
print(f"Matching symbols from {fpath} with confidence {confidence}")
|
62
76
|
sink_embed_mat = np.vstack(list(map(lambda x: x['embedding'], embeddings)))
|
63
|
-
b_embeds = api.RE_embeddings(fpath
|
77
|
+
b_embeds = api.RE_embeddings(fpath).json()
|
64
78
|
source_embed_mat = np.vstack(list(map(lambda x: x['embedding'], b_embeds)))
|
65
79
|
# angular distance over cosine
|
66
|
-
#closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
|
80
|
+
# closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
|
67
81
|
closest = distance.cdist(source_embed_mat, sink_embed_mat, api.angular_distance)
|
68
82
|
# rescale to separate high end of (-1, 1.0)
|
69
|
-
closest = rescale_sim(closest)
|
83
|
+
# closest = rescale_sim(closest)
|
70
84
|
i, j = closest.shape
|
71
85
|
|
72
86
|
for _i in track(range(i), description='Matching Symbols...'):
|
@@ -78,22 +92,74 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
|
|
78
92
|
sink_symb = embeddings[sink_index]
|
79
93
|
m_confidence = row[match_index]
|
80
94
|
s_confidence = row[second_match]
|
81
|
-
|
95
|
+
|
82
96
|
if row[match_index] >= confidence:
|
83
|
-
|
97
|
+
rout.print(
|
98
|
+
f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
|
84
99
|
elif (m_confidence - s_confidence) > deviation:
|
85
|
-
|
100
|
+
rout.print(
|
101
|
+
f"[bold magenta]Possible match[/bold magenta][yellow]\tConfidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
|
86
102
|
else:
|
87
|
-
|
103
|
+
rerr.print(
|
104
|
+
f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
|
88
105
|
pass
|
89
106
|
|
90
|
-
|
107
|
+
|
108
|
+
def match_for_each(fpath: str, model_name: str, confidence: float = 0.95, collections=None):
|
109
|
+
"""
|
110
|
+
Match embeddings in fpath from a list of embeddings
|
111
|
+
"""
|
112
|
+
if collections is None:
|
113
|
+
collections = []
|
114
|
+
print(f"Matching symbols from {fpath} with confidence {confidence}")
|
115
|
+
b_embeds = api.RE_embeddings(fpath).json()
|
116
|
+
b_hash = api.re_binary_id(fpath)
|
117
|
+
|
118
|
+
with ThreadPoolExecutor(max_workers=cpu_count()) as p:
|
119
|
+
# print(f"Collections: {collections}")
|
120
|
+
partial = lambda x: api.RE_nearest_symbols(x['embedding'], model_name, 1, collections=collections,
|
121
|
+
ignore_hashes=[b_hash]).json()
|
122
|
+
res = {p.submit(partial, embed): embed for embed in b_embeds}
|
123
|
+
|
124
|
+
for future in track(as_completed(res), description='Matching Symbols...'):
|
125
|
+
# get result from future
|
126
|
+
symbol = res[future]
|
127
|
+
|
128
|
+
embedding = symbol['embedding']
|
129
|
+
# do ANN call to match symbols, ignore functions from current file
|
130
|
+
f_suggestions = api.RE_nearest_symbols(embedding, model_name, 1, collections=collections,
|
131
|
+
ignore_hashes=[api.re_binary_id(fpath)]).json()
|
132
|
+
|
133
|
+
if len(f_suggestions) == 0:
|
134
|
+
# no match
|
135
|
+
rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
|
136
|
+
continue
|
137
|
+
|
138
|
+
matched = f_suggestions[0]
|
139
|
+
if matched['distance'] >= confidence:
|
140
|
+
rout.print(
|
141
|
+
f"\t[bold green]Found match![/bold green][yellow]\tConfidence: {matched['distance']:.05f}[/yellow]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]\t->\t[blue]{matched['name']}:{matched['sha_256_hash']}")
|
142
|
+
continue
|
143
|
+
|
144
|
+
rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
|
145
|
+
|
146
|
+
|
147
|
+
def parse_collections(collections: str):
|
148
|
+
"""
|
149
|
+
Return collections as list from CSV
|
150
|
+
"""
|
151
|
+
if not collections:
|
152
|
+
return None
|
153
|
+
return collections.split(',')
|
154
|
+
|
155
|
+
|
91
156
|
def rescale_sim(x):
|
92
157
|
"""
|
93
|
-
Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values
|
158
|
+
Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low, high values separated, map to hyperbolic space
|
94
159
|
"""
|
95
160
|
return np.power(x, 5)
|
96
161
|
|
162
|
+
|
97
163
|
def binary_similarity(fpath: str, fpaths: list, model_name: str):
|
98
164
|
"""
|
99
165
|
Compute binary similarity between source and list of binary files
|
@@ -105,25 +171,26 @@ def binary_similarity(fpath: str, fpaths: list, model_name: str):
|
|
105
171
|
table.add_column("SHA3-256", style="magenta", no_wrap=True)
|
106
172
|
table.add_column("Similarity", style="yellow", no_wrap=True)
|
107
173
|
|
108
|
-
b_embed = api.RE_signature(fpath
|
174
|
+
b_embed = api.RE_signature(fpath).json()
|
109
175
|
|
110
176
|
b_sums = []
|
111
177
|
for b in track(fpaths, description='Computing Binary Similarity...'):
|
112
178
|
try:
|
113
|
-
b_sum = api.RE_signature(b
|
179
|
+
b_sum = api.RE_signature(b).json()
|
114
180
|
b_sums.append(b_sum)
|
115
181
|
except Exception as e:
|
116
|
-
|
117
|
-
|
182
|
+
rerr.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.re_binary_id(b)}[/green bold]")
|
183
|
+
rerr.print(e)
|
118
184
|
|
119
185
|
if len(b_sums) > 0:
|
120
|
-
|
121
|
-
|
186
|
+
# closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
|
187
|
+
closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
|
122
188
|
|
123
|
-
|
124
|
-
|
189
|
+
for binary, similarity in zip(fpaths, closest.tolist()[0]):
|
190
|
+
# table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{rescale_sim(similarity):.05f}")
|
191
|
+
table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{similarity:.05f}")
|
125
192
|
|
126
|
-
|
193
|
+
rout.print(table)
|
127
194
|
|
128
195
|
|
129
196
|
def main() -> None:
|
@@ -131,53 +198,84 @@ def main() -> None:
|
|
131
198
|
Tool entry
|
132
199
|
"""
|
133
200
|
parser = argparse.ArgumentParser(add_help=False)
|
134
|
-
parser.add_argument("-b", "--binary", default="",
|
201
|
+
parser.add_argument("-b", "--binary", default="",
|
202
|
+
help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
|
203
|
+
parser.add_argument("-B", "--binary-hash", default="", help="Hex-encoded SHA-256 hash of the binary to use")
|
135
204
|
parser.add_argument("-D", "--dir", default="", help="Path of directory to recursively analyse")
|
136
|
-
parser.add_argument("-a", "--analyse", action='store_true',
|
137
|
-
|
205
|
+
parser.add_argument("-a", "--analyse", action='store_true',
|
206
|
+
help="Perform a full analysis and generate embeddings for every symbol")
|
207
|
+
parser.add_argument("--no-embeddings", action='store_true',
|
208
|
+
help="Only perform binary analysis. Do not generate embeddings for symbols")
|
138
209
|
parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
|
139
210
|
parser.add_argument("-A", action='store_true', help="Upload and Analyse a new binary")
|
140
211
|
parser.add_argument("-u", "--upload", action='store_true', help="Upload a new binary to remote server")
|
141
|
-
parser.add_argument("
|
142
|
-
parser.add_argument("--
|
212
|
+
parser.add_argument("--duplicate", default=False, action='store_true', help="Duplicate an existing binary")
|
213
|
+
parser.add_argument("-n", "--ann", action='store_true',
|
214
|
+
help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
|
215
|
+
parser.add_argument("-e", "--embedding", help="Path of JSON file containing a BinNet embedding")
|
143
216
|
parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
|
144
|
-
parser.add_argument("--collections", default=None,
|
217
|
+
parser.add_argument("--collections", default=None,
|
218
|
+
help="Comma Seperated Value of collections to search from e.g. libxml2,libpcap. Used to select RevEng.AI collections for filtering search results")
|
145
219
|
parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
|
146
|
-
parser.add_argument("--from-file",
|
220
|
+
parser.add_argument("--from-file",
|
221
|
+
help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
|
147
222
|
parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
|
148
|
-
parser.add_argument("-C", "--sca", action="store_true",
|
223
|
+
# parser.add_argument("-C", "--sca", action="store_true",
|
224
|
+
# help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
|
149
225
|
parser.add_argument("--sbom", action="store_true", help="Generate SBOM for binary")
|
150
|
-
parser.add_argument("-m", "--model", default=
|
226
|
+
parser.add_argument("-m", "--model", default=None, help="AI model used to generate embeddings")
|
151
227
|
parser.add_argument("-x", "--extract", action='store_true', help="Fetch embeddings for binary")
|
152
228
|
parser.add_argument("--start-vaddr", help="Start virtual address of the function to extract embeddings")
|
153
229
|
parser.add_argument("--symbol", help="Name of the symbol to extract embeddings")
|
154
230
|
parser.add_argument("-s", "--signature", action='store_true', help="Generate a RevEng.AI binary signature")
|
155
|
-
parser.add_argument("-S", "--similarity", action='store_true',
|
231
|
+
parser.add_argument("-S", "--similarity", action='store_true',
|
232
|
+
help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
|
156
233
|
parser.add_argument("-t", "--to", help="CSV list of executables to compute binary similarity against")
|
157
|
-
parser.add_argument("-M", "--match", action='store_true',
|
234
|
+
parser.add_argument("-M", "--match", action='store_true',
|
235
|
+
help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
|
158
236
|
parser.add_argument("--confidence", default="high", help="Confidence threshold used to match symbols.")
|
159
|
-
parser.add_argument("--deviation", default=0.2,
|
237
|
+
parser.add_argument("--deviation", default=0.2,
|
238
|
+
help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
|
160
239
|
parser.add_argument("-l", "--logs", action='store_true', help="Fetch analysis log file for binary")
|
161
240
|
parser.add_argument("-d", "--delete", action='store_true', help="Delete all metadata associated with binary")
|
162
241
|
parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
|
163
242
|
parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
|
164
243
|
parser.add_argument("-v", "--version", action="store_true", help="Display version information")
|
165
|
-
parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
|
244
|
+
parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
|
245
|
+
help=argparse._('Show this help message and exit'))
|
166
246
|
parser.add_argument("--isa", default=None, help="Override executable ISA. Valid values are x86, x86_64, ARMv7")
|
167
|
-
parser.add_argument("--exec-format", default=None,
|
168
|
-
|
169
|
-
parser.add_argument("--
|
170
|
-
|
171
|
-
parser.add_argument("--
|
172
|
-
|
247
|
+
parser.add_argument("--exec-format", default=None,
|
248
|
+
help="Override executable format. Valid values are pe, elf, macho, raw")
|
249
|
+
parser.add_argument("--platform", default=None,
|
250
|
+
help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
|
251
|
+
parser.add_argument("--dynamic-execution", default=False, action='store_true',
|
252
|
+
help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
|
253
|
+
parser.add_argument("--cmd-line-args", default="",
|
254
|
+
help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
|
255
|
+
parser.add_argument("--scope", default="private", choices=["public", "private"],
|
256
|
+
help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
|
257
|
+
parser.add_argument("--tags", default=None, type=str,
|
258
|
+
help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3..")
|
259
|
+
parser.add_argument("--priority", default=0, type=int, help="Add priority to processing queue.")
|
260
|
+
parser.add_argument("--verbose", default=False, action='store_true', help="Set verbose output.")
|
173
261
|
args = parser.parse_args()
|
174
262
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
263
|
+
# set re_conf args
|
264
|
+
for arg in ('apikey', 'host', 'model'):
|
265
|
+
if getattr(args, arg):
|
266
|
+
api.re_conf[arg] = getattr(args, arg)
|
267
|
+
|
268
|
+
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
|
269
|
+
|
270
|
+
# validate length of string tags
|
271
|
+
if args.tags:
|
272
|
+
# don't add non-content as tags
|
273
|
+
if len(args.tags.strip()) == 0:
|
274
|
+
args.tags = None
|
275
|
+
|
276
|
+
else:
|
277
|
+
# convert to list
|
278
|
+
args.tags = args.tags.split(',')
|
181
279
|
|
182
280
|
# display version and exit
|
183
281
|
if args.version:
|
@@ -193,36 +291,52 @@ def main() -> None:
|
|
193
291
|
else:
|
194
292
|
base_address = int(args.base_address)
|
195
293
|
|
294
|
+
collections = None
|
295
|
+
if args.collections:
|
296
|
+
collections = parse_collections(args.collections)
|
297
|
+
|
298
|
+
# auto analysis, uploads and starts analysis
|
299
|
+
if args.A:
|
300
|
+
args.upload = True
|
301
|
+
args.analyse = True
|
196
302
|
|
197
303
|
if args.dir:
|
198
304
|
if not os.path.isdir(args.dir):
|
199
|
-
|
305
|
+
rerr.print(f'Error, {args.dir} is not a valid directory path')
|
200
306
|
exit(-1)
|
201
307
|
|
202
308
|
files = iglob(os.path.abspath(args.dir) + '/**/*', recursive=True)
|
203
309
|
## perform operation on all files inside directory
|
204
310
|
for file in track(files, description='Files in directory'):
|
205
311
|
if not os.path.isfile(file):
|
206
|
-
|
312
|
+
rerr.print(f'[blue]Skipping non-file[/blue] {file}')
|
207
313
|
continue
|
208
314
|
|
315
|
+
# upload binary
|
316
|
+
if args.upload:
|
317
|
+
api.RE_upload(file)
|
318
|
+
|
209
319
|
if args.analyse:
|
210
320
|
try:
|
211
321
|
fpath, exec_fmt, exec_isa = verify_binary(file)
|
212
|
-
|
213
|
-
|
214
|
-
api.RE_analyse(file,
|
322
|
+
rout.print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
|
323
|
+
rout.print(f'[green bold]Analysing[/green bold] {file}')
|
324
|
+
api.RE_analyse(file, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
|
325
|
+
dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
|
326
|
+
file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags,
|
327
|
+
priority=args.priority, duplicate=args.duplicate)
|
215
328
|
except Exception as e:
|
216
|
-
|
329
|
+
rerr.print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
|
217
330
|
|
218
|
-
|
331
|
+
if args.delete:
|
219
332
|
try:
|
220
|
-
|
221
|
-
api.RE_delete(
|
333
|
+
rout.print(f'[green bold]Deleting analyses for[/green bold] {file}')
|
334
|
+
api.RE_delete(file)
|
222
335
|
except Exception as e:
|
223
|
-
|
224
|
-
|
225
|
-
|
336
|
+
rerr.print(f"[red bold][!] Error, could not delete analysis for [/red bold] {file}")
|
337
|
+
rerr.print(f"[yellow] {e} [/yellow]")
|
338
|
+
if not (args.upload or args.analyse or args.delete):
|
339
|
+
rerr.print(f'Error, -D only supports upload, analyse, or delete')
|
226
340
|
exit(-1)
|
227
341
|
|
228
342
|
exit(0)
|
@@ -231,32 +345,40 @@ def main() -> None:
|
|
231
345
|
# verify binary is a file
|
232
346
|
try:
|
233
347
|
fpath, exec_fmt, exec_isa = verify_binary(args.binary)
|
234
|
-
|
348
|
+
# keep stdout to data only
|
349
|
+
rerr.print(f'Found {fpath}:{exec_fmt}-{exec_isa}')
|
235
350
|
args.binary = fpath
|
236
351
|
except Exception as e:
|
237
|
-
print("[
|
238
|
-
|
352
|
+
rerr.print(f"[bold red]{str(e)}[/bold red]")
|
353
|
+
rerr.print("[bold red][!] Error, please supply a valid binary file using '-b'.[/bold red]")
|
354
|
+
# parser.print_help()
|
239
355
|
exit(-1)
|
240
356
|
|
241
357
|
if args.upload:
|
358
|
+
|
359
|
+
api.RE_upload(args.binary)
|
360
|
+
|
361
|
+
if not args.analyse:
|
362
|
+
exit(0)
|
242
363
|
# upload binary first, them carry out actions
|
243
|
-
print(f"[!] RE:upload not implemented. Use analyse.")
|
244
|
-
exit(-1)
|
245
364
|
|
246
365
|
if args.analyse:
|
247
|
-
api.RE_analyse(args.binary,
|
366
|
+
api.RE_analyse(args.binary, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
|
367
|
+
dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
|
368
|
+
file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags, priority=args.priority,
|
369
|
+
duplicate=args.duplicate)
|
248
370
|
|
249
371
|
elif args.extract:
|
250
|
-
embeddings = api.RE_embeddings(args.binary
|
372
|
+
embeddings = api.RE_embeddings(args.binary).json()
|
251
373
|
print_json(data=embeddings)
|
252
374
|
|
253
375
|
elif args.signature and not args.ann:
|
254
376
|
# Arithetic mean of symbol embeddings
|
255
|
-
b_embed = api.RE_signature(args.binary
|
377
|
+
b_embed = api.RE_signature(args.binary).json()
|
256
378
|
print_json(data=b_embed)
|
257
379
|
|
258
380
|
elif args.similarity:
|
259
|
-
#compute binary similarity from list of executables
|
381
|
+
# compute binary similarity from list of executables
|
260
382
|
if args.from_file:
|
261
383
|
binaries = list(map(lambda x: x.strip(), open(args.from_file, 'r').readlines()))
|
262
384
|
else:
|
@@ -272,9 +394,7 @@ def main() -> None:
|
|
272
394
|
binary_similarity(args.binary, binaries, args.model)
|
273
395
|
|
274
396
|
elif args.ann:
|
275
|
-
source = None
|
276
397
|
# parse embedding json file
|
277
|
-
|
278
398
|
if args.embedding:
|
279
399
|
if not isfile(args.embedding):
|
280
400
|
print("[!] Error, please supply a valid embedding JSON file using '-e'")
|
@@ -282,7 +402,6 @@ def main() -> None:
|
|
282
402
|
exit(-1)
|
283
403
|
|
284
404
|
embedding = json.loads(open(args.embedding, 'r').read())
|
285
|
-
|
286
405
|
elif (args.symbol or args.start_vaddr) and args.binary:
|
287
406
|
if args.start_vaddr:
|
288
407
|
if args.start_vaddr.upper()[:2] == "0X":
|
@@ -290,18 +409,18 @@ def main() -> None:
|
|
290
409
|
else:
|
291
410
|
vaddr = int(args.start_vaddr) + base_address
|
292
411
|
|
293
|
-
print(
|
294
|
-
|
412
|
+
print(
|
413
|
+
f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
|
414
|
+
embeddings = api.RE_embeddings(args.binary).json()
|
295
415
|
matches = list(filter(lambda x: x['vaddr'] == vaddr, embeddings))
|
296
416
|
if len(matches) == 0:
|
297
417
|
print(f"[!] Error, could not find symbol at {hex(vaddr)} in {args.binary}")
|
298
418
|
exit(-1)
|
299
419
|
embedding = matches[0]['embedding']
|
300
420
|
else:
|
301
|
-
symb_name = args.symbol
|
302
421
|
print(f"[+] Using symbol {args.symbol} from {args.binary}")
|
303
422
|
|
304
|
-
embeddings = api.RE_embeddings(args.binary
|
423
|
+
embeddings = api.RE_embeddings(args.binary).json()
|
305
424
|
matches = list(filter(lambda x: x['name'] == args.symbol, embeddings))
|
306
425
|
if len(matches) == 0:
|
307
426
|
print(f"[!] Error, could not find symbol at {args.symbol} in {args.binary}")
|
@@ -309,28 +428,22 @@ def main() -> None:
|
|
309
428
|
embedding = matches[0]['embedding']
|
310
429
|
elif args.binary and args.signature:
|
311
430
|
print(f"[+] Searching ANN for binary embeddings {args.binary}")
|
312
|
-
api.RE_nearest_binaries(api.RE_signature(args.binary
|
431
|
+
b_suggestions = api.RE_nearest_binaries(api.RE_signature(args.binary).json(), args.model, args.nns,
|
432
|
+
collections, ignore_hashes=[api.re_binary_id(args.binary)])
|
433
|
+
print_json(data=b_suggestions)
|
313
434
|
exit(0)
|
314
435
|
else:
|
315
|
-
print("[!] Error, please supply a valid embedding JSON file using '-e', or select a function
|
316
|
-
|
436
|
+
rerr.print("[bold red][!] Error, please supply a valid embedding JSON file using '-e', or select a function"
|
437
|
+
" using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).[/bold red]")
|
438
|
+
# parser.print_help()
|
317
439
|
exit(-1)
|
318
440
|
|
319
|
-
|
320
|
-
# check for valid regex
|
321
|
-
if args.collections:
|
322
|
-
try:
|
323
|
-
re.compile(args.collections)
|
324
|
-
except re.error as e:
|
325
|
-
print(f"[!] Error, invalid regex for collections - {args.collections}")
|
326
|
-
exit(-1)
|
327
|
-
|
328
441
|
if args.found_in:
|
329
442
|
if not os.path.isfile(args.found_in):
|
330
443
|
print("[!] Error, --found-in flag requires a path to a binary to search from")
|
331
444
|
exit(-1)
|
332
445
|
print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
|
333
|
-
embeddings = api.RE_embeddings(args.found_in
|
446
|
+
embeddings = api.RE_embeddings(args.found_in).json()
|
334
447
|
res = api.RE_compute_distance(embedding, embeddings, int(args.nns))
|
335
448
|
print_json(data=res)
|
336
449
|
elif args.from_file:
|
@@ -341,12 +454,28 @@ def main() -> None:
|
|
341
454
|
res = api.RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
|
342
455
|
print_json(data=res)
|
343
456
|
else:
|
344
|
-
print(f"[+] Searching for similar symbols to embedding in
|
345
|
-
|
346
|
-
|
457
|
+
print(f"[+] Searching for similar symbols to embedding in "
|
458
|
+
f"{'all' if not args.collections else args.collections} collections.")
|
459
|
+
f_suggestions = api.RE_nearest_symbols(embedding["embedding"], args.model, int(args.nns),
|
460
|
+
collections=collections).json()
|
461
|
+
print_json(data=f_suggestions)
|
347
462
|
|
348
463
|
elif args.match:
|
349
|
-
|
464
|
+
# parse confidences
|
465
|
+
confidence = 0.90
|
466
|
+
if args.confidence:
|
467
|
+
confidences = {
|
468
|
+
'high': 0.95,
|
469
|
+
'medium': 0.9,
|
470
|
+
'low': 0.7,
|
471
|
+
'partial': 0.5,
|
472
|
+
'all': 0.0
|
473
|
+
}
|
474
|
+
if args.confidence in confidences.keys():
|
475
|
+
confidence = confidences[args.confidence]
|
476
|
+
else:
|
477
|
+
confidence = float(args.confidence)
|
478
|
+
|
350
479
|
if args.from_file:
|
351
480
|
embeddings = json.load(open(args.from_file, 'r'))
|
352
481
|
elif args.found_in:
|
@@ -354,41 +483,28 @@ def main() -> None:
|
|
354
483
|
print("[!] Error, --found-in flag requires a path to a binary to search from")
|
355
484
|
exit(-1)
|
356
485
|
print(f"[+] Matching symbols between {args.binary} and {args.found_in}")
|
357
|
-
embeddings = api.RE_embeddings(args.found_in
|
486
|
+
embeddings = api.RE_embeddings(args.found_in).json()
|
358
487
|
else:
|
359
|
-
print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
|
488
|
+
# print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
|
489
|
+
match_for_each(args.binary, args.model, confidence, collections)
|
360
490
|
exit(-1)
|
361
491
|
|
362
|
-
confidence = 0.99
|
363
|
-
if args.confidence:
|
364
|
-
confidences = {
|
365
|
-
'high': 0.99,
|
366
|
-
'medium': 0.95,
|
367
|
-
'low': 0.9,
|
368
|
-
'all': 0.0
|
369
|
-
}
|
370
|
-
if args.confidence in confidences.keys():
|
371
|
-
confidence = confidences[args.confidence]
|
372
|
-
else:
|
373
|
-
confidence = float(args.confidence)
|
374
|
-
|
375
492
|
match(args.binary, args.model, embeddings, confidence=confidence, deviation=float(args.deviation))
|
376
493
|
|
377
|
-
|
378
|
-
|
379
|
-
api.RE_sca(args.binary)
|
494
|
+
# elif args.sca:
|
495
|
+
# api.RE_sca(args.binary)
|
380
496
|
|
381
497
|
elif args.logs:
|
382
|
-
api.RE_logs(args.binary
|
498
|
+
api.RE_logs(args.binary)
|
383
499
|
|
384
500
|
elif args.delete:
|
385
|
-
api.RE_delete(args.binary
|
501
|
+
api.RE_delete(args.binary)
|
386
502
|
|
387
503
|
elif args.sbom:
|
388
|
-
api.RE_SBOM(args.binary
|
504
|
+
api.RE_SBOM(args.binary)
|
389
505
|
|
390
506
|
elif args.cves:
|
391
|
-
api.RE_cves(args.binary
|
507
|
+
api.RE_cves(args.binary)
|
392
508
|
else:
|
393
509
|
print("[!] Error, please supply an action command")
|
394
510
|
parser.print_help()
|