reait 0.0.20__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +2 -3
- reait/api.py +296 -246
- reait/main.py +184 -330
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/METADATA +3 -36
- reait-1.0.0.dist-info/RECORD +9 -0
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/WHEEL +1 -1
- reait-0.0.20.dist-info/RECORD +0 -9
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/LICENSE +0 -0
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/entry_points.txt +0 -0
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/top_level.txt +0 -0
reait/main.py
CHANGED
@@ -1,29 +1,29 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
2
3
|
from __future__ import print_function
|
3
4
|
|
4
5
|
import logging
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Optional
|
5
8
|
|
6
9
|
from rich import print_json
|
7
10
|
from rich.progress import track
|
8
11
|
from rich.console import Console
|
9
|
-
from rich.table import Table
|
10
12
|
import os
|
11
13
|
import argparse
|
12
14
|
import json
|
13
|
-
from os.path import isfile
|
14
15
|
from sys import exit, stdout, stderr
|
15
|
-
from reait import api, __version__
|
16
16
|
from scipy.spatial import distance
|
17
17
|
from glob import iglob
|
18
18
|
import numpy as np
|
19
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
20
|
-
from multiprocessing import cpu_count
|
21
19
|
|
22
|
-
|
23
|
-
rout = Console(file=stdout)
|
20
|
+
import api
|
24
21
|
|
22
|
+
rerr = Console(file=stderr, width=180)
|
23
|
+
rout = Console(file=stdout, width=180)
|
25
24
|
|
26
|
-
|
25
|
+
|
26
|
+
def version() -> int:
|
27
27
|
"""
|
28
28
|
Display program version
|
29
29
|
"""
|
@@ -37,45 +37,29 @@ def version():
|
|
37
37
|
:: ::::::::::: :::
|
38
38
|
:: ::::: :::: :::
|
39
39
|
:::::::: :::::::: [/bold blue]
|
40
|
-
[bold red]reait[/bold red] [bold bright_green]v{__version__}[/bold bright_green]
|
40
|
+
[bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]
|
41
41
|
""")
|
42
42
|
rout.print("[yellow]Config:[/yellow]")
|
43
43
|
print_json(data=api.re_conf)
|
44
|
+
return 0
|
44
45
|
|
45
46
|
|
46
|
-
def verify_binary(fpath_fmt: str):
|
47
|
-
fmt = None
|
47
|
+
def verify_binary(fpath_fmt: str) -> tuple[str, str, str]:
|
48
48
|
fpath = fpath_fmt
|
49
49
|
|
50
|
-
|
51
|
-
# fpath, fmt = fpath_fmt.split(':')
|
52
|
-
|
53
|
-
if not os.path.isfile(fpath):
|
54
|
-
raise RuntimeError(f"File path {fpath} is not a file")
|
55
|
-
|
56
|
-
# if getsize(fpath) > 1024 * 1024 * 10:
|
57
|
-
# raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
|
58
|
-
|
59
|
-
if not fmt:
|
60
|
-
exec_format, exec_isa = api.file_type(fpath)
|
61
|
-
else:
|
62
|
-
if '-' not in fmt:
|
63
|
-
raise RuntimeError(
|
64
|
-
'Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
|
65
|
-
|
66
|
-
exec_format, exec_isa = fmt.split('-')
|
50
|
+
exec_format, exec_isa = api.file_type(fpath)
|
67
51
|
|
68
52
|
return fpath, exec_format, exec_isa
|
69
53
|
|
70
54
|
|
71
|
-
def match(fpath: str,
|
55
|
+
def match(fpath: str, embeddings: list, confidence: float = 0.95, deviation: float = 0.1) -> None:
|
72
56
|
"""
|
73
57
|
Match embeddings in fpath from a list of embeddings
|
74
58
|
"""
|
75
|
-
print(f"Matching symbols from {fpath} with confidence {confidence}")
|
76
|
-
sink_embed_mat = np.vstack(list(map(lambda x: x[
|
77
|
-
b_embeds = api.RE_embeddings(fpath).json()
|
78
|
-
source_embed_mat = np.vstack(list(map(lambda x: x[
|
59
|
+
rout.print(f"Matching symbols from {fpath} with confidence {confidence}.")
|
60
|
+
sink_embed_mat = np.vstack(list(map(lambda x: x["embedding"], embeddings)))
|
61
|
+
b_embeds = api.RE_embeddings(fpath).json()["data"]
|
62
|
+
source_embed_mat = np.vstack(list(map(lambda x: x["embedding"], b_embeds)))
|
79
63
|
# angular distance over cosine
|
80
64
|
# closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
|
81
65
|
closest = distance.cdist(source_embed_mat, sink_embed_mat, api.angular_distance)
|
@@ -83,7 +67,7 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
|
|
83
67
|
# closest = rescale_sim(closest)
|
84
68
|
i, j = closest.shape
|
85
69
|
|
86
|
-
for _i in track(range(i), description=
|
70
|
+
for _i in track(range(i), description="Matching Symbols..."):
|
87
71
|
row = closest[_i, :]
|
88
72
|
match_index, second_match = row.argsort()[::-1][:2]
|
89
73
|
source_index = _i
|
@@ -95,201 +79,159 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
|
|
95
79
|
|
96
80
|
if row[match_index] >= confidence:
|
97
81
|
rout.print(
|
98
|
-
f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t
|
82
|
+
f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t"
|
83
|
+
f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
|
84
|
+
f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
|
99
85
|
elif (m_confidence - s_confidence) > deviation:
|
100
86
|
rout.print(
|
101
|
-
f"[bold magenta]Possible match[/bold magenta][yellow]\
|
87
|
+
f"[bold magenta]Possible match[/bold magenta][yellow]\t"
|
88
|
+
f"Confidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t"
|
89
|
+
f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
|
90
|
+
f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
|
102
91
|
else:
|
103
92
|
rerr.print(
|
104
|
-
f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t
|
93
|
+
f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t"
|
94
|
+
f"{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
|
105
95
|
pass
|
106
96
|
|
107
97
|
|
108
|
-
def match_for_each(fpath: str,
|
98
|
+
def match_for_each(fpath: str, confidence: float = 0.9, nns: int = 1) -> int:
|
109
99
|
"""
|
110
100
|
Match embeddings in fpath from a list of embeddings
|
111
101
|
"""
|
112
|
-
|
113
|
-
collections = []
|
114
|
-
print(f"Matching symbols from {fpath} with confidence {confidence}")
|
115
|
-
b_embeds = api.RE_embeddings(fpath).json()
|
116
|
-
b_hash = api.re_binary_id(fpath)
|
117
|
-
|
118
|
-
with ThreadPoolExecutor(max_workers=cpu_count()) as p:
|
119
|
-
# print(f"Collections: {collections}")
|
120
|
-
partial = lambda x: api.RE_nearest_symbols(x['embedding'], model_name, 1, collections=collections,
|
121
|
-
ignore_hashes=[b_hash]).json()
|
122
|
-
res = {p.submit(partial, embed): embed for embed in b_embeds}
|
123
|
-
|
124
|
-
for future in track(as_completed(res), description='Matching Symbols...'):
|
125
|
-
# get result from future
|
126
|
-
symbol = res[future]
|
127
|
-
|
128
|
-
embedding = symbol['embedding']
|
129
|
-
# do ANN call to match symbols, ignore functions from current file
|
130
|
-
f_suggestions = api.RE_nearest_symbols(embedding, model_name, 1, collections=collections,
|
131
|
-
ignore_hashes=[api.re_binary_id(fpath)]).json()
|
132
|
-
|
133
|
-
if len(f_suggestions) == 0:
|
134
|
-
# no match
|
135
|
-
rerr.print(f"\t[bold red]No match for[/bold red]\t[blue]{symbol['name']}:{symbol['vaddr']}[/blue]")
|
136
|
-
continue
|
102
|
+
nns = max(nns, 1)
|
137
103
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
104
|
+
rout.print(f"Matching symbols from '{fpath}' with a confidence {confidence:.02f} and up to "
|
105
|
+
f"{nns} result{'' if nns == 1 else 's'} per function")
|
106
|
+
functions = api.RE_analyze_functions(fpath).json()["functions"]
|
107
|
+
function_matches = api.RE_nearest_functions(fpath, nns=nns, distance=1 - confidence).json()["function_matches"]
|
108
|
+
|
109
|
+
if len(function_matches) == 0:
|
110
|
+
rerr.print(f"[bold red]No matches found for a confidence of [/bold red] {confidence:.02f}")
|
111
|
+
return -1
|
112
|
+
else:
|
113
|
+
for function in functions:
|
114
|
+
matches = list(filter(lambda x: function["function_id"] == x["origin_function_id"], function_matches))
|
115
|
+
|
116
|
+
if len(matches):
|
117
|
+
rout.print(f"[bold green]Found {len(matches)} match{'' if len(matches) == 1 else 'es'} for "
|
118
|
+
f"[/bold green][blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
|
143
119
|
|
144
|
-
|
120
|
+
for match in matches:
|
121
|
+
rout.print(f"\t[yellow]Confidence: {match['confidence']:.05f}[/yellow]"
|
122
|
+
f"\t[blue]{match['nearest_neighbor_function_name']}"
|
123
|
+
f" ({match['nearest_neighbor_binary_name']})[/blue]")
|
124
|
+
else:
|
125
|
+
rout.print(f"[bold red]No matches found for[/bold red] "
|
126
|
+
f"[blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
|
127
|
+
return 0
|
145
128
|
|
146
129
|
|
147
|
-
def parse_collections(collections: str):
|
130
|
+
def parse_collections(collections: str) -> Optional[list[str]]:
|
148
131
|
"""
|
149
|
-
|
132
|
+
Return collections as list from CSV
|
150
133
|
"""
|
151
134
|
if not collections:
|
152
135
|
return None
|
153
|
-
return collections.split(
|
136
|
+
return collections.split(",")
|
154
137
|
|
155
138
|
|
156
139
|
def rescale_sim(x):
|
157
140
|
"""
|
158
|
-
|
141
|
+
Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low,
|
142
|
+
high values separated, map to hyperbolic space
|
159
143
|
"""
|
160
144
|
return np.power(x, 5)
|
161
145
|
|
162
146
|
|
163
|
-
def
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
table = Table(title=f"Binary Similarity to {fpath}")
|
170
|
-
table.add_column("Binary", justify="right", style="cyan", no_wrap=True)
|
171
|
-
table.add_column("SHA3-256", style="magenta", no_wrap=True)
|
172
|
-
table.add_column("Similarity", style="yellow", no_wrap=True)
|
147
|
+
def validate_file(arg):
|
148
|
+
file = Path(arg)
|
149
|
+
if file.is_file():
|
150
|
+
return file.absolute()
|
151
|
+
raise FileNotFoundError(f"File path {arg} does not exists.")
|
173
152
|
|
174
|
-
b_embed = api.RE_signature(fpath).json()
|
175
153
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
except Exception as e:
|
182
|
-
rerr.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.re_binary_id(b)}[/green bold]")
|
183
|
-
rerr.print(e)
|
154
|
+
def validate_dir(arg):
|
155
|
+
dir = Path(arg)
|
156
|
+
if dir.is_dir():
|
157
|
+
return dir.absolute()
|
158
|
+
raise NotADirectoryError(f"Directory path {arg} does not exists.")
|
184
159
|
|
185
|
-
if len(b_sums) > 0:
|
186
|
-
# closest = 1.0 - distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), 'cosine')
|
187
|
-
closest = distance.cdist(np.expand_dims(b_embed, axis=0), np.vstack(b_sums), api.angular_distance)
|
188
160
|
|
189
|
-
|
190
|
-
# table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{rescale_sim(similarity):.05f}")
|
191
|
-
table.add_row(os.path.basename(binary), api.re_binary_id(binary), f"{similarity:.05f}")
|
192
|
-
|
193
|
-
rout.print(table)
|
194
|
-
|
195
|
-
|
196
|
-
def main() -> None:
|
161
|
+
def main() -> int:
|
197
162
|
"""
|
198
163
|
Tool entry
|
199
164
|
"""
|
200
165
|
parser = argparse.ArgumentParser(add_help=False)
|
201
|
-
parser.add_argument("-b", "--binary",
|
166
|
+
parser.add_argument("-b", "--binary", type=validate_file,
|
202
167
|
help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
|
203
168
|
parser.add_argument("-B", "--binary-hash", default="", help="Hex-encoded SHA-256 hash of the binary to use")
|
204
|
-
parser.add_argument("-D", "--dir",
|
205
|
-
parser.add_argument("-a", "--analyse", action=
|
169
|
+
parser.add_argument("-D", "--dir", type=validate_dir, help="Path of directory to recursively analyse")
|
170
|
+
parser.add_argument("-a", "--analyse", action="store_true",
|
206
171
|
help="Perform a full analysis and generate embeddings for every symbol")
|
207
|
-
parser.add_argument("--no-embeddings", action='store_true',
|
208
|
-
help="Only perform binary analysis. Do not generate embeddings for symbols")
|
209
172
|
parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
|
210
|
-
parser.add_argument("-A", action=
|
211
|
-
parser.add_argument("-u", "--upload", action=
|
212
|
-
parser.add_argument("--duplicate", default=False, action=
|
213
|
-
parser.add_argument("-n", "--ann", action='store_true',
|
214
|
-
help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
|
173
|
+
parser.add_argument("-A", action="store_true", help="Upload and Analyse a new binary")
|
174
|
+
parser.add_argument("-u", "--upload", action="store_true", help="Upload a new binary to remote server")
|
175
|
+
parser.add_argument("--duplicate", default=False, action="store_true", help="Duplicate an existing binary")
|
215
176
|
parser.add_argument("-e", "--embedding", help="Path of JSON file containing a BinNet embedding")
|
216
|
-
parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
|
177
|
+
parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch", type=int)
|
217
178
|
parser.add_argument("--collections", default=None,
|
218
179
|
help="Comma Seperated Value of collections to search from e.g. libxml2,libpcap. Used to select RevEng.AI collections for filtering search results")
|
219
180
|
parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
|
220
181
|
parser.add_argument("--from-file",
|
221
182
|
help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
|
222
183
|
parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
|
223
|
-
# parser.add_argument("-C", "--sca", action="store_true",
|
224
|
-
# help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
|
225
184
|
parser.add_argument("--sbom", action="store_true", help="Generate SBOM for binary")
|
226
185
|
parser.add_argument("-m", "--model", default=None, help="AI model used to generate embeddings")
|
227
|
-
parser.add_argument("-x", "--extract", action=
|
228
|
-
parser.add_argument("
|
229
|
-
parser.add_argument("--symbol", help="Name of the symbol to extract embeddings")
|
230
|
-
parser.add_argument("-s", "--signature", action='store_true', help="Generate a RevEng.AI binary signature")
|
231
|
-
parser.add_argument("-S", "--similarity", action='store_true',
|
232
|
-
help="Compute similarity from a list of binaries. Option can be used with --from-file or -t flag with CSV of file paths. All binaries must be analysed prior to being used.")
|
233
|
-
parser.add_argument("-t", "--to", help="CSV list of executables to compute binary similarity against")
|
234
|
-
parser.add_argument("-M", "--match", action='store_true',
|
186
|
+
parser.add_argument("-x", "--extract", action="store_true", help="Fetch embeddings for binary")
|
187
|
+
parser.add_argument("-M", "--match", action="store_true",
|
235
188
|
help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
|
236
|
-
parser.add_argument("--confidence", default="high",
|
237
|
-
|
189
|
+
parser.add_argument("--confidence", default="high", choices=["high", "medium", "low", "partial", "all"],
|
190
|
+
help="Confidence threshold used to match symbols. Valid values are 'all', 'medium', 'low', 'partial' or 'high'[DEFAULT]")
|
191
|
+
parser.add_argument("--deviation", default=0.1, type=float,
|
238
192
|
help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
|
239
|
-
parser.add_argument("-l", "--logs", action=
|
240
|
-
parser.add_argument("-d", "--delete", action=
|
241
|
-
parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
|
193
|
+
parser.add_argument("-l", "--logs", action="store_true", help="Fetch analysis log file for binary")
|
194
|
+
parser.add_argument("-d", "--delete", action="store_true", help="Delete all metadata associated with binary")
|
195
|
+
parser.add_argument("-k", "--apikey", help="RevEng.AI Personal API key")
|
242
196
|
parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
|
243
197
|
parser.add_argument("-v", "--version", action="store_true", help="Display version information")
|
244
198
|
parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
|
245
|
-
help=argparse._(
|
199
|
+
help=argparse._("Show this help message and exit"))
|
246
200
|
parser.add_argument("--isa", default=None, help="Override executable ISA. Valid values are x86, x86_64, ARMv7")
|
247
201
|
parser.add_argument("--exec-format", default=None,
|
248
202
|
help="Override executable format. Valid values are pe, elf, macho, raw")
|
249
203
|
parser.add_argument("--platform", default=None,
|
250
204
|
help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
|
251
|
-
parser.add_argument("--dynamic-execution", default=False, action=
|
205
|
+
parser.add_argument("--dynamic-execution", default=False, action="store_true",
|
252
206
|
help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
|
253
207
|
parser.add_argument("--cmd-line-args", default="",
|
254
208
|
help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
|
255
209
|
parser.add_argument("--scope", default="private", choices=["public", "private"],
|
256
210
|
help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
|
257
211
|
parser.add_argument("--tags", default=None, type=str,
|
258
|
-
help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3
|
212
|
+
help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3.")
|
259
213
|
parser.add_argument("--priority", default=0, type=int, help="Add priority to processing queue.")
|
260
|
-
parser.add_argument("--verbose", default=False, action=
|
214
|
+
parser.add_argument("--verbose", default=False, action="store_true", help="Set verbose output.")
|
215
|
+
parser.add_argument("--debug", default=None, help="Debug file path to write pass with analysis")
|
216
|
+
parser.add_argument("-s", "--status", action="store_true", help="Ongoing status of the provided binary")
|
217
|
+
|
261
218
|
args = parser.parse_args()
|
262
219
|
|
263
220
|
# set re_conf args
|
264
|
-
for arg in (
|
221
|
+
for arg in ("apikey", "host", "model",):
|
265
222
|
if getattr(args, arg):
|
266
223
|
api.re_conf[arg] = getattr(args, arg)
|
267
224
|
|
268
225
|
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
|
269
226
|
|
270
|
-
# validate length of string tags
|
271
|
-
if args.tags:
|
272
|
-
# don't add non-content as tags
|
273
|
-
if len(args.tags.strip()) == 0:
|
274
|
-
args.tags = None
|
275
|
-
|
276
|
-
else:
|
277
|
-
# convert to list
|
278
|
-
args.tags = args.tags.split(',')
|
279
|
-
|
280
227
|
# display version and exit
|
281
228
|
if args.version:
|
282
|
-
version()
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
if args.base_address:
|
289
|
-
if args.base_address.upper()[:2] == "0X":
|
290
|
-
base_address = int(args.base_address, 16)
|
291
|
-
else:
|
292
|
-
base_address = int(args.base_address)
|
229
|
+
return version()
|
230
|
+
|
231
|
+
# validate length of string tags
|
232
|
+
tags = None
|
233
|
+
if args.tags:
|
234
|
+
tags = parse_collections(args.tags)
|
293
235
|
|
294
236
|
collections = None
|
295
237
|
if args.collections:
|
@@ -297,19 +239,14 @@ def main() -> None:
|
|
297
239
|
|
298
240
|
# auto analysis, uploads and starts analysis
|
299
241
|
if args.A:
|
300
|
-
args.upload = True
|
301
|
-
args.analyse = True
|
242
|
+
args.upload = args.analyse = True
|
302
243
|
|
303
244
|
if args.dir:
|
304
|
-
|
305
|
-
rerr.print(f'Error, {args.dir} is not a valid directory path')
|
306
|
-
exit(-1)
|
307
|
-
|
308
|
-
files = iglob(os.path.abspath(args.dir) + '/**/*', recursive=True)
|
245
|
+
files = iglob(os.path.abspath(args.dir) + "/**/*", recursive=True)
|
309
246
|
## perform operation on all files inside directory
|
310
|
-
for file in track(files, description=
|
247
|
+
for file in track(files, description="Files in directory"):
|
311
248
|
if not os.path.isfile(file):
|
312
|
-
rerr.print(f
|
249
|
+
rerr.print(f"[blue]Skipping non-file:[/blue] {file}")
|
313
250
|
continue
|
314
251
|
|
315
252
|
# upload binary
|
@@ -319,196 +256,113 @@ def main() -> None:
|
|
319
256
|
if args.analyse:
|
320
257
|
try:
|
321
258
|
fpath, exec_fmt, exec_isa = verify_binary(file)
|
322
|
-
rout.print(f
|
323
|
-
rout.print(f'[green bold]Analysing[/green bold] {file}')
|
324
|
-
api.RE_analyse(file, model_name=args.model, isa_options=args.isa, platform_options=args.platform,
|
325
|
-
dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args,
|
326
|
-
file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags,
|
327
|
-
priority=args.priority, duplicate=args.duplicate)
|
259
|
+
rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
|
328
260
|
except Exception as e:
|
329
|
-
rerr.print(f"[red bold][!] Error, binary exec type could not be verified[/red bold] {file}")
|
261
|
+
rerr.print(f"[red bold][!] Error, binary exec type could not be verified:[/red bold] {file}")
|
262
|
+
rerr.print(f"[yellow] {e} [/yellow]")
|
263
|
+
|
264
|
+
rout.print(f"[green bold]Analysing:[/green bold] {file}")
|
265
|
+
api.RE_analyse(file, model_name=api.re_conf["model"], isa_options=args.isa,
|
266
|
+
platform_options=args.platform, dynamic_execution=args.dynamic_execution,
|
267
|
+
command_line_args=args.cmd_line_args, file_options=args.exec_format,
|
268
|
+
binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
|
269
|
+
duplicate=args.duplicate, debug_fpath=args.debug)
|
330
270
|
|
331
271
|
if args.delete:
|
332
272
|
try:
|
333
|
-
rout.print(f
|
273
|
+
rout.print(f"[green bold]Deleting analyses for:[/green bold] {file}")
|
334
274
|
api.RE_delete(file)
|
335
275
|
except Exception as e:
|
336
|
-
rerr.print(f"[red bold][!] Error, could not delete analysis for
|
276
|
+
rerr.print(f"[red bold][!] Error, could not delete analysis for:[/red bold] {file}")
|
337
277
|
rerr.print(f"[yellow] {e} [/yellow]")
|
338
|
-
if not (args.upload or args.analyse or args.delete):
|
339
|
-
rerr.print(f'Error, -D only supports upload, analyse, or delete')
|
340
|
-
exit(-1)
|
341
|
-
|
342
|
-
exit(0)
|
343
278
|
|
344
|
-
|
345
|
-
|
279
|
+
if not (args.upload or args.analyse or args.delete):
|
280
|
+
rerr.print(f"Error, '-D' flag only supports upload, analyse, or delete.")
|
281
|
+
return -1
|
282
|
+
elif args.analyse or args.extract or args.logs or args.delete or \
|
283
|
+
args.upload or args.match or args.cves or args.sbom or args.status:
|
346
284
|
try:
|
347
285
|
fpath, exec_fmt, exec_isa = verify_binary(args.binary)
|
348
286
|
# keep stdout to data only
|
349
|
-
|
287
|
+
rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
|
350
288
|
args.binary = fpath
|
289
|
+
except TypeError as e:
|
290
|
+
rerr.print("[bold red][!] Error, please supply a valid binary file using '-b' flag.[/bold red]")
|
291
|
+
rerr.print(f"[yellow] {e} [/yellow]")
|
292
|
+
return 0
|
351
293
|
except Exception as e:
|
352
|
-
rerr.print(f"[bold red]
|
353
|
-
rerr.print("[
|
354
|
-
# parser.print_help()
|
355
|
-
exit(-1)
|
294
|
+
rerr.print(f"[bold red][!] Error, binary exec type could not be verified:[/bold red] {args.binary}")
|
295
|
+
rerr.print(f"[yellow] {e} [/yellow]")
|
356
296
|
|
357
|
-
|
297
|
+
if args.upload:
|
298
|
+
api.RE_upload(args.binary)
|
358
299
|
|
359
|
-
|
300
|
+
if not args.analyse:
|
301
|
+
return 0
|
360
302
|
|
361
|
-
if not args.analyse:
|
362
|
-
exit(0)
|
363
303
|
# upload binary first, them carry out actions
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
parser.print_help()
|
402
|
-
exit(-1)
|
403
|
-
|
404
|
-
embedding = json.loads(open(args.embedding, 'r').read())
|
405
|
-
elif (args.symbol or args.start_vaddr) and args.binary:
|
406
|
-
if args.start_vaddr:
|
407
|
-
if args.start_vaddr.upper()[:2] == "0X":
|
408
|
-
vaddr = int(args.start_vaddr, 16) + base_address
|
409
|
-
else:
|
410
|
-
vaddr = int(args.start_vaddr) + base_address
|
411
|
-
|
412
|
-
print(
|
413
|
-
f"[+] Using symbol starting at vaddr {hex(vaddr)} from {args.binary} (image_base:{hex(base_address)})")
|
414
|
-
embeddings = api.RE_embeddings(args.binary).json()
|
415
|
-
matches = list(filter(lambda x: x['vaddr'] == vaddr, embeddings))
|
416
|
-
if len(matches) == 0:
|
417
|
-
print(f"[!] Error, could not find symbol at {hex(vaddr)} in {args.binary}")
|
418
|
-
exit(-1)
|
419
|
-
embedding = matches[0]['embedding']
|
304
|
+
if args.analyse:
|
305
|
+
api.RE_analyse(args.binary, model_name=api.re_conf["model"], isa_options=args.isa,
|
306
|
+
platform_options=args.platform, dynamic_execution=args.dynamic_execution,
|
307
|
+
command_line_args=args.cmd_line_args, file_options=args.exec_format,
|
308
|
+
binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
|
309
|
+
duplicate=args.duplicate, debug_fpath=args.debug)
|
310
|
+
|
311
|
+
elif args.extract:
|
312
|
+
embeddings = api.RE_embeddings(args.binary).json()
|
313
|
+
print_json(data=embeddings)
|
314
|
+
|
315
|
+
elif args.match:
|
316
|
+
# parse confidences
|
317
|
+
confidence: float = 0.90
|
318
|
+
if args.confidence:
|
319
|
+
confidences = {
|
320
|
+
"high": 0.95,
|
321
|
+
"medium": 0.9,
|
322
|
+
"low": 0.7,
|
323
|
+
"partial": 0.5,
|
324
|
+
"all": 0.0
|
325
|
+
}
|
326
|
+
if args.confidence in confidences.keys():
|
327
|
+
confidence = confidences[args.confidence]
|
328
|
+
|
329
|
+
if args.from_file:
|
330
|
+
if not os.path.isfile(args.from_file) and not os.access(args.from_file, os.R_OK):
|
331
|
+
rerr.print("[bold red][!] Error, '--from-file' flag requires a path to a JSON embeddings file.[/bold red]")
|
332
|
+
return -1
|
333
|
+
rout.print(f"[+] Searching for symbols similar to embedding in binary: {args.from_file}")
|
334
|
+
embeddings = json.load(open(args.from_file))
|
335
|
+
elif args.found_in:
|
336
|
+
if not os.path.isfile(args.found_in) and not os.access(args.found_in, os.R_OK):
|
337
|
+
rerr.print("[bold red][!] Error, '--found-in' flag requires a path to a binary to search from.[/bold red]")
|
338
|
+
return -1
|
339
|
+
rout.print(f"[+] Matching symbols between {args.binary} and {args.found_in}.")
|
340
|
+
embeddings = api.RE_embeddings(args.found_in).json()["data"]["embedding"]
|
420
341
|
else:
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
matches = list(filter(lambda x: x['name'] == args.symbol, embeddings))
|
425
|
-
if len(matches) == 0:
|
426
|
-
print(f"[!] Error, could not find symbol at {args.symbol} in {args.binary}")
|
427
|
-
exit(-1)
|
428
|
-
embedding = matches[0]['embedding']
|
429
|
-
elif args.binary and args.signature:
|
430
|
-
print(f"[+] Searching ANN for binary embeddings {args.binary}")
|
431
|
-
b_suggestions = api.RE_nearest_binaries(api.RE_signature(args.binary).json(), args.model, args.nns,
|
432
|
-
collections, ignore_hashes=[api.re_binary_id(args.binary)])
|
433
|
-
print_json(data=b_suggestions)
|
434
|
-
exit(0)
|
435
|
-
else:
|
436
|
-
rerr.print("[bold red][!] Error, please supply a valid embedding JSON file using '-e', or select a function"
|
437
|
-
" using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).[/bold red]")
|
438
|
-
# parser.print_help()
|
439
|
-
exit(-1)
|
440
|
-
|
441
|
-
if args.found_in:
|
442
|
-
if not os.path.isfile(args.found_in):
|
443
|
-
print("[!] Error, --found-in flag requires a path to a binary to search from")
|
444
|
-
exit(-1)
|
445
|
-
print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
|
446
|
-
embeddings = api.RE_embeddings(args.found_in).json()
|
447
|
-
res = api.RE_compute_distance(embedding, embeddings, int(args.nns))
|
448
|
-
print_json(data=res)
|
449
|
-
elif args.from_file:
|
450
|
-
if not os.path.isfile(args.from_file):
|
451
|
-
print("[!] Error, --from-file flag requires a path to a JSON embeddings file")
|
452
|
-
exit(-1)
|
453
|
-
print(f"[+] Searching for symbols similar to embedding in binary {args.from_file}")
|
454
|
-
res = api.RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
|
455
|
-
print_json(data=res)
|
456
|
-
else:
|
457
|
-
print(f"[+] Searching for similar symbols to embedding in "
|
458
|
-
f"{'all' if not args.collections else args.collections} collections.")
|
459
|
-
f_suggestions = api.RE_nearest_symbols(embedding["embedding"], args.model, int(args.nns),
|
460
|
-
collections=collections).json()
|
461
|
-
print_json(data=f_suggestions)
|
462
|
-
|
463
|
-
elif args.match:
|
464
|
-
# parse confidences
|
465
|
-
confidence = 0.90
|
466
|
-
if args.confidence:
|
467
|
-
confidences = {
|
468
|
-
'high': 0.95,
|
469
|
-
'medium': 0.9,
|
470
|
-
'low': 0.7,
|
471
|
-
'partial': 0.5,
|
472
|
-
'all': 0.0
|
473
|
-
}
|
474
|
-
if args.confidence in confidences.keys():
|
475
|
-
confidence = confidences[args.confidence]
|
476
|
-
else:
|
477
|
-
confidence = float(args.confidence)
|
478
|
-
|
479
|
-
if args.from_file:
|
480
|
-
embeddings = json.load(open(args.from_file, 'r'))
|
481
|
-
elif args.found_in:
|
482
|
-
if not os.path.isfile(args.found_in):
|
483
|
-
print("[!] Error, --found-in flag requires a path to a binary to search from")
|
484
|
-
exit(-1)
|
485
|
-
print(f"[+] Matching symbols between {args.binary} and {args.found_in}")
|
486
|
-
embeddings = api.RE_embeddings(args.found_in).json()
|
487
|
-
else:
|
488
|
-
# print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
|
489
|
-
match_for_each(args.binary, args.model, confidence, collections)
|
490
|
-
exit(-1)
|
342
|
+
return match_for_each(args.binary, confidence, args.nns)
|
343
|
+
|
344
|
+
match(args.binary, embeddings, confidence=confidence, deviation=float(args.deviation))
|
491
345
|
|
492
|
-
|
346
|
+
elif args.logs:
|
347
|
+
api.RE_logs(args.binary)
|
493
348
|
|
494
|
-
|
495
|
-
|
349
|
+
elif args.delete:
|
350
|
+
api.RE_delete(args.binary)
|
496
351
|
|
497
|
-
|
498
|
-
|
352
|
+
elif args.sbom:
|
353
|
+
api.RE_SBOM(args.binary)
|
499
354
|
|
500
|
-
|
501
|
-
|
355
|
+
elif args.cves:
|
356
|
+
api.RE_cves(args.binary)
|
502
357
|
|
503
|
-
|
504
|
-
|
358
|
+
elif args.status:
|
359
|
+
api.RE_status(args.binary, console=True)
|
505
360
|
|
506
|
-
elif args.cves:
|
507
|
-
api.RE_cves(args.binary)
|
508
361
|
else:
|
509
|
-
print("[!] Error, please supply an action command")
|
362
|
+
rerr.print("[bold red][!] Error, please supply an action command.[/bold red]")
|
510
363
|
parser.print_help()
|
364
|
+
return 0
|
511
365
|
|
512
366
|
|
513
|
-
if __name__ ==
|
514
|
-
main()
|
367
|
+
if __name__ == "__main__":
|
368
|
+
exit(main())
|