reait 0.0.19__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +2 -0
- reait/api.py +523 -218
- reait/main.py +265 -295
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/METADATA +40 -51
- reait-1.0.0.dist-info/RECORD +9 -0
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/WHEEL +1 -1
- reait-0.0.19.dist-info/RECORD +0 -9
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/LICENSE +0 -0
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/entry_points.txt +0 -0
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/top_level.txt +0 -0
reait/main.py
CHANGED
@@ -1,75 +1,73 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
2
3
|
from __future__ import print_function
|
3
|
-
|
4
|
-
|
4
|
+
|
5
|
+
import logging
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Optional
|
8
|
+
|
9
|
+
from rich import print_json
|
5
10
|
from rich.progress import track
|
6
11
|
from rich.console import Console
|
7
|
-
from rich.table import Table
|
8
12
|
import os
|
9
|
-
import re
|
10
13
|
import argparse
|
11
|
-
import requests
|
12
|
-
from numpy import array, vstack, mean, average
|
13
|
-
from pandas import DataFrame
|
14
14
|
import json
|
15
|
-
import
|
16
|
-
from os.path import isfile, getsize
|
17
|
-
from sys import exit
|
18
|
-
from IPython import embed
|
19
|
-
from reait import api
|
15
|
+
from sys import exit, stdout, stderr
|
20
16
|
from scipy.spatial import distance
|
21
|
-
from scipy.special import expit
|
22
17
|
from glob import iglob
|
23
18
|
import numpy as np
|
24
19
|
|
25
|
-
|
20
|
+
import api
|
21
|
+
|
22
|
+
rerr = Console(file=stderr, width=180)
|
23
|
+
rout = Console(file=stdout, width=180)
|
24
|
+
|
25
|
+
|
26
|
+
def version() -> int:
|
26
27
|
"""
|
27
28
|
Display program version
|
28
29
|
"""
|
29
|
-
|
30
|
+
rout.print(f"""[bold blue] :::::::: ::::::::
|
31
|
+
:: :::: ::: :::
|
32
|
+
::::::::::::::::::::
|
33
|
+
::::: ::: ::::::
|
34
|
+
::::::::::::::
|
35
|
+
.:: ::: ::::
|
36
|
+
:::::: ::: :::::::
|
37
|
+
:: ::::::::::: :::
|
38
|
+
:: ::::: :::: :::
|
39
|
+
:::::::: :::::::: [/bold blue]
|
40
|
+
[bold red]reait[/bold red] [bold bright_green]v{api.__version__}[/bold bright_green]
|
41
|
+
""")
|
42
|
+
rout.print("[yellow]Config:[/yellow]")
|
30
43
|
print_json(data=api.re_conf)
|
44
|
+
return 0
|
31
45
|
|
32
46
|
|
33
|
-
def verify_binary(fpath_fmt: str):
|
34
|
-
|
35
|
-
fpath = fpath_fmt
|
36
|
-
|
37
|
-
if ':' in fpath_fmt:
|
38
|
-
fpath, fmt = fpath_fmt.split(':')
|
39
|
-
|
40
|
-
if not os.path.isfile(fpath):
|
41
|
-
raise RuntimeError(f"File path {fpath} is not a file")
|
47
|
+
def verify_binary(fpath_fmt: str) -> tuple[str, str, str]:
|
48
|
+
fpath = fpath_fmt
|
42
49
|
|
43
|
-
|
44
|
-
raise RuntimeError("Refusing to analyse file over 10MB. Please use a RevEng.AI SRE integration")
|
45
|
-
|
46
|
-
if not fmt:
|
47
|
-
exec_format, exec_isa = api.file_type(fpath)
|
48
|
-
else:
|
49
|
-
if '-' not in fmt:
|
50
|
-
raise RuntimeError('Binary type must follow format {EXEC_FORMAT}-{ISA}. Use EXEC_FORMAT raw for memory dumps e.g. raw-x86')
|
51
|
-
|
52
|
-
exec_format, exec_isa = fmt.split('-')
|
50
|
+
exec_format, exec_isa = api.file_type(fpath)
|
53
51
|
|
54
52
|
return fpath, exec_format, exec_isa
|
55
53
|
|
56
54
|
|
57
|
-
def match(fpath: str,
|
55
|
+
def match(fpath: str, embeddings: list, confidence: float = 0.95, deviation: float = 0.1) -> None:
|
58
56
|
"""
|
59
57
|
Match embeddings in fpath from a list of embeddings
|
60
58
|
"""
|
61
|
-
print(f"Matching symbols from {fpath} with confidence {confidence}")
|
62
|
-
sink_embed_mat = np.vstack(list(map(lambda x: x[
|
63
|
-
b_embeds = api.RE_embeddings(fpath
|
64
|
-
source_embed_mat = np.vstack(list(map(lambda x: x[
|
59
|
+
rout.print(f"Matching symbols from {fpath} with confidence {confidence}.")
|
60
|
+
sink_embed_mat = np.vstack(list(map(lambda x: x["embedding"], embeddings)))
|
61
|
+
b_embeds = api.RE_embeddings(fpath).json()["data"]
|
62
|
+
source_embed_mat = np.vstack(list(map(lambda x: x["embedding"], b_embeds)))
|
65
63
|
# angular distance over cosine
|
66
|
-
#closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
|
64
|
+
# closest = 1.0 - distance.cdist(source_embed_mat, sink_embed_mat, 'cosine')
|
67
65
|
closest = distance.cdist(source_embed_mat, sink_embed_mat, api.angular_distance)
|
68
66
|
# rescale to separate high end of (-1, 1.0)
|
69
|
-
closest = rescale_sim(closest)
|
67
|
+
# closest = rescale_sim(closest)
|
70
68
|
i, j = closest.shape
|
71
69
|
|
72
|
-
for _i in track(range(i), description=
|
70
|
+
for _i in track(range(i), description="Matching Symbols..."):
|
73
71
|
row = closest[_i, :]
|
74
72
|
match_index, second_match = row.argsort()[::-1][:2]
|
75
73
|
source_index = _i
|
@@ -78,321 +76,293 @@ def match(fpath: str, model_name: str, embeddings: list, confidence: float = 0.9
|
|
78
76
|
sink_symb = embeddings[sink_index]
|
79
77
|
m_confidence = row[match_index]
|
80
78
|
s_confidence = row[second_match]
|
81
|
-
|
79
|
+
|
82
80
|
if row[match_index] >= confidence:
|
83
|
-
|
81
|
+
rout.print(
|
82
|
+
f"[bold green]Found match![/bold green][yellow]\tConfidence: {m_confidence:.05f}[/yellow]\t"
|
83
|
+
f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
|
84
|
+
f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
|
84
85
|
elif (m_confidence - s_confidence) > deviation:
|
85
|
-
|
86
|
+
rout.print(
|
87
|
+
f"[bold magenta]Possible match[/bold magenta][yellow]\t"
|
88
|
+
f"Confidence: {m_confidence:.05f}/{s_confidence:.05f}[/yellow]\t"
|
89
|
+
f"[blue]{source_symb['name']}:{source_symb['vaddr']}[/blue]\t->\t"
|
90
|
+
f"[blue]{sink_symb['name']}:{sink_symb['vaddr']}")
|
86
91
|
else:
|
87
|
-
|
92
|
+
rerr.print(
|
93
|
+
f"[bold red]No match for[/bold red]\t[blue]{source_symb['name']}:{source_symb['vaddr']}\t"
|
94
|
+
f"{sink_symb['name']} - {m_confidence:0.05f}[/blue]")
|
88
95
|
pass
|
89
96
|
|
90
|
-
|
91
|
-
def
|
97
|
+
|
98
|
+
def match_for_each(fpath: str, confidence: float = 0.9, nns: int = 1) -> int:
|
92
99
|
"""
|
93
|
-
|
100
|
+
Match embeddings in fpath from a list of embeddings
|
94
101
|
"""
|
95
|
-
|
102
|
+
nns = max(nns, 1)
|
103
|
+
|
104
|
+
rout.print(f"Matching symbols from '{fpath}' with a confidence {confidence:.02f} and up to "
|
105
|
+
f"{nns} result{'' if nns == 1 else 's'} per function")
|
106
|
+
functions = api.RE_analyze_functions(fpath).json()["functions"]
|
107
|
+
function_matches = api.RE_nearest_functions(fpath, nns=nns, distance=1 - confidence).json()["function_matches"]
|
108
|
+
|
109
|
+
if len(function_matches) == 0:
|
110
|
+
rerr.print(f"[bold red]No matches found for a confidence of [/bold red] {confidence:.02f}")
|
111
|
+
return -1
|
112
|
+
else:
|
113
|
+
for function in functions:
|
114
|
+
matches = list(filter(lambda x: function["function_id"] == x["origin_function_id"], function_matches))
|
115
|
+
|
116
|
+
if len(matches):
|
117
|
+
rout.print(f"[bold green]Found {len(matches)} match{'' if len(matches) == 1 else 'es'} for "
|
118
|
+
f"[/bold green][blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
|
96
119
|
|
97
|
-
|
120
|
+
for match in matches:
|
121
|
+
rout.print(f"\t[yellow]Confidence: {match['confidence']:.05f}[/yellow]"
|
122
|
+
f"\t[blue]{match['nearest_neighbor_function_name']}"
|
123
|
+
f" ({match['nearest_neighbor_binary_name']})[/blue]")
|
124
|
+
else:
|
125
|
+
rout.print(f"[bold red]No matches found for[/bold red] "
|
126
|
+
f"[blue]{function['function_name']}: {function['function_vaddr']:#x}[/blue]")
|
127
|
+
return 0
|
128
|
+
|
129
|
+
|
130
|
+
def parse_collections(collections: str) -> Optional[list[str]]:
|
98
131
|
"""
|
99
|
-
|
132
|
+
Return collections as list from CSV
|
100
133
|
"""
|
101
|
-
|
134
|
+
if not collections:
|
135
|
+
return None
|
136
|
+
return collections.split(",")
|
102
137
|
|
103
|
-
table = Table(title=f"Binary Similarity to {fpath}")
|
104
|
-
table.add_column("Binary", justify="right", style="cyan", no_wrap=True)
|
105
|
-
table.add_column("SHA3-256", style="magenta", no_wrap=True)
|
106
|
-
table.add_column("Similarity", style="yellow", no_wrap=True)
|
107
138
|
|
108
|
-
|
139
|
+
def rescale_sim(x):
|
140
|
+
"""
|
141
|
+
Too many values close to 0.999, 0.99999, 0.998, rescale so small values are very low,
|
142
|
+
high values separated, map to hyperbolic space
|
143
|
+
"""
|
144
|
+
return np.power(x, 5)
|
109
145
|
|
110
|
-
b_sums = []
|
111
|
-
for b in track(fpaths, description='Computing Binary Similarity...'):
|
112
|
-
try:
|
113
|
-
b_sum = api.RE_signature(b, model_name)
|
114
|
-
b_sums.append(b_sum)
|
115
|
-
except Exception as e:
|
116
|
-
console.print(f"\n[red bold]{b} Not Analysed[/red bold] - [green bold]{api.binary_id(b)}[/green bold]")
|
117
|
-
console.print(e)
|
118
146
|
|
119
|
-
|
120
|
-
|
121
|
-
|
147
|
+
def validate_file(arg):
|
148
|
+
file = Path(arg)
|
149
|
+
if file.is_file():
|
150
|
+
return file.absolute()
|
151
|
+
raise FileNotFoundError(f"File path {arg} does not exists.")
|
122
152
|
|
123
|
-
for binary, similarity in zip(fpaths, closest.tolist()[0]):
|
124
|
-
table.add_row(os.path.basename(binary), api.binary_id(binary), f"{rescale_sim(similarity):.05f}")
|
125
153
|
|
126
|
-
|
154
|
+
def validate_dir(arg):
|
155
|
+
dir = Path(arg)
|
156
|
+
if dir.is_dir():
|
157
|
+
return dir.absolute()
|
158
|
+
raise NotADirectoryError(f"Directory path {arg} does not exists.")
|
127
159
|
|
128
160
|
|
129
|
-
def main() ->
|
161
|
+
def main() -> int:
|
130
162
|
"""
|
131
163
|
Tool entry
|
132
164
|
"""
|
133
165
|
parser = argparse.ArgumentParser(add_help=False)
|
134
|
-
parser.add_argument("-b", "--binary",
|
135
|
-
|
136
|
-
parser.add_argument("-
|
137
|
-
parser.add_argument("
|
166
|
+
parser.add_argument("-b", "--binary", type=validate_file,
|
167
|
+
help="Path of binary to analyse, use ./path:{exec_format} to specify executable format e.g. ./path:raw-x86_64")
|
168
|
+
parser.add_argument("-B", "--binary-hash", default="", help="Hex-encoded SHA-256 hash of the binary to use")
|
169
|
+
parser.add_argument("-D", "--dir", type=validate_dir, help="Path of directory to recursively analyse")
|
170
|
+
parser.add_argument("-a", "--analyse", action="store_true",
|
171
|
+
help="Perform a full analysis and generate embeddings for every symbol")
|
138
172
|
parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
|
139
|
-
parser.add_argument("-A", action=
|
140
|
-
parser.add_argument("-u", "--upload", action=
|
141
|
-
parser.add_argument("
|
142
|
-
parser.add_argument("--embedding", help="Path of JSON file containing a BinNet embedding")
|
143
|
-
parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
|
144
|
-
parser.add_argument("--collections", default=None,
|
173
|
+
parser.add_argument("-A", action="store_true", help="Upload and Analyse a new binary")
|
174
|
+
parser.add_argument("-u", "--upload", action="store_true", help="Upload a new binary to remote server")
|
175
|
+
parser.add_argument("--duplicate", default=False, action="store_true", help="Duplicate an existing binary")
|
176
|
+
parser.add_argument("-e", "--embedding", help="Path of JSON file containing a BinNet embedding")
|
177
|
+
parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch", type=int)
|
178
|
+
parser.add_argument("--collections", default=None,
|
179
|
+
help="Comma Seperated Value of collections to search from e.g. libxml2,libpcap. Used to select RevEng.AI collections for filtering search results")
|
145
180
|
parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
|
146
|
-
parser.add_argument("--from-file",
|
181
|
+
parser.add_argument("--from-file",
|
182
|
+
help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
|
147
183
|
parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
|
148
|
-
parser.add_argument("-C", "--sca", action="store_true", help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
|
149
184
|
parser.add_argument("--sbom", action="store_true", help="Generate SBOM for binary")
|
150
|
-
parser.add_argument("-m", "--model", default=
|
151
|
-
parser.add_argument("-x", "--extract", action=
|
152
|
-
parser.add_argument("
|
153
|
-
|
154
|
-
parser.add_argument("
|
155
|
-
|
156
|
-
parser.add_argument("
|
157
|
-
|
158
|
-
parser.add_argument("--
|
159
|
-
parser.add_argument("--
|
160
|
-
parser.add_argument("-
|
161
|
-
parser.add_argument("-d", "--delete", action='store_true', help="Delete all metadata associated with binary")
|
162
|
-
parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
|
185
|
+
parser.add_argument("-m", "--model", default=None, help="AI model used to generate embeddings")
|
186
|
+
parser.add_argument("-x", "--extract", action="store_true", help="Fetch embeddings for binary")
|
187
|
+
parser.add_argument("-M", "--match", action="store_true",
|
188
|
+
help="Match functions in binary file. Can be used with --confidence, --deviation, --from-file, --found-in.")
|
189
|
+
parser.add_argument("--confidence", default="high", choices=["high", "medium", "low", "partial", "all"],
|
190
|
+
help="Confidence threshold used to match symbols. Valid values are 'all', 'medium', 'low', 'partial' or 'high'[DEFAULT]")
|
191
|
+
parser.add_argument("--deviation", default=0.1, type=float,
|
192
|
+
help="Deviation in prediction confidence between outlier and next highest symbol. Use if confident symbol is present in binary but not matching.")
|
193
|
+
parser.add_argument("-l", "--logs", action="store_true", help="Fetch analysis log file for binary")
|
194
|
+
parser.add_argument("-d", "--delete", action="store_true", help="Delete all metadata associated with binary")
|
195
|
+
parser.add_argument("-k", "--apikey", help="RevEng.AI Personal API key")
|
163
196
|
parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
|
164
197
|
parser.add_argument("-v", "--version", action="store_true", help="Display version information")
|
165
|
-
parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
|
198
|
+
parser.add_argument("--help", action="help", default=argparse.SUPPRESS,
|
199
|
+
help=argparse._("Show this help message and exit"))
|
166
200
|
parser.add_argument("--isa", default=None, help="Override executable ISA. Valid values are x86, x86_64, ARMv7")
|
167
|
-
parser.add_argument("--exec-format", default=None,
|
168
|
-
|
169
|
-
parser.add_argument("--
|
170
|
-
|
171
|
-
parser.add_argument("--
|
172
|
-
|
201
|
+
parser.add_argument("--exec-format", default=None,
|
202
|
+
help="Override executable format. Valid values are pe, elf, macho, raw")
|
203
|
+
parser.add_argument("--platform", default=None,
|
204
|
+
help="Override OS platform. Valid values are Windows, Linux, OSX, OpenBSD")
|
205
|
+
parser.add_argument("--dynamic-execution", default=False, action="store_true",
|
206
|
+
help="Enable dynamic execution in sandbox during analysis. Analysis will include any auto unpacked malware samples")
|
207
|
+
parser.add_argument("--cmd-line-args", default="",
|
208
|
+
help="Command line arguments to pass when running binary sample in the sandbox. Only used when run with --dynamic-execution")
|
209
|
+
parser.add_argument("--scope", default="private", choices=["public", "private"],
|
210
|
+
help="Override analysis visibility (scope). Valid values are 'public' or 'private'[DEFAULT]")
|
211
|
+
parser.add_argument("--tags", default=None, type=str,
|
212
|
+
help="Assign tags to an analysis. Valid responses are tag1,tag2,tag3.")
|
213
|
+
parser.add_argument("--priority", default=0, type=int, help="Add priority to processing queue.")
|
214
|
+
parser.add_argument("--verbose", default=False, action="store_true", help="Set verbose output.")
|
215
|
+
parser.add_argument("--debug", default=None, help="Debug file path to write pass with analysis")
|
216
|
+
parser.add_argument("-s", "--status", action="store_true", help="Ongoing status of the provided binary")
|
217
|
+
|
173
218
|
args = parser.parse_args()
|
174
219
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
220
|
+
# set re_conf args
|
221
|
+
for arg in ("apikey", "host", "model",):
|
222
|
+
if getattr(args, arg):
|
223
|
+
api.re_conf[arg] = getattr(args, arg)
|
224
|
+
|
225
|
+
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
|
181
226
|
|
182
227
|
# display version and exit
|
183
228
|
if args.version:
|
184
|
-
version()
|
185
|
-
exit(0)
|
186
|
-
|
187
|
-
exec_fmt = None
|
188
|
-
exec_isa = None
|
189
|
-
base_address = 0
|
190
|
-
if args.base_address:
|
191
|
-
if args.base_address.upper()[:2] == "0X":
|
192
|
-
base_address = int(args.base_address, 16)
|
193
|
-
else:
|
194
|
-
base_address = int(args.base_address)
|
229
|
+
return version()
|
195
230
|
|
231
|
+
# validate length of string tags
|
232
|
+
tags = None
|
233
|
+
if args.tags:
|
234
|
+
tags = parse_collections(args.tags)
|
196
235
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
236
|
+
collections = None
|
237
|
+
if args.collections:
|
238
|
+
collections = parse_collections(args.collections)
|
239
|
+
|
240
|
+
# auto analysis, uploads and starts analysis
|
241
|
+
if args.A:
|
242
|
+
args.upload = args.analyse = True
|
201
243
|
|
202
|
-
|
244
|
+
if args.dir:
|
245
|
+
files = iglob(os.path.abspath(args.dir) + "/**/*", recursive=True)
|
203
246
|
## perform operation on all files inside directory
|
204
|
-
for file in track(files, description=
|
247
|
+
for file in track(files, description="Files in directory"):
|
205
248
|
if not os.path.isfile(file):
|
206
|
-
|
249
|
+
rerr.print(f"[blue]Skipping non-file:[/blue] {file}")
|
207
250
|
continue
|
208
251
|
|
252
|
+
# upload binary
|
253
|
+
if args.upload:
|
254
|
+
api.RE_upload(file)
|
255
|
+
|
209
256
|
if args.analyse:
|
210
257
|
try:
|
211
258
|
fpath, exec_fmt, exec_isa = verify_binary(file)
|
212
|
-
|
213
|
-
rich_print(f'[green bold]Analysing[/green bold] {file}')
|
214
|
-
api.RE_analyse(file, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
|
259
|
+
rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
|
215
260
|
except Exception as e:
|
216
|
-
|
261
|
+
rerr.print(f"[red bold][!] Error, binary exec type could not be verified:[/red bold] {file}")
|
262
|
+
rerr.print(f"[yellow] {e} [/yellow]")
|
263
|
+
|
264
|
+
rout.print(f"[green bold]Analysing:[/green bold] {file}")
|
265
|
+
api.RE_analyse(file, model_name=api.re_conf["model"], isa_options=args.isa,
|
266
|
+
platform_options=args.platform, dynamic_execution=args.dynamic_execution,
|
267
|
+
command_line_args=args.cmd_line_args, file_options=args.exec_format,
|
268
|
+
binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
|
269
|
+
duplicate=args.duplicate, debug_fpath=args.debug)
|
217
270
|
|
218
|
-
|
271
|
+
if args.delete:
|
219
272
|
try:
|
220
|
-
|
221
|
-
api.RE_delete(
|
273
|
+
rout.print(f"[green bold]Deleting analyses for:[/green bold] {file}")
|
274
|
+
api.RE_delete(file)
|
222
275
|
except Exception as e:
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
# verify binary is a file
|
276
|
+
rerr.print(f"[red bold][!] Error, could not delete analysis for:[/red bold] {file}")
|
277
|
+
rerr.print(f"[yellow] {e} [/yellow]")
|
278
|
+
|
279
|
+
if not (args.upload or args.analyse or args.delete):
|
280
|
+
rerr.print(f"Error, '-D' flag only supports upload, analyse, or delete.")
|
281
|
+
return -1
|
282
|
+
elif args.analyse or args.extract or args.logs or args.delete or \
|
283
|
+
args.upload or args.match or args.cves or args.sbom or args.status:
|
232
284
|
try:
|
233
285
|
fpath, exec_fmt, exec_isa = verify_binary(args.binary)
|
234
|
-
|
286
|
+
# keep stdout to data only
|
287
|
+
rout.print(f"Found {fpath}: {exec_fmt}-{exec_isa}")
|
235
288
|
args.binary = fpath
|
289
|
+
except TypeError as e:
|
290
|
+
rerr.print("[bold red][!] Error, please supply a valid binary file using '-b' flag.[/bold red]")
|
291
|
+
rerr.print(f"[yellow] {e} [/yellow]")
|
292
|
+
return 0
|
236
293
|
except Exception as e:
|
237
|
-
print("[!] Error,
|
238
|
-
|
239
|
-
exit(-1)
|
240
|
-
|
241
|
-
if args.upload:
|
242
|
-
# upload binary first, them carry out actions
|
243
|
-
print(f"[!] RE:upload not implemented. Use analyse.")
|
244
|
-
exit(-1)
|
245
|
-
|
246
|
-
if args.analyse:
|
247
|
-
api.RE_analyse(args.binary, model=args.model, isa_options=args.isa, platform_options=args.platform, dynamic_execution=args.dynamic_execution, command_line_args=args.cmd_line_args, file_options=args.exec_format, scope=args.scope.upper(), tags=args.tags)
|
294
|
+
rerr.print(f"[bold red][!] Error, binary exec type could not be verified:[/bold red] {args.binary}")
|
295
|
+
rerr.print(f"[yellow] {e} [/yellow]")
|
248
296
|
|
249
|
-
|
250
|
-
|
251
|
-
print_json(data=embeddings)
|
297
|
+
if args.upload:
|
298
|
+
api.RE_upload(args.binary)
|
252
299
|
|
253
|
-
|
254
|
-
|
255
|
-
b_embed = api.RE_signature(args.binary, args.model)
|
256
|
-
print_json(data=b_embed)
|
300
|
+
if not args.analyse:
|
301
|
+
return 0
|
257
302
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
if len(matches) == 0:
|
297
|
-
print(f"[!] Error, could not find symbol at {hex(vaddr)} in {args.binary}")
|
298
|
-
exit(-1)
|
299
|
-
embedding = matches[0]['embedding']
|
300
|
-
else:
|
301
|
-
symb_name = args.symbol
|
302
|
-
print(f"[+] Using symbol {args.symbol} from {args.binary}")
|
303
|
-
|
304
|
-
embeddings = api.RE_embeddings(args.binary, args.model)
|
305
|
-
matches = list(filter(lambda x: x['name'] == args.symbol, embeddings))
|
306
|
-
if len(matches) == 0:
|
307
|
-
print(f"[!] Error, could not find symbol at {args.symbol} in {args.binary}")
|
308
|
-
exit(-1)
|
309
|
-
embedding = matches[0]['embedding']
|
310
|
-
elif args.binary and args.signature:
|
311
|
-
print(f"[+] Searching ANN for binary embeddings {args.binary}")
|
312
|
-
api.RE_nearest_binaries(api.RE_signature(args.binary, args.model), args.model, args.nns, args.collections)
|
313
|
-
exit(0)
|
314
|
-
else:
|
315
|
-
print("[!] Error, please supply a valid embedding JSON file using '-e', or select a function using --start-vaddr or --symbol (NB: -b flag is needed for both of these options).")
|
316
|
-
parser.print_help()
|
317
|
-
exit(-1)
|
318
|
-
|
319
|
-
|
320
|
-
# check for valid regex
|
321
|
-
if args.collections:
|
322
|
-
try:
|
323
|
-
re.compile(args.collections)
|
324
|
-
except re.error as e:
|
325
|
-
print(f"[!] Error, invalid regex for collections - {args.collections}")
|
326
|
-
exit(-1)
|
327
|
-
|
328
|
-
if args.found_in:
|
329
|
-
if not os.path.isfile(args.found_in):
|
330
|
-
print("[!] Error, --found-in flag requires a path to a binary to search from")
|
331
|
-
exit(-1)
|
332
|
-
print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
|
333
|
-
embeddings = api.RE_embeddings(args.found_in, args.model)
|
334
|
-
res = api.RE_compute_distance(embedding, embeddings, int(args.nns))
|
335
|
-
print_json(data=res)
|
336
|
-
elif args.from_file:
|
337
|
-
if not os.path.isfile(args.from_file):
|
338
|
-
print("[!] Error, --from-file flag requires a path to a JSON embeddings file")
|
339
|
-
exit(-1)
|
340
|
-
print(f"[+] Searching for symbols similar to embedding in binary {args.from_file}")
|
341
|
-
res = api.RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
|
342
|
-
print_json(data=res)
|
343
|
-
else:
|
344
|
-
print(f"[+] Searching for similar symbols to embedding in {'all' if not args.collections else args.collections} collections.")
|
345
|
-
api.RE_nearest_symbols(embedding, args.model, int(args.nns), collections=args.collections)
|
346
|
-
|
347
|
-
|
348
|
-
elif args.match:
|
349
|
-
embeddings = None
|
350
|
-
if args.from_file:
|
351
|
-
embeddings = json.load(open(args.from_file, 'r'))
|
352
|
-
elif args.found_in:
|
353
|
-
if not os.path.isfile(args.found_in):
|
354
|
-
print("[!] Error, --found-in flag requires a path to a binary to search from")
|
355
|
-
exit(-1)
|
356
|
-
print(f"[+] Matching symbols between {args.binary} and {args.found_in}")
|
357
|
-
embeddings = api.RE_embeddings(args.found_in, args.model)
|
358
|
-
else:
|
359
|
-
print("No --from-file or --found-in, matching from global symbol database (unstrip) not currently")
|
360
|
-
exit(-1)
|
361
|
-
|
362
|
-
confidence = 0.99
|
363
|
-
if args.confidence:
|
364
|
-
confidences = {
|
365
|
-
'high': 0.99,
|
366
|
-
'medium': 0.95,
|
367
|
-
'low': 0.9,
|
368
|
-
'all': 0.0
|
369
|
-
}
|
370
|
-
if args.confidence in confidences.keys():
|
371
|
-
confidence = confidences[args.confidence]
|
303
|
+
# upload binary first, them carry out actions
|
304
|
+
if args.analyse:
|
305
|
+
api.RE_analyse(args.binary, model_name=api.re_conf["model"], isa_options=args.isa,
|
306
|
+
platform_options=args.platform, dynamic_execution=args.dynamic_execution,
|
307
|
+
command_line_args=args.cmd_line_args, file_options=args.exec_format,
|
308
|
+
binary_scope=args.scope.upper(), tags=tags, priority=args.priority,
|
309
|
+
duplicate=args.duplicate, debug_fpath=args.debug)
|
310
|
+
|
311
|
+
elif args.extract:
|
312
|
+
embeddings = api.RE_embeddings(args.binary).json()
|
313
|
+
print_json(data=embeddings)
|
314
|
+
|
315
|
+
elif args.match:
|
316
|
+
# parse confidences
|
317
|
+
confidence: float = 0.90
|
318
|
+
if args.confidence:
|
319
|
+
confidences = {
|
320
|
+
"high": 0.95,
|
321
|
+
"medium": 0.9,
|
322
|
+
"low": 0.7,
|
323
|
+
"partial": 0.5,
|
324
|
+
"all": 0.0
|
325
|
+
}
|
326
|
+
if args.confidence in confidences.keys():
|
327
|
+
confidence = confidences[args.confidence]
|
328
|
+
|
329
|
+
if args.from_file:
|
330
|
+
if not os.path.isfile(args.from_file) and not os.access(args.from_file, os.R_OK):
|
331
|
+
rerr.print("[bold red][!] Error, '--from-file' flag requires a path to a JSON embeddings file.[/bold red]")
|
332
|
+
return -1
|
333
|
+
rout.print(f"[+] Searching for symbols similar to embedding in binary: {args.from_file}")
|
334
|
+
embeddings = json.load(open(args.from_file))
|
335
|
+
elif args.found_in:
|
336
|
+
if not os.path.isfile(args.found_in) and not os.access(args.found_in, os.R_OK):
|
337
|
+
rerr.print("[bold red][!] Error, '--found-in' flag requires a path to a binary to search from.[/bold red]")
|
338
|
+
return -1
|
339
|
+
rout.print(f"[+] Matching symbols between {args.binary} and {args.found_in}.")
|
340
|
+
embeddings = api.RE_embeddings(args.found_in).json()["data"]["embedding"]
|
372
341
|
else:
|
373
|
-
|
374
|
-
|
375
|
-
|
342
|
+
return match_for_each(args.binary, confidence, args.nns)
|
343
|
+
|
344
|
+
match(args.binary, embeddings, confidence=confidence, deviation=float(args.deviation))
|
376
345
|
|
346
|
+
elif args.logs:
|
347
|
+
api.RE_logs(args.binary)
|
377
348
|
|
378
|
-
|
379
|
-
|
349
|
+
elif args.delete:
|
350
|
+
api.RE_delete(args.binary)
|
380
351
|
|
381
|
-
|
382
|
-
|
352
|
+
elif args.sbom:
|
353
|
+
api.RE_SBOM(args.binary)
|
383
354
|
|
384
|
-
|
385
|
-
|
355
|
+
elif args.cves:
|
356
|
+
api.RE_cves(args.binary)
|
386
357
|
|
387
|
-
|
388
|
-
|
358
|
+
elif args.status:
|
359
|
+
api.RE_status(args.binary, console=True)
|
389
360
|
|
390
|
-
elif args.cves:
|
391
|
-
api.RE_cves(args.binary, args.model)
|
392
361
|
else:
|
393
|
-
print("[!] Error, please supply an action command")
|
362
|
+
rerr.print("[bold red][!] Error, please supply an action command.[/bold red]")
|
394
363
|
parser.print_help()
|
364
|
+
return 0
|
395
365
|
|
396
366
|
|
397
|
-
if __name__ ==
|
398
|
-
main()
|
367
|
+
if __name__ == "__main__":
|
368
|
+
exit(main())
|