reait 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +0 -0
- reait-0.0.13.data/scripts/reait → reait/__main__.py +135 -28
- {reait-0.0.13.dist-info → reait-0.0.15.dist-info}/METADATA +42 -7
- reait-0.0.15.dist-info/RECORD +9 -0
- {reait-0.0.13.dist-info → reait-0.0.15.dist-info}/WHEEL +1 -1
- reait-0.0.15.dist-info/entry_points.txt +2 -0
- reait-0.0.15.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- reait-0.0.13.dist-info/RECORD +0 -6
- reait-0.0.13.dist-info/top_level.txt +0 -1
- {reait-0.0.13.dist-info → reait-0.0.15.dist-info}/LICENSE +0 -0
reait/__init__.py
ADDED
File without changes
|
@@ -1,27 +1,29 @@
|
|
1
|
-
|
1
|
+
#!/usr/bin/env python
|
2
2
|
from __future__ import print_function
|
3
3
|
from json import dumps
|
4
4
|
from os import system
|
5
5
|
from tqdm import tqdm
|
6
6
|
from hashlib import sha256
|
7
|
-
from rich import print_json
|
7
|
+
from rich import print_json, print as rich_print
|
8
8
|
from sklearn.metrics.pairwise import cosine_similarity
|
9
9
|
import os
|
10
|
-
|
10
|
+
import re
|
11
|
+
import argparse
|
11
12
|
import requests
|
12
|
-
from numpy import array, vstack
|
13
|
+
from numpy import array, vstack, mean
|
13
14
|
from pandas import DataFrame
|
14
15
|
import json
|
15
16
|
import tomli
|
16
17
|
from os.path import isfile
|
17
18
|
from sys import exit
|
18
|
-
|
19
|
+
from IPython import embed
|
19
20
|
|
20
|
-
__version__ = "0.0.
|
21
|
+
__version__ = "0.0.15"
|
21
22
|
|
22
23
|
re_conf = {
|
23
24
|
'apikey' : 'l1br3',
|
24
|
-
'host' : 'https://api.reveng.ai'
|
25
|
+
'host' : 'https://api.reveng.ai',
|
26
|
+
'model': 'binnet-0.1'
|
25
27
|
}
|
26
28
|
|
27
29
|
def reveng_req(r: requests.request, end_point: str, data=None, ex_headers: dict = None, params=None):
|
@@ -47,11 +49,14 @@ def RE_delete(fpath: str):
|
|
47
49
|
return
|
48
50
|
|
49
51
|
|
50
|
-
def RE_analyse(fpath: str):
|
52
|
+
def RE_analyse(fpath: str, model: str = None):
|
51
53
|
"""
|
52
54
|
Start analysis job for binary file
|
53
55
|
"""
|
54
|
-
|
56
|
+
params={}
|
57
|
+
if model:
|
58
|
+
params['model'] = model
|
59
|
+
res = reveng_req(requests.post, f"analyse", data=open(fpath, 'rb').read(), params=params)
|
55
60
|
if res.status_code == 200:
|
56
61
|
print("[+] Successfully submitted binary for analysis.")
|
57
62
|
print(f"[+] {fpath} - {binary_id(fpath)}")
|
@@ -65,6 +70,24 @@ def RE_analyse(fpath: str):
|
|
65
70
|
res.raise_for_status()
|
66
71
|
|
67
72
|
|
73
|
+
def RE_upload(fpath: str):
|
74
|
+
"""
|
75
|
+
Upload binary to Server
|
76
|
+
"""
|
77
|
+
res = reveng_req(requests.post, f"upload", data=open(fpath, 'rb').read())
|
78
|
+
if res.status_code == 200:
|
79
|
+
print("[+] Successfully uploaded binary to your account.")
|
80
|
+
print(f"[+] {fpath} - {binary_id(fpath)}")
|
81
|
+
return res
|
82
|
+
|
83
|
+
if res.status_code == 400:
|
84
|
+
if 'already exists' in json.loads(res.text)['reason']:
|
85
|
+
print(f"[-] {fpath} already exists. Please check the results log file for {binary_id(fpath)}")
|
86
|
+
return True
|
87
|
+
|
88
|
+
res.raise_for_status()
|
89
|
+
|
90
|
+
|
68
91
|
def RE_embeddings(fpath: str):
|
69
92
|
"""
|
70
93
|
Fetch symbol embeddings
|
@@ -94,11 +117,33 @@ def RE_logs(fpath: str):
|
|
94
117
|
res.raise_for_status()
|
95
118
|
|
96
119
|
|
97
|
-
def
|
120
|
+
def RE_cves(fpath: str):
|
121
|
+
"""
|
122
|
+
Check for known CVEs in Binary
|
123
|
+
"""
|
124
|
+
bin_id = binary_id(fpath)
|
125
|
+
res = reveng_req(requests.get, f"/cves/{bin_id}")
|
126
|
+
if res.status_code == 200:
|
127
|
+
cves = json.loads(res.text)
|
128
|
+
rich_print(f"[bold blue]Checking for known CVEs embedded inside [/bold blue] [bold bright_green]{fpath}[/bold bright_green]:")
|
129
|
+
if len(cves) == 0:
|
130
|
+
rich_print(f"[bold bright_green]0 CVEs found.[/bold bright_green]")
|
131
|
+
else:
|
132
|
+
rich_print(f"[bold red]Warning CVEs found![/bold red]")
|
133
|
+
print_json(data=cves)
|
134
|
+
return
|
135
|
+
elif res.status_code == 404:
|
136
|
+
print(f"[!] Error, binary analysis for {bin_id} not found.")
|
137
|
+
return
|
138
|
+
|
139
|
+
res.raise_for_status()
|
140
|
+
|
141
|
+
|
142
|
+
#def RE_compute_distance(embedding: list, fpath_source: str, nns: int = 5):
|
143
|
+
def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5):
|
98
144
|
"""
|
99
|
-
|
145
|
+
Compute the cosine distance between source embedding and embeddings from binary
|
100
146
|
"""
|
101
|
-
embeddings = RE_embeddings(fpath_source)
|
102
147
|
df = DataFrame(data=embeddings)
|
103
148
|
np_embedding = array(embedding).reshape(1, -1)
|
104
149
|
source_embeddings = vstack(df['embedding'].values)
|
@@ -112,15 +157,18 @@ def RE_compute_distance(embedding: list, fpath_source: str, nns: int = 5):
|
|
112
157
|
return json_sims
|
113
158
|
|
114
159
|
|
115
|
-
def RE_nearest_symbols(embedding: list, nns: int = 5):
|
160
|
+
def RE_nearest_symbols(embedding: list, nns: int = 5, collections : list = None):
|
116
161
|
"""
|
117
162
|
Get function name suggestions for an embedding
|
163
|
+
:param embedding: embedding vector as python list
|
118
164
|
:param nns: Number of nearest neighbors
|
119
|
-
:param
|
165
|
+
:param collections: str RegEx to search through RevEng.AI collections
|
120
166
|
"""
|
121
167
|
params={'nns': nns}
|
122
|
-
|
123
|
-
|
168
|
+
|
169
|
+
if collections:
|
170
|
+
params['collections'] = collections
|
171
|
+
|
124
172
|
res = reveng_req(requests.post, "ann", data=json.dumps(embedding), params=params)
|
125
173
|
res.raise_for_status()
|
126
174
|
f_suggestions = res.json()
|
@@ -152,44 +200,71 @@ def version():
|
|
152
200
|
"""
|
153
201
|
Display program version
|
154
202
|
"""
|
155
|
-
|
203
|
+
rich_print(f"[bold red]reait[/bold red] [bold bright_green]v{__version__}[/bold bright_green]")
|
156
204
|
print_json(data=re_conf)
|
157
205
|
|
158
|
-
|
206
|
+
|
207
|
+
def main() -> None:
|
208
|
+
"""
|
209
|
+
Tool entry
|
210
|
+
"""
|
159
211
|
parse_config()
|
160
|
-
parser = ArgumentParser()
|
161
|
-
parser.add_argument("-b", "--binary", default="", help="Path
|
162
|
-
parser.add_argument("-a", "--analyse", action='store_true', help="
|
212
|
+
parser = argparse.ArgumentParser(add_help=False)
|
213
|
+
parser.add_argument("-b", "--binary", default="", help="Path of binary to analyse")
|
214
|
+
parser.add_argument("-a", "--analyse", action='store_true', help="Perform a full analysis and generate embeddings for every symbol")
|
215
|
+
parser.add_argument("--no-embeddings", action='store_true', help="Only perform binary analysis. Do not generate embeddings for symbols")
|
216
|
+
parser.add_argument("--base-address", help="Image base of the executable image to map for remote analysis")
|
217
|
+
parser.add_argument("-A", action='store_true', help="Upload and Analyse a new binary")
|
218
|
+
parser.add_argument("-u", "--upload", action='store_true', help="Upload a new binary to remote server")
|
163
219
|
parser.add_argument("-n", "--ann", action='store_true', help="Fetch Approximate Nearest Neighbours (ANNs) for embedding")
|
164
220
|
parser.add_argument("--embedding", help="Path of JSON file containing a BinNet embedding")
|
165
221
|
parser.add_argument("--nns", default="5", help="Number of approximate nearest neighbors to fetch")
|
222
|
+
parser.add_argument("--collections", default=None, help="Regex string to select RevEng.AI collections for filtering e.g., libc")
|
166
223
|
parser.add_argument("--found-in", help="ANN flag to limit to embeddings returned to those found in specific binary")
|
167
|
-
|
224
|
+
parser.add_argument("--from-file", help="ANN flag to limit to embeddings returned to those found in JSON embeddings file")
|
225
|
+
parser.add_argument("-c", "--cves", action="store_true", help="Check for CVEs found inside binary")
|
226
|
+
parser.add_argument("-C", "--sca", action="store_true", help="Perform Software Composition Anaysis to identify common libraries embedded in binary")
|
227
|
+
parser.add_argument("-m", "--model", default="binnet-0.1", help="AI model used to generate embeddings")
|
168
228
|
parser.add_argument("-x", "--extract", action='store_true', help="Fetch embeddings for binary")
|
229
|
+
parser.add_argument("--start-address", help="Start vaddr of the function to extract embeddings")
|
230
|
+
parser.add_argument("--end-address", help="End vaddr of the function to extract embeddings")
|
231
|
+
parser.add_argument("-s", "--summary", action='store_true', help="Average symbol embeddings in binary")
|
232
|
+
parser.add_argument("-S", "--signature", action='store_true', help="Generate a RevEng.AI binary signature")
|
169
233
|
parser.add_argument("-l", "--logs", action='store_true', help="Fetch analysis log file for binary")
|
170
|
-
parser.add_argument("-d", "--delete", action='store_true', help="
|
234
|
+
parser.add_argument("-d", "--delete", action='store_true', help="Delete all metadata associated with binary")
|
171
235
|
parser.add_argument("-k", "--apikey", help="RevEng.AI API key")
|
172
|
-
parser.add_argument("-
|
236
|
+
parser.add_argument("-h", "--host", help="Analysis Host (https://api.reveng.ai)")
|
173
237
|
parser.add_argument("-v", "--version", action="store_true", help="Display version information")
|
238
|
+
parser.add_argument("--help", action="help", default=argparse.SUPPRESS, help=argparse._('Show this help message and exit'))
|
174
239
|
args = parser.parse_args()
|
175
240
|
|
176
241
|
if args.apikey:
|
177
242
|
re_conf['apikey'] = args.apikey
|
178
243
|
if args.host:
|
179
244
|
re_conf['host'] = args.host
|
245
|
+
if args.model:
|
246
|
+
re_conf['model'] = args.model
|
180
247
|
|
181
248
|
# display version and exit
|
182
249
|
if args.version:
|
183
250
|
version()
|
184
251
|
exit(0)
|
185
252
|
|
186
|
-
if args.analyse or args.extract or args.logs or args.delete:
|
253
|
+
if args.A or args.analyse or args.extract or args.logs or args.delete or args.summary or args.upload:
|
187
254
|
# verify binary is a file
|
188
255
|
if not os.path.isfile(args.binary):
|
189
256
|
print("[!] Error, please supply a valid binary file using '-b'.")
|
190
257
|
parser.print_help()
|
191
258
|
exit(-1)
|
192
259
|
|
260
|
+
if args.upload:
|
261
|
+
# upload binary first, then carry out actions
|
262
|
+
print(f"[!] RE:upload not implemented. Use analyse.")
|
263
|
+
exit(-1)
|
264
|
+
|
265
|
+
if args.A:
|
266
|
+
RE_analyse(args.binary)
|
267
|
+
|
193
268
|
if args.analyse:
|
194
269
|
RE_analyse(args.binary)
|
195
270
|
|
@@ -197,6 +272,12 @@ if __name__ == '__main__':
|
|
197
272
|
embeddings = RE_embeddings(args.binary)
|
198
273
|
print_json(data=embeddings)
|
199
274
|
|
275
|
+
elif args.summary:
|
276
|
+
# Arithmetic mean of symbol embeddings
|
277
|
+
embeddings = RE_embeddings(args.binary)
|
278
|
+
b_embed = mean(vstack(list(map(lambda x: array(x['embedding']), embeddings))), axis=0)
|
279
|
+
print_json(data=b_embed.tolist())
|
280
|
+
|
200
281
|
elif args.ann:
|
201
282
|
source = None
|
202
283
|
# parse embedding json file
|
@@ -207,15 +288,32 @@ if __name__ == '__main__':
|
|
207
288
|
|
208
289
|
embedding = json.loads(open(args.embedding, 'r').read())
|
209
290
|
|
291
|
+
# check for valid regex
|
292
|
+
if args.collections:
|
293
|
+
try:
|
294
|
+
re.compile(args.collections)
|
295
|
+
except re.error as e:
|
296
|
+
print(f"[!] Error, invalid regex for collections - {args.collections}")
|
297
|
+
exit(-1)
|
298
|
+
|
210
299
|
if args.found_in:
|
211
300
|
if not os.path.isfile(args.found_in):
|
212
301
|
print("[!] Error, --found-in flag requires a path to a binary to search from")
|
213
302
|
exit(-1)
|
214
303
|
print(f"[+] Searching for symbols similar to embedding in binary {args.found_in}")
|
215
|
-
|
304
|
+
embeddings = RE_embeddings(args.found_in)
|
305
|
+
res = RE_compute_distance(embedding, embeddings, int(args.nns))
|
306
|
+
print_json(data=res)
|
307
|
+
elif args.from_file:
|
308
|
+
if not os.path.isfile(args.from_file):
|
309
|
+
print("[!] Error, --from-file flag requires a path to a JSON embeddings file")
|
310
|
+
exit(-1)
|
311
|
+
print(f"[+] Searching for symbols similar to embedding in binary {args.from_file}")
|
312
|
+
res = RE_compute_distance(embedding, json.load(open(args.from_file, "r")), int(args.nns))
|
216
313
|
print_json(data=res)
|
217
314
|
else:
|
218
|
-
|
315
|
+
print(f"[+] Searching for similar symbols to embedding in {'all' if not args.collections else args.collections} collections.")
|
316
|
+
RE_nearest_symbols(embedding, int(args.nns), collections=args.collections)
|
219
317
|
|
220
318
|
elif args.logs:
|
221
319
|
RE_logs(args.binary)
|
@@ -223,7 +321,16 @@ if __name__ == '__main__':
|
|
223
321
|
elif args.delete:
|
224
322
|
RE_delete(args.binary)
|
225
323
|
|
324
|
+
elif args.cves:
|
325
|
+
RE_cves(args.binary)
|
326
|
+
elif args.signature:
|
327
|
+
print(f"[!] Error, feature not available yet")
|
328
|
+
exit(-1)
|
329
|
+
|
226
330
|
else:
|
227
|
-
print("[!] Error, please
|
331
|
+
print("[!] Error, please supply an action command")
|
228
332
|
parser.print_help()
|
229
333
|
|
334
|
+
|
335
|
+
if __name__ == '__main__':
|
336
|
+
main()
|
@@ -1,7 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: reait
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.15
|
4
4
|
Home-page: https://github.com/RevEng-AI/reait
|
5
|
+
Author: James Patrick-Evans
|
5
6
|
Project-URL: Homepage, https://github.com/RevEng-AI/reait
|
6
7
|
Project-URL: Bug Tracker, https://github.com/RevEng-AI/reait/issues
|
7
8
|
Project-URL: Organisation Homepage, https://reveng.ai
|
@@ -22,9 +23,10 @@ Requires-Dist: pandas
|
|
22
23
|
Requires-Dist: numpy
|
23
24
|
|
24
25
|
# reait
|
25
|
-
RevEng.AI Toolkit
|
26
26
|
|
27
|
-
|
27
|
+
## <ins>R</ins>ev<ins>E</ins>ng.<ins>AI</ins> <ins>T</ins>oolkit
|
28
|
+
|
29
|
+
Analyse compiled executable binaries using the RevEng.AI API. This tool allows you to search for similar components across different compiled executable programs, identify known vulnerabilities in stripped executables, and generate "YARA-like" AI signatures for entire binary files. More details about the API can be found at [docs.reveng.ai](https://docs.reveng.ai).
|
28
30
|
|
29
31
|
NB: We are in Alpha. We support GNU/Linux ELF and Windows PE executables for x86_64, and focus our support on x86_64 Linux ELF executables.
|
30
32
|
|
@@ -57,20 +59,52 @@ Once an analysis is complete, you may access RevEng.AI's BinNet embeddings for a
|
|
57
59
|
`reait -b /usr/bin/true -x | jq ".[] | select(.vaddr==$((0x19f0))).embedding" > embedding.json`
|
58
60
|
|
59
61
|
|
60
|
-
### Search for similar symbols
|
61
|
-
To query our database of similar symbols based on an embedding, use `-n` to search using Approximate Nearest Neighbours. The `--nns` allows you to specify the number of results returned. A list of
|
62
|
+
### Search for similar symbols using an embedding
|
63
|
+
To query our database of similar symbols based on an embedding, use `-n` to search using Approximate Nearest Neighbours. The `--nns` option allows you to specify the number of results returned. A list of symbols with their names, distance (similarity), RevEng.AI collection set, source code filename, source code line number, and file creation timestamp is returned.
|
62
64
|
|
63
65
|
`reait --embedding embedding.json -n`
|
64
66
|
|
65
67
|
NB: A smaller distance indicates a higher degree of similarity.
|
66
68
|
|
67
|
-
####
|
68
|
-
To search for the most similar symbols found in a
|
69
|
+
#### Specific Search
|
70
|
+
To search for the most similar symbols found in a specific binary, use the `--found-in` option with a path to the executable to search from.
|
69
71
|
|
70
72
|
`reait -n --embedding /tmp/sha256_init.json --found-in ~/malware.exe --nns 5`
|
71
73
|
|
72
74
|
This downloads embeddings from `malware.exe` and computes the cosine similarity between all symbols and `sha256_init.json`. The returned results list the most similar symbol locations by cosine similarity score (1.0 most similar, -1.0 dissimilar).
|
73
75
|
|
76
|
+
The `--from-file` option may also be used to limit the search to a custom file containing a JSON list of embeddings.
|
77
|
+
|
78
|
+
|
79
|
+
#### Limited Search
|
80
|
+
To search for the most similar symbols from a set of RevEng.AI collections, use the `--collections` option with a RegEx to match collection names. For example:
|
81
|
+
|
82
|
+
`reait -n --embedding my_func.json --collections "(libc.*|lib.*crypt.*)"`
|
83
|
+
|
84
|
+
RevEng.AI collections are sets of pre-analysed executable objects. To create custom collection sets e.g., malware collections, please create a RevEng.AI account.
|
85
|
+
|
86
|
+
### RevEng.AI embedding models
|
87
|
+
To use specific RevEng.AI AI models, or for training custom models, use `-m` to specify the model. The default option is to use the latest development model. Available models are `binnet-0.1` and `dexter`.
|
88
|
+
|
89
|
+
`reait -b /usr/bin/true -m dexter -a`
|
90
|
+
|
91
|
+
### Software Composition Analysis
|
92
|
+
To identify known open source software components embedded inside a binary, use the `-C` flag.
|
93
|
+
|
94
|
+
#### Stripped Binary CVE Checker
|
95
|
+
To check for known vulnerabilities found in embedded software components, use `-c` or `--cves`.
|
96
|
+
|
97
|
+
|
98
|
+
### RevEng.AI Binary Signature
|
99
|
+
To generate an AI functional description of an entire binary file, use the `-S` flag. NB: Under development.
|
100
|
+
|
101
|
+
|
102
|
+
### Binary embedding
|
103
|
+
Produce a dumb fingerprint for the whole binary by calculating the arithmetic mean of all symbol embeddings.
|
104
|
+
|
105
|
+
`reait -b /usr/bin/true -s`
|
106
|
+
|
107
|
+
|
74
108
|
|
75
109
|
## Configuration
|
76
110
|
|
@@ -79,6 +113,7 @@ This downloads embeddings from `malware.exe` and computes the cosine similarity
|
|
79
113
|
```
|
80
114
|
apikey = "l1br3"
|
81
115
|
host = "https://api.reveng.ai"
|
116
|
+
model = "binnet-0.1"
|
82
117
|
```
|
83
118
|
|
84
119
|
## Contact
|
@@ -0,0 +1,9 @@
|
|
1
|
+
reait/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
reait/__main__.py,sha256=HeD5Pg7_fWaHH7JZ241t3bod_8Gs0CfihuwU3-qOloc,12768
|
3
|
+
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
reait-0.0.15.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
5
|
+
reait-0.0.15.dist-info/METADATA,sha256=7rwUG5WAvUFhEBPrOgVul7ci0kVWaPWd82npjcVknOQ,5261
|
6
|
+
reait-0.0.15.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
7
|
+
reait-0.0.15.dist-info/entry_points.txt,sha256=OgGvIbjDbTIqotTSK2Gi1D_CJIVWzOjs1Hl3T-za-uE,46
|
8
|
+
reait-0.0.15.dist-info/top_level.txt,sha256=AXxC4J_UsjYtyxWQSPi9r4ygoKIXz6u94cW2bdgw714,12
|
9
|
+
reait-0.0.15.dist-info/RECORD,,
|
tests/__init__.py
ADDED
File without changes
|
reait-0.0.13.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
reait-0.0.13.data/scripts/reait,sha256=1LLJ0T4oeUFJtcSy-Ocxifuy3KSPbKH8YwLdWoZVC8c,7830
|
2
|
-
reait-0.0.13.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
3
|
-
reait-0.0.13.dist-info/METADATA,sha256=e8S2y484-mC0nEXphurKRqoN1Yj1K9_xJyu0MPhZbQk,3575
|
4
|
-
reait-0.0.13.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
5
|
-
reait-0.0.13.dist-info/top_level.txt,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
6
|
-
reait-0.0.13.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
|
File without changes
|