reait 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +3 -0
- reait/api.py +429 -174
- reait/main.py +246 -130
- {reait-0.0.19.dist-info → reait-0.0.20.dist-info}/METADATA +43 -21
- reait-0.0.20.dist-info/RECORD +9 -0
- {reait-0.0.19.dist-info → reait-0.0.20.dist-info}/WHEEL +1 -1
- reait-0.0.19.dist-info/RECORD +0 -9
- {reait-0.0.19.dist-info → reait-0.0.20.dist-info}/LICENSE +0 -0
- {reait-0.0.19.dist-info → reait-0.0.20.dist-info}/entry_points.txt +0 -0
- {reait-0.0.19.dist-info → reait-0.0.20.dist-info}/top_level.txt +0 -0
reait/api.py
CHANGED
@@ -1,307 +1,561 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
from __future__ import print_function, annotations
|
4
|
+
|
3
5
|
from hashlib import sha256
|
4
|
-
|
6
|
+
|
5
7
|
from sklearn.metrics.pairwise import cosine_similarity
|
6
|
-
import
|
7
|
-
import
|
8
|
-
import argparse
|
8
|
+
from os.path import basename, exists, expanduser
|
9
|
+
from requests import request, Response, HTTPError
|
9
10
|
import requests
|
10
|
-
from numpy import array, vstack,
|
11
|
+
from numpy import array, vstack, dot, arccos, pi
|
11
12
|
from pandas import DataFrame
|
12
13
|
import json
|
13
14
|
import tomli
|
14
|
-
|
15
|
-
from
|
16
|
-
from IPython import embed
|
17
|
-
import lief
|
15
|
+
import logging
|
16
|
+
from lief import parse, ELF, PE, MachO
|
18
17
|
|
19
|
-
__version__ = "0.0.19"
|
20
18
|
|
21
19
|
# Default client configuration. parse_config() replaces any of these keys
# that are present in ~/.reait.toml.
re_conf = dict(
    apikey="l1br3",
    host="https://api.reveng.ai",
    model="binnet-0.2-x86",
)


# Logger shared by every function in this module.
logger = logging.getLogger("REAIT")
|
27
|
+
|
28
|
+
|
29
|
+
def reveng_req(r: request, end_point: str, data=None, ex_headers: dict = None, params=None,
               json_data: dict = None, timeout: int = 30) -> Response:
    """
    Constructs and sends a Request
    :param r: Callable that performs the request (e.g. requests.get, requests.post)
    :param end_point: Endpoint to add to the base URL
    :param ex_headers: Extended HTTP headers to add
    :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
    :param params: Dictionary, list of tuples or bytes to send in the query string
    :param json_data: A JSON serializable Python object to send in the body
    :param timeout: Number of seconds to stop waiting for a Response
    :return: The Response produced by the callable; the status code is not checked here
    """
    url = f"{re_conf['host']}/{end_point}"
    headers = {"Authorization": re_conf["apikey"]}

    if ex_headers:
        headers.update(ex_headers)

    # Only the logged copy is masked: the API key must never end up in log files.
    loggable_headers = {name: ("***" if str(name).lower() == "authorization" else value)
                        for name, value in headers.items()}
    # Raw bodies (e.g. an uploaded binary) are summarised instead of being dumped in the log.
    loggable_data = f"<{len(data)} bytes>" if isinstance(data, (bytes, bytearray)) else data

    logger.debug("Making request %s:\n  - headers: %s\n  - data: %s\n  - json_data: %s\n  - params: %s",
                 url, loggable_headers, loggable_data, json_data, params)

    response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout)

    logger.debug("Making response %s:\n  - headers: %s\n  - status_code: %d\n  - content: %s",
                 url, response.headers, response.status_code, response.text)

    return response
|
56
|
+
|
57
|
+
|
58
|
+
def re_hash_check(bin_id: str) -> bool:
    """
    Check whether the search endpoint returns a user-owned binary for this hash
    :param bin_id: SHA-256 hash of the binary
    :return: True if at least one binary is returned, False otherwise
    """
    res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All&user_owned=true")
    code = res.status_code
    found = False

    if code == 200:
        found = len(res.json()["binaries"]) > 0
    elif code == 400:
        logger.warning("Bad Request: %s", res.text)
    else:
        logger.error("Internal Server Error.")

    # Any HTTP error status is raised to the caller after it has been logged.
    res.raise_for_status()
    return found
|
72
|
+
|
49
73
|
|
74
|
+
def re_bid_search(bin_id: str) -> int:
    """
    Look up the binary ID (BID) that belongs to a SHA-256 hash
    In this module bin_id is the hash of the file, while BID is the ID of a binary.
    When several records match, they are listed and the user is asked on stdin to pick one.
    :param bin_id: SHA-256 hash of the binary
    :return: BID of the selected record, or -1 if nothing matched or the selection was invalid
    """
    res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All")

    bid = -1

    if res.status_code == 200:
        binaries_data = res.json()["binaries"]

        if len(binaries_data) > 1:
            logger.info("%d matches found for hash: %s.", len(binaries_data), bin_id)

            options_dict = {}

            for idx, binary in enumerate(binaries_data):
                # One placeholder per argument; the ID is formatted with %s so any ID type is accepted.
                logger.info("[%d] - ID: %s, Name: %s, Creation: %s, Model: %s, Owner: %s, Status: %s",
                            idx, binary["binary_id"], binary["binary_name"], binary["creation"],
                            binary["model_name"], binary["owner"], binary["status"])

                options_dict[idx] = binary["binary_id"]

            user_input = input("[+] Please enter the option you want to use for this operation:")

            try:
                bid = options_dict.get(int(user_input), -1)
            except ValueError:
                # Non-numeric answer
                bid = -1

            if bid == -1:
                logger.warning("Invalid option.")
        elif len(binaries_data) == 1:
            binary = binaries_data[0]
            bid = binary["binary_id"]

            logger.info("Only one record exists, selecting - ID: %d, Name: %s, "
                        "Creation: %s, Model: %s, Owner: %s, Status: %s",
                        bid, binary["binary_name"], binary["creation"],
                        binary["model_name"], binary["owner"], binary["status"])
        else:
            logger.warning("No matches found for hash: %s.", bin_id)
    elif res.status_code == 400:
        logger.warning("Bad Request: %s", res.text)
    else:
        logger.error("Internal Server Error.")

    # Any HTTP error status is raised to the caller after it has been logged.
    res.raise_for_status()
    return bid
|
135
|
+
|
136
|
+
|
137
|
+
def RE_delete(fpath: str, binary_id: int = 0) -> Response:
    """
    Delete the analysis of a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means the ID is searched by the hash of the file
    :return: Response of the delete request
    """
    bin_id = re_binary_id(fpath)

    if binary_id == 0:
        bid = re_bid_search(bin_id)
    else:
        bid = binary_id

    if bid == -1:
        raise HTTPError(f"No matches found for hash: {bin_id}")

    res = reveng_req(requests.delete, f"analyse/{bid}")
    code = res.status_code

    if code == 200:
        logger.info("Securely deleted %s analysis.", bin_id)
    elif code == 404:
        logger.warning("Error analysis not found for %s.", bin_id)
    else:
        logger.error("Error deleting binary %s under. Server returned %d.", bin_id, code)

    res.raise_for_status()
    return res
|
160
|
+
|
161
|
+
|
162
|
+
def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None, platform_options: str = None,
               file_options: str = None, dynamic_execution: bool = False, command_line_args: str = None,
               scope: str = None, tags: list = None, priority: int = 0,
               duplicate: bool = False, symbols: dict = None) -> Response:
    """
    Start analysis job for binary file
    :param fpath: File path for binary to analyse
    :param model_name: Binary model name
    :param isa_options: Executable ISA
    :param file_options: File options
    :param platform_options: OS platform
    :param dynamic_execution: Enable dynamic execution in sandbox during analysis
    :param command_line_args: Command line arguments to pass when running binary sample in the sandbox
    :param scope: Analysis visibility
    :param tags: Assign tags to an analysis
    :param priority: Priority to processing queue
    :param duplicate: Duplicate an existing binary
    :param symbols: List of functions
    :return: Response of the analyse request
    :raises HTTPError: If the hash is already analysed and duplicate is False, or on an HTTP error status
    """
    bin_id = re_binary_id(fpath)
    result = re_hash_check(bin_id)

    if result and duplicate is False:
        logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
                     bin_id)
        raise HTTPError(f"Duplicate analysis for hash: {bin_id}")

    params = {"file_name": basename(fpath), "sha_256_hash": bin_id}

    # Explicit mapping instead of locals(): only the intended arguments can reach the request.
    optional = {
        "model_name": model_name,
        "isa_options": isa_options,
        "platform_options": platform_options,
        "file_options": file_options,
        "dynamic_execution": dynamic_execution,
        "command_line_args": command_line_args,
        "scope": scope,
        "tags": tags,
        "priority": priority,
        "symbols": symbols,
    }
    # Falsy values (None, "", 0, False, empty containers) are left out of the request.
    params.update({name: value for name, value in optional.items() if value})

    res = reveng_req(requests.post, "analyse", json_data=params)

    if res.status_code == 200:
        # The hash computed above is reused instead of hashing the file again.
        logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
    elif res.status_code == 400:
        try:
            response = res.json()
        except ValueError:
            # Body is not JSON; raise_for_status() below still reports the failure.
            response = {}

        if isinstance(response, dict) and "error" in response:
            logger.warning("Error analysing %s - %s", fpath, response["error"])

    res.raise_for_status()
    return res
|
75
212
|
|
76
213
|
|
77
|
-
def RE_upload(fpath: str):
|
214
|
+
def RE_upload(fpath: str) -> Response | bool:
    """
    Upload binary to Server
    :param fpath: File path for binary to analyse
    :return: True if the upload was skipped because the hash check found the file, otherwise the Response
    """
    bin_id = re_binary_id(fpath)

    if re_hash_check(bin_id):
        logger.info("File %s - %s already exists. Skipping upload...", basename(fpath), bin_id)
        return True

    # Context manager so the file handle is closed even if reading fails.
    with open(fpath, "rb") as binary_file:
        payload = binary_file.read()

    res = reveng_req(requests.post, "upload", data=payload)

    if res.status_code == 200:
        logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
    elif res.status_code == 400:
        try:
            response = res.json()
        except ValueError:
            # Body is not JSON; raise_for_status() below still reports the failure.
            response = {}

        if isinstance(response, dict) and "error" in response:
            logger.warning("Error uploading %s - %s", fpath, response["error"])
    elif res.status_code == 413:
        logger.warning("File too large. Please upload files under 100MB.")
    elif res.status_code == 500:
        logger.error("Internal Server Error. Please contact support. Skipping upload...")

    res.raise_for_status()
    return res
|
93
242
|
|
94
243
|
|
95
|
-
def RE_embeddings(fpath: str,
|
244
|
+
def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
    """
    Fetch the symbol embeddings of a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means the ID is searched by the hash of the file
    :return: Response of the embeddings request
    """
    bin_id = re_binary_id(fpath)

    if binary_id == 0:
        bid = re_bid_search(bin_id)
    else:
        bid = binary_id

    if bid == -1:
        raise HTTPError(f"No matches found for hash: {bin_id}")

    res = reveng_req(requests.get, f"embeddings/{bid}")

    # A 400 status is reported as an analysis that has not finished yet.
    in_progress = res.status_code == 400
    if in_progress:
        logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
                       bin_id)

    res.raise_for_status()
    return res
|
106
264
|
|
107
265
|
|
108
|
-
def RE_signature(fpath: str,
|
266
|
+
def RE_signature(fpath: str, binary_id: int = 0) -> Response:
    """
    Fetch the BinNet signature of a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means the ID is searched by the hash of the file
    :return: Response of the signature request
    """
    bin_id = re_binary_id(fpath)

    if binary_id == 0:
        bid = re_bid_search(bin_id)
    else:
        bid = binary_id

    if bid == -1:
        raise HTTPError(f"No matches found for hash: {bin_id}")

    res = reveng_req(requests.get, f"signature/{bid}")

    # A 425 status is reported as an analysis that has not finished yet.
    in_progress = res.status_code == 425
    if in_progress:
        logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
                       bin_id)

    res.raise_for_status()
    return res
|
119
286
|
|
120
287
|
|
121
|
-
def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None,
|
288
|
+
def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None,
                 model: str = None) -> Response:
    """
    Fetch the embedding of a custom symbol range
    :param fpath: File path for binary to analyse
    :param start_vaddr: Start virtual address of the function to extract embeddings
    :param end_vaddr: End virtual address of the function to extract embeddings
    :param base_vaddr: Base address of the binary
    :param model: Binary model name
    :return: Response of the embedding request
    """
    # Only truthy optional values are sent as query parameters.
    candidates = (("end_vaddr", end_vaddr), ("base_vaddr", base_vaddr), ("models", model))
    params = {key: value for key, value in candidates if value}

    bin_id = re_binary_id(fpath)

    res = reveng_req(requests.get, f"embedding/{bin_id}/{start_vaddr}", params=params)

    # A 425 status is reported as an analysis that has not finished yet.
    in_progress = res.status_code == 425
    if in_progress:
        logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
                       bin_id)

    res.raise_for_status()
    return res
|
141
317
|
|
142
318
|
|
143
|
-
def RE_logs(fpath: str,
|
319
|
+
def RE_logs(fpath: str, binary_id: int = 0, console: bool = True) -> Response:
    """
    Fetch the analysis logs of a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means the ID is searched by the hash of the file
    :param console: Write the fetched logs to the logger
    :return: Response of the logs request
    """
    bin_id = re_binary_id(fpath)

    if binary_id == 0:
        bid = re_bid_search(bin_id)
    else:
        bid = binary_id

    if bid == -1:
        raise HTTPError(f"No matches found for hash: {bin_id}")

    res = reveng_req(requests.get, f"logs/{bid}")
    code = res.status_code

    if code == 404:
        logger.warning("Error, logs not found for %s.", bin_id)
    elif code == 200 and console:
        logger.info("Logs found for %s:\n%s", bin_id, res.text)

    res.raise_for_status()
    return res
|
158
341
|
|
159
342
|
|
160
|
-
def RE_cves(fpath: str,
|
343
|
+
def RE_cves(fpath: str, binary_id: int = 0) -> Response:
    """
    Fetch the known CVEs reported for a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means the ID is searched by the hash of the file
    :return: Response of the CVE request
    """
    bin_id = re_binary_id(fpath)

    if binary_id == 0:
        bid = re_bid_search(bin_id)
    else:
        bid = binary_id

    if bid == -1:
        raise HTTPError(f"No matches found for hash: {bin_id}")

    res = reveng_req(requests.get, f"cves/{bid}")
    code = res.status_code

    if code == 404:
        logger.warning("Error, binary analysis not found for %s.", bin_id)
    elif code == 200:
        cves = json.loads(res.text)
        logger.info("Checking for known CVEs embedded inside %s", fpath)

        if len(cves) == 0:
            logger.info("0 CVEs found.")
        else:
            logger.warning("Warning CVEs found!\n%s", res.text)

    res.raise_for_status()
    return res
|
370
|
+
|
181
371
|
|
182
|
-
def RE_status(fpath: str,
|
372
|
+
def RE_status(fpath: str, binary_id: int = 0) -> Response:
    """
    Fetch the status of a binary analysis
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means the ID is searched by the hash of the file
    :return: Response of the status request
    """
    bin_id = re_binary_id(fpath)

    if binary_id == 0:
        bid = re_bid_search(bin_id)
    else:
        bid = binary_id

    if bid == -1:
        raise HTTPError(f"No matches found for hash: {bin_id}")

    res = reveng_req(requests.get, f"analyse/status/{bid}")

    status_missing = res.status_code == 400
    if status_missing:
        logger.warning(" Error, status not found for %s.", bin_id)

    res.raise_for_status()
    return res
|
196
391
|
|
197
392
|
|
198
|
-
def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5):
|
393
|
+
def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list:
    """
    Compute the cosine similarity between a source embedding and the embeddings of a binary
    :param embedding: Embedding vector as python list
    :param embeddings: Symbol embeddings; each entry provides "embedding", "vaddr", "name" and "size"
    :param nns: Number of nearest neighbors
    :return: Up to nns dictionaries with keys "similaritiy", "vaddr", "name" and "size",
             ordered from most to least similar
    """
    df = DataFrame(data=embeddings)
    np_embedding = array(embedding).reshape(1, -1)
    source_embeddings = vstack(df["embedding"].values)

    # Similarities are computed once; ravel() keeps the result 1-D even for a single row.
    scores = cosine_similarity(source_embeddings, np_embedding).ravel()
    closest = scores.argsort()[::-1][:nns]

    json_sims = []
    for position in closest:
        # closest holds row positions, which is what iloc expects.
        row = df.iloc[position]
        json_sims.append({"similaritiy": float(scores[position]), "vaddr": int(row["vaddr"]),
                          "name": str(row["name"]), "size": int(row["size"])})
    return json_sims
|
213
414
|
|
214
415
|
|
215
|
-
def RE_nearest_symbols(embedding: list, model_name, nns: int = 5,
|
416
|
+
def RE_nearest_symbols(embedding: list, model_name: str, nns: int = 5,
                       collections: list = None, ignore_hashes: list = None,
                       distance: float = 0.0, debug_enabled: bool = False) -> Response:
    """
    Request function name suggestions for an embedding
    :param embedding: Embedding vector as python list
    :param model_name: Binary model name
    :param nns: Number of nearest neighbors
    :param collections: List of RevEng.AI collection names to search through
    :param ignore_hashes: List[str] SHA-256 hash of binary file to ignore symbols from (usually the current binary)
    :param distance: How close we want the ANN search to filter for; only sent when greater than 0
    :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
    :return: Response of the ANN symbol request
    """
    params = dict(nns=nns, model_name=model_name, debug_enabled=debug_enabled)

    if collections:
        # The API expects a single "collection" parameter with the names joined by "|".
        params["collection"] = "|".join(collections)

    if ignore_hashes:
        params["ignore_hashes"] = ignore_hashes

    if distance > 0.0:
        params["distance"] = distance

    body = json.dumps(embedding)
    res = reveng_req(requests.post, "ann/symbol", data=body, params=params)

    res.raise_for_status()
    return res
|
231
445
|
|
232
446
|
|
233
|
-
def RE_nearest_binaries(embedding: list, model_name, nns: int = 5,
|
447
|
+
def RE_nearest_binaries(embedding: list, model_name: str, nns: int = 5,
                        collections: list = None, ignore_hashes: list = None) -> Response:
    """
    Request executable suggestions for a binary embedding
    :param embedding: Embedding vector as python list
    :param model_name: Binary model name
    :param nns: Number of nearest neighbors
    :param collections: List of RevEng.AI collection names to search through
    :param ignore_hashes: List[str] SHA-256 hash of binary files to ignore symbols from (usually the current binary)
    :return: Response of the ANN binary request
    """
    params = dict(nns=nns, model_name=model_name)

    if collections:
        # The API expects a single "collection" parameter with the names joined by "|".
        params["collection"] = "|".join(collections)

    if ignore_hashes:
        params["ignore_hashes"] = ignore_hashes

    body = json.dumps(embedding)
    res = reveng_req(requests.post, "ann/binary", data=body, params=params)

    res.raise_for_status()
    return res
|
249
470
|
|
250
471
|
|
251
|
-
def RE_SBOM(fpath: str,
|
472
|
+
def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
    """
    Get Software Bill Of Materials for binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means the ID is searched by the hash of the file
    :return: Response of the SBOM request
    :raises HTTPError: If no binary matches the hash, or on an HTTP error status
    """
    bin_id = re_binary_id(fpath)
    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id

    if bid == -1:
        raise HTTPError(f"No matches found for hash: {bin_id}")

    res = reveng_req(requests.get, f"sboms/{bid}")

    # Check the status first so an error body is never logged as if it were an SBOM.
    res.raise_for_status()

    logger.info("SBOM for %s:\n%s", fpath, res.text)
    return res
|
263
490
|
|
264
491
|
|
265
|
-
def
|
266
|
-
"""
|
492
|
+
def RE_functions_rename(function_id: int, new_name: str) -> Response:
    """
    Send a new name for a function to the server
    :param function_id: ID of a function
    :param new_name: New function name
    :return: Response of the rename request
    """
    payload = {"new_name": new_name}
    res = reveng_req(requests.post, f"functions/rename/{function_id}", json_data=payload)

    renamed = res.status_code == 200
    if not renamed:
        logger.warning("Error, cannot rename FunctionId %d. %s", function_id, res.text)
    else:
        logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)

    res.raise_for_status()
    return res
|
507
|
+
|
508
|
+
|
509
|
+
def re_binary_id(fpath: str) -> str:
    """
    Take the SHA-256 hash of binary file
    :param fpath: File path for binary to analyse
    :return: Hex digest of the file content, or "undefined" if fpath is empty or does not exist
    """
    if not fpath or not exists(fpath):
        return "undefined"

    hf = sha256()

    with open(fpath, "rb") as f:
        # Hash in fixed-size chunks so a large binary is never held in memory as a whole.
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            hf.update(chunk)

    return hf.hexdigest()
|
272
524
|
|
273
525
|
|
274
|
-
def _binary_isa(lief_hdlr, exec_type):
|
526
|
+
def _binary_isa(lief_hdlr, exec_type: str) -> str:
    """
    Map the header of a parsed binary to its ISA name
    :param lief_hdlr: Parsed binary whose header is inspected
    :param exec_type: Executable format: "elf", "pe" or "macho"
    :return: "x86" or "x86_64"
    :raises RuntimeError: If the format or the machine type is not supported
    """
    isa = None

    if exec_type == "elf":
        arch = lief_hdlr.header.machine_type

        if arch == ELF.ARCH.i386:
            isa = "x86"
        elif arch == ELF.ARCH.x86_64:
            isa = "x86_64"
    elif exec_type == "pe":
        arch = lief_hdlr.header.machine

        if arch == PE.MACHINE_TYPES.I386:
            isa = "x86"
        elif arch == PE.MACHINE_TYPES.AMD64:
            isa = "x86_64"
    elif exec_type == "macho":
        arch = lief_hdlr.header.cpu_type

        if arch == MachO.CPU_TYPES.x86:
            isa = "x86"
        elif arch == MachO.CPU_TYPES.x86_64:
            isa = "x86_64"

    if isa is not None:
        return isa

    # Unknown format, or a machine type other than x86 / x86_64.
    logger.error("Error, failed to determine or unsupported ISA for exec_type: %s.", exec_type)
    raise RuntimeError(f"Error, failed to determine or unsupported ISA for exec_type:{exec_type}.")
|
301
554
|
|
302
|
-
|
555
|
+
|
556
|
+
def _binary_format(lief_hdlr) -> str:
|
303
557
|
"""
|
304
|
-
|
558
|
+
Get executable file format
|
305
559
|
"""
|
306
560
|
if lief_hdlr.format == lief_hdlr.format.PE:
|
307
561
|
return "pe"
|
@@ -309,41 +563,42 @@ def _binary_format(lief_hdlr):
|
|
309
563
|
return "elf"
|
310
564
|
if lief_hdlr.format == lief_hdlr.format.MACHO:
|
311
565
|
return "macho"
|
312
|
-
|
313
|
-
raise RuntimeError("Error, could not determine binary format")
|
314
566
|
|
567
|
+
logger.error("Error, could not determine binary format: %s.", lief_hdlr.format)
|
568
|
+
raise RuntimeError("Error, could not determine binary format.")
|
315
569
|
|
316
570
|
|
317
|
-
def file_type(fpath: str):
|
571
|
+
def file_type(fpath: str) -> tuple[str, str]:
    """
    Determine executable file format and ISA for binary
    :param fpath: File path for binary to analyse
    :return: Tuple of (file format, ISA), e.g. ("elf", "x86_64")
    :raises RuntimeError: If the file cannot be parsed, or its format or ISA is not supported
    """
    binary = parse(fpath)

    if binary is None:
        # lief's parse() yields None for files it cannot parse; fail with the same
        # exception type as the format/ISA helpers instead of an AttributeError.
        logger.error("Error, failed to parse binary file: %s.", fpath)
        raise RuntimeError(f"Error, failed to parse binary file: {fpath}.")

    # handle PE, ELF and Mach-O files
    file_format = _binary_format(binary)
    isa = _binary_isa(binary, file_format)

    return file_format, isa
|
327
583
|
|
328
584
|
|
329
|
-
def parse_config():
|
585
|
+
def parse_config() -> None:
    """
    Load ~/.reait.toml, if it exists, into re_conf
    Only the keys "apikey", "host" and "model" are read from the file.
    """
    config_path = expanduser("~/.reait.toml")

    if not exists(config_path):
        return

    with open(config_path, "r") as handle:
        config = tomli.loads(handle.read())

    # Keys missing from the file keep their current value in re_conf.
    overrides = {key: config[key] for key in ("apikey", "host", "model") if key in config}
    re_conf.update(overrides)
|
341
596
|
|
342
597
|
|
343
|
-
def angular_distance(x, y):
|
598
|
+
def angular_distance(x, y) -> float:
    """
    Compute the angular similarity between two embedding vectors
    The angle between x and y is scaled to [0, 1]: 1.0 for vectors pointing in the same
    direction, 0.5 for orthogonal vectors and 0.0 for opposite vectors.
    :param x: First embedding vector
    :param y: Second embedding vector
    :return: 1 - angle(x, y) / pi; NaN if one of the vectors has zero length
    """
    cos = dot(x, y) / ((dot(x, x) * dot(y, y)) ** 0.5)

    # Rounding can push the cosine marginally outside [-1, 1], where arccos returns NaN.
    # Plain comparisons are used so a NaN cosine (zero-length vector) is left untouched.
    if cos > 1.0:
        cos = 1.0
    elif cos < -1.0:
        cos = -1.0

    return 1.0 - arccos(cos) / pi
|