reait 0.0.19__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- reait/__init__.py +2 -0
- reait/api.py +523 -218
- reait/main.py +265 -295
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/METADATA +40 -51
- reait-1.0.0.dist-info/RECORD +9 -0
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/WHEEL +1 -1
- reait-0.0.19.dist-info/RECORD +0 -9
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/LICENSE +0 -0
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/entry_points.txt +0 -0
- {reait-0.0.19.dist-info → reait-1.0.0.dist-info}/top_level.txt +0 -0
reait/api.py
CHANGED
@@ -1,349 +1,654 @@
-
-from __future__ import print_function
+# -*- coding: utf-8 -*-
+from __future__ import print_function, annotations
+
+import json
+import tomli
+import logging
+import requests
+
 from hashlib import sha256
-from
+from datetime import datetime
+
 from sklearn.metrics.pairwise import cosine_similarity
-import
-import
-import
-import
-from numpy import array, vstack, mean, average, dot, arccos, pi
+from os import access, R_OK
+from os.path import basename, isfile, expanduser, getsize
+from requests import request, Response, HTTPError
+from numpy import array, vstack, dot, arccos, pi
 from pandas import DataFrame
-import
-import tomli
-from os.path import isfile
-from sys import exit
-from IPython import embed
-import lief
+from lief import parse, Binary, ELF, PE, MachO

-__version__ = "0.0
+__version__ = "1.0.0"

 re_conf = {
-
-
-
+    "apikey": "l1br3",
+    "host": "https://api.reveng.ai",
+    "model": "binnet-0.3-x86",
 }

-def reveng_req(r: requests.request, end_point: str, data=None, ex_headers: dict = None, params=None):
-    url = f"{re_conf['host']}/{end_point}"
-    headers = { "Authorization": f"{re_conf['apikey']}" }
-    if ex_headers:
-        headers.update(ex_headers)
-    return r(url, headers=headers, data=data, params=params)

+logger = logging.getLogger("REAIT")
+
+
+class ReaitError(HTTPError):
+    def __init__(self, reason: str, end_point: str = None):
+        response = Response()
+
+        response.reason = reason
+        response.status_code = 404
+        response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
+        response.url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}" if end_point else None
+
+        super().__init__(reason, response=response)

-
+
+def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict = None,
+               params: dict = None, json_data: dict = None, timeout: int = 60, files: dict = None) -> Response:
     """
-
+    Constructs and sends a Request
+    :param r: Method for the new Request
+    :param end_point: Endpoint to add to the base URL
+    :param ex_headers: Extended HTTP headers to add
+    :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
+    :param params: Dictionary, list of tuples or bytes to send in the query string for the query string
+    :param json_data: A JSON serializable Python object to send in the body
+    :param timeout: Number of seconds to stop waiting for a Response
+    :param files: Dictionary of files to send to the specified URL
     """
-
-
-
-    if
-
-
-
+    url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
+    headers = {"Authorization": re_conf["apikey"]}
+
+    if ex_headers:
+        headers.update(ex_headers)
+
+    logger.debug("Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
+                 r.__name__.upper(), url, headers, data, json_data, params, files)
+
+    response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout, files=files)
+
+    logger.debug("Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
+                 r.__name__.upper(), url, response.headers, response.status_code, response.text)
+
+    return response
+
+
+def re_hash_check(bin_id: str) -> bool:
+    res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
+
+    if res.ok:
+        return any(binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"])
     else:
-
-        return
+        logger.warning("Bad Request: %s", res.text)

+    return False

-def RE_analyse(fpath: str, model: str = None, isa_options: str = None, platform_options: str = None, file_options: str = None, dynamic_execution: bool = False, command_line_args: str = None, scope: str = None, tags: str = None):
-    """
-    Start analysis job for binary file
-    """
-    filename = os.path.basename(fpath)
-    params={ 'file_name': filename }
-    for p_name in ('model', 'isa_options', 'platform_options', 'file_options', 'dynamic_execution', 'command_line_args', 'scope', 'tags'):
-        p_value = locals()[p_name]
-        if p_value:
-            params[p_name] = p_value

-
-
-
-
-
+# Bin_id is referred to as hash in this program - to maintain usage BID = id of a binary bin_id = hash
+# Assumes a file has been passed, correct hash only
+# Returns the BID of the binary_id (hash)
+def re_bid_search(bin_id: str) -> int:
+    res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})

-
-
-
-
-
+    bid = -1
+
+    if res.ok:
+        # Filter the result who matches the SHA-256
+        binaries = list(filter(lambda binary: binary["sha_256_hash"] == bin_id, res.json()["query_results"]))
+
+        # Check only one record is returned
+        if len(binaries) == 1:
+            binary = binaries[0]
+            bid = binary["binary_id"]
+
+            logger.info("Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
+                        bid, binary["binary_name"], binary["creation"], binary["model_name"], binary["status"])
+        elif len(binaries) > 1:
+            binaries.sort(key=lambda binary: datetime.fromisoformat(binary["creation"]).timestamp(), reverse=True)
+
+            logger.info("%d matches found for hash: %s", len(binaries), bin_id)
+
+            options_dict = {}
+
+            for idx, binary in enumerate(binaries):
+                logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
+                            idx, binary["binary_id"], binary["binary_name"], binary["creation"],
+                            binary["model_name"], binary["status"])
+
+                options_dict[idx] = binary["binary_id"]
+
+            try:
+                user_input = input("[+] Please enter the option you want to use for this operation:")
+
+                option_number = int(user_input)
+
+                bid = options_dict.get(option_number, -1)
+
+                if bid == -1:
+                    logger.warning("Invalid option.")
+            except Exception:
+                bid = options_dict[0]
+                logger.warning("Select the most recent analysis - ID: %d", bid)
+        else:
+            logger.warning("No matches found for hash: %s", bin_id)
+    else:
+        logger.warning("Bad Request: %s", res.text)

     res.raise_for_status()
+    return bid


-def
+def RE_delete(fpath: str, binary_id: int = 0) -> Response:
     """
-
+    Delete analysis results for Binary ID in command
+    :param fpath: File path for binary to analyse
+    :param binary_id: ID of binary
     """
-
-    if
-        print("[+] Successfully uploaded binary to your account.")
-        print(f"[+] {fpath} - {binary_id(fpath)}")
-        return res
+    bin_id = re_binary_id(fpath)
+    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id

-
-
-
-
+    end_point = f"v1/analyse/{bid}"
+
+    if bid == -1:
+        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
+
+    res: Response = reveng_req(requests.delete, end_point)
+
+    if res.ok:
+        logger.info("Securely deleted analysis ID %s - %s.", bid, bin_id)
+    elif res.status_code == 404:
+        logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
+    else:
+        logger.error("Error deleting binary %s under. Server returned %d.", bin_id, res.status_code)

     res.raise_for_status()
+    return res
+
+
+def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
+               platform_options: str = None, file_options: str = None, dynamic_execution: bool = False,
+               command_line_args: str = None, binary_scope: str = None, tags: list = None, priority: int = 0,
+               duplicate: bool = False, symbols: dict = None, debug_fpath: str = None) -> Response:
+    """
+    Start analysis job for binary file
+    :param fpath: File path for binary to analyse
+    :param model_name: Binary model name
+    :param isa_options: Executable ISA
+    :param file_options: File options
+    :param platform_options: OS platform
+    :param dynamic_execution: Enable dynamic execution in sandbox during analysis
+    :param command_line_args: Command line arguments to pass when running binary sample in the sandbox
+    :param binary_scope: Analysis visibility
+    :param tags: Assign tags to an analysis
+    :param priority: Priority to processing queue
+    :param duplicate: Duplicate an existing binary
+    :param symbols: JSON object containing the base address and the list of functions
+    :param debug_fpath: File path for debug file
+    """
+    bin_id = re_binary_id(fpath)
+    result = re_hash_check(bin_id)
+
+    end_point = "v1/analyse/"
+
+    if result and duplicate is False:
+        logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
+                     bin_id)
+        raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
+
+    filename = basename(fpath)
+
+    params = {"file_name": filename, "size_in_bytes": getsize(fpath), "sha_256_hash": bin_id,}
+
+    if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
+        try:
+            debug = RE_upload(debug_fpath).json()
+
+            if debug["success"]:
+                params["debug_hash"] = debug["sha_256_hash"]
+        except HTTPError:
+            pass
+
+    for p_name in ("model_name", "isa_options", "platform_options", "file_options",
+                   "dynamic_execution", "command_line_args", "binary_scope", "tags", "priority", "symbols",):
+        p_value = locals()[p_name]
+
+        if p_value:
+            params[p_name] = p_value

+    res: Response = reveng_req(requests.post, end_point, json_data=params)

-
-
-
-
-
-    res = reveng_req(requests.get, f"embeddings/{binary_id(fpath)}", params=params)
-    if res.status_code == 400:
-        print(f"[-] Analysis for {binary_id(fpath)} still in progress. Please check the logs (-l) and try again later.")
+    if res.ok:
+        logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
+    elif res.status_code == 400:
+        if "error" in res.json().keys():
+            logger.warning("Error analysing %s - %s", fpath, res.json()["error"])

     res.raise_for_status()
-    return res
+    return res


-def
+def RE_upload(fpath: str) -> Response:
     """
-
+    Upload binary to Server
+    :param fpath: File path for binary to analyse
     """
-
-
-
-
+    bin_id = re_binary_id(fpath)
+    result = re_hash_check(bin_id)
+
+    if result:
+        logger.info("File %s - %s already uploaded. Skipping upload...", fpath, bin_id)
+
+        res = Response()
+        res.status_code = 200
+        res.url = f"{re_conf['host']}/v1/upload"
+        res._content = ('{0}"success": true,'
+                        '"message": "File already uploaded!",'
+                        '"sha_256_hash": "{1}"{2}').format("{", bin_id, "}").encode()
+    else:
+        with open(fpath, "rb") as fd:
+            res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
+
+        if res.ok:
+            logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
+        elif res.status_code == 400:
+            if "error" in res.json().keys():
+                logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
+        elif res.status_code == 413:
+            logger.warning("File too large. Please upload files under 10MB.")
+        elif res.status_code == 500:
+            logger.error("Internal Server Error. Please contact support. Skipping upload...")

     res.raise_for_status()
-    return res
+    return res


-def
+def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
     """
-
+    Fetch symbol embeddings
+    :param fpath: File path for binary to analyse
+    :param binary_id: ID of binary
     """
-
+    bin_id = re_binary_id(fpath)
+    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id

-
-        params['end_vaddr']: end_vaddr
-    if base_vaddr:
-        params['base_vaddr']: base_vaddr
-    if model:
-        params['model']: model
+    end_point = f"v1/embeddings/binary/{bid}"

-
-
-
-
+    if bid == -1:
+        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
+
+    res: Response = reveng_req(requests.get, end_point)
+
+    if res.status_code == 400:
+        logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
+                       bin_id)

     res.raise_for_status()
-    return res
+    return res


-def RE_logs(fpath: str,
+def RE_logs(fpath: str, binary_id: int = 0, console: bool = True) -> Response:
     """
-
+    Get the logs for an analysis associated to Binary ID in command
+    :param fpath: File path for binary to analyse
+    :param binary_id: ID of binary
+    :param console: Show response in console
     """
-    bin_id =
-
-
-
-
-
+    bin_id = re_binary_id(fpath)
+    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
+
+    end_point = f"v1/logs/{bid}"
+
+    if bid == -1:
+        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
+
+    res: Response = reveng_req(requests.get, end_point)
+
+    if res.ok and console:
+        logger.info("Logs found for %s:\n%s", bin_id, res.json()["logs"])
     elif res.status_code == 404:
-
-        return
+        logger.warning("Error, logs not found for %s.", bin_id)

     res.raise_for_status()
+    return res


-def RE_cves(fpath: str,
+def RE_cves(fpath: str, binary_id: int = 0) -> Response:
     """
-
+    Check for known CVEs in Binary
+    :param fpath: File path for binary to analyse
+    :param binary_id: ID of binary
     """
-    bin_id =
-
-
-
+    bin_id = re_binary_id(fpath)
+    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
+
+    end_point = f"cves/{bid}"
+
+    if bid == -1:
+        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
+
+    res: Response = reveng_req(requests.get, end_point)
+
+    if res.ok:
         cves = json.loads(res.text)
-
+        logger.info("Checking for known CVEs embedded inside %s", fpath)
+
         if len(cves) == 0:
-
+            logger.info("0 CVEs found.")
         else:
-
-            print_json(data=cves)
-        return
+            logger.warning("Warning CVEs found!\n%s", res.text)
     elif res.status_code == 404:
-
-        return
+        logger.warning("Error, binary analysis not found for %s.", bin_id)

     res.raise_for_status()
+    return res

-
+
+def RE_status(fpath: str, binary_id: int = 0, console: bool = False) -> Response:
     """
-
+    Get the status of an ongoing binary analysis
+    :param fpath: File path for binary to analyse
+    :param binary_id: ID of binary
     """
-    bin_id =
-
-
-
-
-
-
-
+    bin_id = re_binary_id(fpath)
+    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
+
+    end_point = f"v1/analyse/status/{bid}"
+
+    if bid == -1:
+        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
+
+    res: Response = reveng_req(requests.get, end_point)
+
+    if res.ok and console:
+        logger.info("Binary analysis status: %s", res.json()["status"])
+    if res.status_code == 400:
+        logger.warning(" Error, status not found for %s.", bin_id)

     res.raise_for_status()
+    return res


-def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5):
+def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list:
     """
-
+    Compute the cosine distance between source embedding and embedding from binary
+    :param embedding: Embedding vector as python list
+    :param embeddings: Symbol embeddings
+    :param nns: Number of nearest neighbors
     """
     df = DataFrame(data=embeddings)
     np_embedding = array(embedding).reshape(1, -1)
-    source_embeddings = vstack(df[
+    source_embeddings = vstack(df["embedding"].values)
     closest = cosine_similarity(source_embeddings, np_embedding).squeeze().argsort()[::-1][:nns]
     distances = cosine_similarity(source_embeddings[closest], np_embedding)
+
     # match closest embeddings with similarity
     closest_df = df.iloc[closest]
+
     # create json similarity object
     similarities = list(zip(distances, closest_df.index.tolist()))
-    json_sims = [{
+    json_sims = [{"similaritiy": float(d[0]),
+                  "vaddr": int(df.iloc[v]["vaddr"]),
+                  "name": str(df.iloc[v]["name"]),
+                  "size": int(df.iloc[v]["size"])
+                  } for d, v in similarities]
     return json_sims


-def
+def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections: list[str] = None,
+                             distance: float = 0.1, debug_enabled: bool = False) -> Response:
     """
-
-
-
-
+    Get nearest functions to a passed function ids
+    :param function_ids: List of function ids
+    :param nns: Number of nearest neighbors
+    :param collections: List of collections RevEng.AI collection names to search through
+    :param distance: How close we want the ANN search to filter for
+    :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
     """
-    params={
+    params = {"function_id_list": function_ids,
+              "result_per_function": nns,
+              "debug_mode": debug_enabled,
+              "distance": distance,}

     if collections:
-
+        # api param is collection, not collections
+        params["collection"] = collections
+
+    res: Response = reveng_req(requests.post, "v1/ann/symbol/batch", json_data=params)

-    res = reveng_req(requests.post, "ann/symbol", data=json.dumps(embedding), params=params)
     res.raise_for_status()
-
-    print_json(data=f_suggestions)
+    return res


-def
+def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
+                         distance: float = 0.1, debug_enabled: bool = False) -> Response:
     """
-
-
-
-
+    Get the nearest functions
+    :param fpath: File path for binary to analyse
+    :param binary_id: ID of binary
+    :param nns: Number of nearest neighbors
+    :param distance: How close we want the ANN search to filter for
+    :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
     """
-
+    bin_id = re_binary_id(fpath)
+    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id

-
-
+    end_point = f"v1/ann/symbol/{bid}"
+
+    if bid == -1:
+        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
+
+    params = {"result_per_function": nns,
+              "debug_mode": debug_enabled,
+              "distance": distance, }
+
+    res: Response = reveng_req(requests.post, end_point, json_data=params)
+
+    res.raise_for_status()
+    return res
+
+
+def RE_analyze_functions(fpath: str, binary_id: int = 0) -> Response:
+    bin_id = re_binary_id(fpath)
+    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
+
+    end_point = f"v1/analyse/functions/{bid}"
+
+    if bid == -1:
+        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
+
+    res: Response = reveng_req(requests.get, end_point)
+
+    res.raise_for_status()
+    return res
+
+
+def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
+    """
+    Get Software Bill Of Materials for binary
+    :param fpath: File path for binary to analyse
+    :param binary_id: ID of binary
+    """
+    bin_id = re_binary_id(fpath)
+    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
+
+    end_point = f"sboms/{bid}"
+
+    if bid == -1:
+        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
+
+    res: Response = reveng_req(requests.get, end_point)
+
+    logger.info("SBOM for %s:\n%s", fpath, res.text)
+
+    res.raise_for_status()
+    return res
+
+
+def RE_functions_rename(function_id: int, new_name: str) -> Response:
+    """
+    Send the new name of a function to C2
+    :param function_id: ID of a function
+    :param new_name: New function name
+    """
+    res: Response = reveng_req(requests.post, f"v1/functions/rename/{function_id}",
+                               json_data={"new_name": new_name})
+
+    if res.ok:
+        logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
+    else:
+        logger.warning("Error, cannot rename FunctionId %d. %s", function_id, res.text)
+
+    res.raise_for_status()
+    return res
+
+
+def RE_settings() -> Response:
+    """
+    Get the configuration settings
+    """
+    res: Response = reveng_req(requests.get, "v1/config")

-    res = reveng_req(requests.post, "ann/binary", data=json.dumps(embedding), params=params)
     res.raise_for_status()
-
-
+    return res
+
+
+def RE_health() -> bool:
+    """
+    Health check & verify access to the API
+    """
+    res: Response = reveng_req(requests.get, "v1")
+
+    success = res.json()["success"]
+
+    if success:
+        logger.info(res.json()["message"])
+    else:
+        logger.warning(res.json()["error"])
+    return success


-def
+def RE_authentication() -> Response:
     """
-
-    :param fpath: File path for binaty to analyse
-    :param model_name: str model name of RevEng.AI AI model
+    Authentication Check
     """
-
+    res: Response = reveng_req(requests.get, "v1/authenticate")

-    res = reveng_req(requests.get, f"sboms/{binary_id(fpath)}", params=params)
     res.raise_for_status()
-
-    print_json(data=sbom)
+    return res


-def
-    """
-
-
-
-
-
+def re_binary_id(fpath: str) -> str:
+    """
+    Take the SHA-256 hash of binary file
+    :param fpath: File path for binary to analyse
+    """
+    if fpath and isfile(fpath) and access(fpath, R_OK):
+        hf = sha256()

+        with open(fpath, "rb") as fd:
+            c = fd.read()
+            hf.update(c)
+
+        return hf.hexdigest()
+    else:
+        logger.error("File '%s' doesn't exist or isn't readable", fpath)

-
+    return "undefined"
+
+
+def _binary_isa(binary: Binary, exec_type: str) -> str:
     """
-
+    Get ISA format
     """
-    if exec_type == "
-
-
+    if exec_type == "ELF":
+        arch = binary.header.machine_type
+
+        if arch == ELF.ARCH.i386:
             return "x86"
-        elif
+        elif arch == ELF.ARCH.x86_64:
             return "x86_64"
-
-
-
-
+        elif arch == ELF.ARCH.ARM:
+            return "ARM32"
+        elif arch == ELF.ARCH.AARCH64:
+            return "ARM64"
+    elif exec_type == "PE":
+        machine_type = binary.header.machine
+
+        if machine_type == PE.Header.MACHINE_TYPES.I386:
             return "x86"
-        elif machine_type ==
+        elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
             return "x86_64"
-
-
-        machine_type
-
+        elif machine_type == PE.Header.MACHINE_TYPES.ARM:
+            return "ARM32"
+        elif machine_type == PE.Header.MACHINE_TYPES.ARM64:
+            return "ARM64"
+    elif exec_type == "Mach-O":
+        cpu_type = binary.header.cpu_type
+
+        if cpu_type == MachO.CPU_TYPES.x86:
             return "x86"
-        elif
+        elif cpu_type == MachO.CPU_TYPES.x86_64:
             return "x86_64"
-
-
+        elif cpu_type == MachO.CPU_TYPES.ARM:
+            return "ARM32"
+        elif cpu_type == MachO.CPU_TYPES.ARM64:
+            return "ARM64"
+
+    logger.error("Error, could not determine or unsupported ISA for binary format: %s.", exec_type)
+    raise RuntimeError(f"Error, could not determine or unsupported ISA for binary format: {exec_type}.")


-def _binary_format(
+def _binary_format(binary: Binary) -> str:
     """
-
+    Get executable file format
     """
-    if
-        return "
-    if
-        return "
-    if
-        return "
-
-    raise RuntimeError("Error, could not determine binary format")
+    if binary.format == Binary.FORMATS.PE:
+        return "PE"
+    if binary.format == Binary.FORMATS.ELF:
+        return "ELF"
+    if binary.format == Binary.FORMATS.MACHO:
+        return "Mach-O"

+    logger.error("Error, could not determine or unsupported binary format: %s.", binary.format)
+    raise RuntimeError(f"Error, could not determine or unsupported binary format: {binary.format}")


-def file_type(fpath: str):
+def file_type(fpath: str) -> tuple[str, str]:
     """
-
+    Determine ISA for binary
+    :param fpath: File path for binary to analyse
     """
-    binary =
+    binary = parse(fpath)

-
-
-
-
+    if not binary:
+        file_format = isa_format = "Unknown format"
+    else:
+        # handle PE and ELF files
+        file_format = _binary_format(binary)
+        isa_format = _binary_isa(binary, file_format)

+    return file_format, isa_format

-
+
+def parse_config() -> None:
+    """
+    Parse ~/.reait.toml config file
     """
-
-    """
-    if not os.path.exists(os.path.expanduser("~/.reait.toml")):
-        return
+    fpath = expanduser("~/.reait.toml")

-
-
-
-
-
+    if isfile(fpath) and access(fpath, R_OK):
+        with open(fpath) as fd:
+            config = tomli.loads(fd.read())
+
+        for key in ("apikey", "host", "model",):
+            if key in config:
+                re_conf[key] = config[key]
+    else:
+        logger.info("File %s doesn't exist or isn't readable", fpath)


-def angular_distance(x, y):
+def angular_distance(x, y) -> float:
     """
     Compute angular distance between two embedding vectors
-    Normalised
+    Normalised euclidian distance
     """
-    cos = dot(x, y) / ((dot(x, x) * dot(y, y))
-    return 1.0 - arccos(cos)/pi
+    cos = dot(x, y) / ((dot(x, x) * dot(y, y))**0.5)
+    return 1.0 - arccos(cos) / pi