reait 0.0.20__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +2 -3
- reait/api.py +314 -246
- reait/main.py +184 -330
- {reait-0.0.20.dist-info → reait-1.0.1.dist-info}/METADATA +5 -38
- reait-1.0.1.dist-info/RECORD +9 -0
- reait-0.0.20.dist-info/RECORD +0 -9
- {reait-0.0.20.dist-info → reait-1.0.1.dist-info}/LICENSE +0 -0
- {reait-0.0.20.dist-info → reait-1.0.1.dist-info}/WHEEL +0 -0
- {reait-0.0.20.dist-info → reait-1.0.1.dist-info}/entry_points.txt +0 -0
- {reait-0.0.20.dist-info → reait-1.0.1.dist-info}/top_level.txt +0 -0
reait/api.py
CHANGED
@@ -1,33 +1,48 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
1
|
# -*- coding: utf-8 -*-
|
3
2
|
from __future__ import print_function, annotations
|
4
3
|
|
4
|
+
import json
|
5
|
+
import tomli
|
6
|
+
import logging
|
7
|
+
import requests
|
8
|
+
|
5
9
|
from hashlib import sha256
|
10
|
+
from datetime import datetime
|
6
11
|
|
7
12
|
from sklearn.metrics.pairwise import cosine_similarity
|
8
|
-
from os
|
13
|
+
from os import access, R_OK
|
14
|
+
from os.path import basename, isfile, expanduser, getsize
|
9
15
|
from requests import request, Response, HTTPError
|
10
|
-
import requests
|
11
16
|
from numpy import array, vstack, dot, arccos, pi
|
12
17
|
from pandas import DataFrame
|
13
|
-
import
|
14
|
-
import tomli
|
15
|
-
import logging
|
16
|
-
from lief import parse, ELF, PE, MachO
|
18
|
+
from lief import parse, Binary, ELF, PE, MachO
|
17
19
|
|
20
|
+
__version__ = "1.0.1"
|
18
21
|
|
19
22
|
re_conf = {
|
20
23
|
"apikey": "l1br3",
|
21
24
|
"host": "https://api.reveng.ai",
|
22
|
-
"model": "binnet-0.
|
25
|
+
"model": "binnet-0.3-x86",
|
23
26
|
}
|
24
27
|
|
25
28
|
|
26
29
|
logger = logging.getLogger("REAIT")
|
27
30
|
|
28
31
|
|
29
|
-
|
30
|
-
|
32
|
+
class ReaitError(HTTPError):
|
33
|
+
def __init__(self, reason: str, end_point: str = None):
|
34
|
+
response = Response()
|
35
|
+
|
36
|
+
response.reason = reason
|
37
|
+
response.status_code = 404
|
38
|
+
response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
|
39
|
+
response.url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}" if end_point else None
|
40
|
+
|
41
|
+
super().__init__(reason, response=response)
|
42
|
+
|
43
|
+
|
44
|
+
def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict = None,
|
45
|
+
params: dict = None, json_data: dict = None, timeout: int = 60, files: dict = None) -> Response:
|
31
46
|
"""
|
32
47
|
Constructs and sends a Request
|
33
48
|
:param r: Method for the new Request
|
@@ -37,98 +52,85 @@ def reveng_req(r: request, end_point: str, data=None, ex_headers: dict = None, p
|
|
37
52
|
:param params: Dictionary, list of tuples or bytes to send in the query string for the query string
|
38
53
|
:param json_data: A JSON serializable Python object to send in the body
|
39
54
|
:param timeout: Number of seconds to stop waiting for a Response
|
55
|
+
:param files: Dictionary of files to send to the specified URL
|
40
56
|
"""
|
41
|
-
url = f"{re_conf['host']}/{end_point}"
|
57
|
+
url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
|
42
58
|
headers = {"Authorization": re_conf["apikey"]}
|
43
59
|
|
44
60
|
if ex_headers:
|
45
61
|
headers.update(ex_headers)
|
46
62
|
|
47
|
-
logger.debug("Making request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s",
|
48
|
-
url, headers, data, json_data, params)
|
63
|
+
logger.debug("Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
|
64
|
+
r.__name__.upper(), url, headers, data, json_data, params, files)
|
49
65
|
|
50
|
-
response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout)
|
66
|
+
response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout, files=files)
|
51
67
|
|
52
|
-
logger.debug("Making response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
|
53
|
-
url, response.headers, response.status_code, response.text)
|
68
|
+
logger.debug("Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
|
69
|
+
r.__name__.upper(), url, response.headers, response.status_code, response.text)
|
54
70
|
|
55
71
|
return response
|
56
72
|
|
57
73
|
|
58
74
|
def re_hash_check(bin_id: str) -> bool:
|
59
|
-
|
60
|
-
res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All&user_owned=true")
|
75
|
+
res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
|
61
76
|
|
62
|
-
if res.
|
63
|
-
|
64
|
-
status = len(binaries_data) > 0
|
65
|
-
elif res.status_code == 400:
|
66
|
-
logger.warning("Bad Request: %s", res.text)
|
77
|
+
if res.ok:
|
78
|
+
return any(binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"])
|
67
79
|
else:
|
68
|
-
logger.
|
80
|
+
logger.warning("Bad Request: %s", res.text)
|
69
81
|
|
70
|
-
|
71
|
-
return status
|
82
|
+
return False
|
72
83
|
|
73
84
|
|
74
85
|
# Bin_id is referred to as hash in this program - to maintain usage BID = id of a binary bin_id = hash
|
75
86
|
# Assumes a file has been passed, correct hash only
|
76
87
|
# Returns the BID of the binary_id (hash)
|
77
88
|
def re_bid_search(bin_id: str) -> int:
|
78
|
-
res = reveng_req(requests.get,
|
89
|
+
res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
|
79
90
|
|
80
91
|
bid = -1
|
81
92
|
|
82
|
-
|
83
|
-
|
93
|
+
if res.ok:
|
94
|
+
# Filter the result who matches the SHA-256
|
95
|
+
binaries = list(filter(lambda binary: binary["sha_256_hash"] == bin_id, res.json()["query_results"]))
|
96
|
+
|
84
97
|
# Check only one record is returned
|
85
|
-
|
98
|
+
if len(binaries) == 1:
|
99
|
+
binary = binaries[0]
|
100
|
+
bid = binary["binary_id"]
|
86
101
|
|
87
|
-
|
88
|
-
|
102
|
+
logger.info("Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
|
103
|
+
bid, binary["binary_name"], binary["creation"], binary["model_name"], binary["status"])
|
104
|
+
elif len(binaries) > 1:
|
105
|
+
binaries.sort(key=lambda binary: datetime.fromisoformat(binary["creation"]).timestamp(), reverse=True)
|
89
106
|
|
90
|
-
|
91
|
-
options_dict = {}
|
107
|
+
logger.info("%d matches found for hash: %s", len(binaries), bin_id)
|
92
108
|
|
93
|
-
|
94
|
-
logger.info("[%d] - ID: {}, Name: %s, Creation: %s, Model: %s, Owner: %s, Status: %s",
|
95
|
-
idx, binary["binary_id"], binary["binary_name"], binary["creation"],
|
96
|
-
binary["model_name"], binary["owner"], binary["status"])
|
109
|
+
options_dict = {}
|
97
110
|
|
98
|
-
|
111
|
+
for idx, binary in enumerate(binaries):
|
112
|
+
logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
|
113
|
+
idx, binary["binary_id"], binary["binary_name"], binary["creation"],
|
114
|
+
binary["model_name"], binary["status"])
|
99
115
|
|
116
|
+
options_dict[idx] = binary["binary_id"]
|
117
|
+
|
118
|
+
try:
|
100
119
|
user_input = input("[+] Please enter the option you want to use for this operation:")
|
101
120
|
|
102
|
-
|
103
|
-
option_number = int(user_input)
|
121
|
+
option_number = int(user_input)
|
104
122
|
|
105
|
-
|
123
|
+
bid = options_dict.get(option_number, -1)
|
106
124
|
|
107
|
-
|
108
|
-
logger.warning("Invalid option.")
|
109
|
-
except Exception:
|
110
|
-
bid = -1
|
125
|
+
if bid == -1:
|
111
126
|
logger.warning("Invalid option.")
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
bid = binary["binary_id"]
|
116
|
-
else:
|
117
|
-
logger.warning("No matches found for hash: %s.", bin_id)
|
118
|
-
elif len(binaries_data) == 1:
|
119
|
-
binary = binaries_data[0]
|
120
|
-
bid = binary["binary_id"]
|
121
|
-
|
122
|
-
logger.info("Only one record exists, selecting - ID: %d, Name: %s, "
|
123
|
-
"Creation: %s, Model: %s, Owner: %s, Status: %s",
|
124
|
-
bid, binary["binary_name"], binary["creation"],
|
125
|
-
binary["model_name"], binary["owner"], binary["status"])
|
127
|
+
except Exception:
|
128
|
+
bid = options_dict[0]
|
129
|
+
logger.warning("Select the most recent analysis - ID: %d", bid)
|
126
130
|
else:
|
127
|
-
logger.warning("No matches found for hash: %s
|
128
|
-
elif res.status_code == 400:
|
129
|
-
logger.warning("Bad Request: %s", res.text)
|
131
|
+
logger.warning("No matches found for hash: %s", bin_id)
|
130
132
|
else:
|
131
|
-
logger.
|
133
|
+
logger.warning("Bad Request: %s", res.text)
|
132
134
|
|
133
135
|
res.raise_for_status()
|
134
136
|
return bid
|
@@ -143,15 +145,17 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
|
|
143
145
|
bin_id = re_binary_id(fpath)
|
144
146
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
145
147
|
|
148
|
+
end_point = f"v1/analyse/{bid}"
|
149
|
+
|
146
150
|
if bid == -1:
|
147
|
-
raise
|
151
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
148
152
|
|
149
|
-
res = reveng_req(requests.delete,
|
153
|
+
res: Response = reveng_req(requests.delete, end_point)
|
150
154
|
|
151
|
-
if res.
|
152
|
-
logger.info("Securely deleted %s
|
155
|
+
if res.ok:
|
156
|
+
logger.info("Securely deleted analysis ID %s - %s.", bid, bin_id)
|
153
157
|
elif res.status_code == 404:
|
154
|
-
logger.warning("Error analysis not found for %s.", bin_id)
|
158
|
+
logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
|
155
159
|
else:
|
156
160
|
logger.error("Error deleting binary %s under. Server returned %d.", bin_id, res.status_code)
|
157
161
|
|
@@ -159,10 +163,10 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
|
|
159
163
|
return res
|
160
164
|
|
161
165
|
|
162
|
-
def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
163
|
-
|
164
|
-
|
165
|
-
duplicate: bool = False, symbols: dict = None) -> Response:
|
166
|
+
def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
167
|
+
platform_options: str = None, file_options: str = None, dynamic_execution: bool = False,
|
168
|
+
command_line_args: str = None, binary_scope: str = None, tags: list = None, priority: int = 0,
|
169
|
+
duplicate: bool = False, symbols: dict = None, debug_fpath: str = None) -> Response:
|
166
170
|
"""
|
167
171
|
Start analysis job for binary file
|
168
172
|
:param fpath: File path for binary to analyse
|
@@ -172,46 +176,56 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None, plat
|
|
172
176
|
:param platform_options: OS platform
|
173
177
|
:param dynamic_execution: Enable dynamic execution in sandbox during analysis
|
174
178
|
:param command_line_args: Command line arguments to pass when running binary sample in the sandbox
|
175
|
-
:param
|
179
|
+
:param binary_scope: Analysis visibility
|
176
180
|
:param tags: Assign tags to an analysis
|
177
181
|
:param priority: Priority to processing queue
|
178
182
|
:param duplicate: Duplicate an existing binary
|
179
|
-
:param symbols:
|
183
|
+
:param symbols: JSON object containing the base address and the list of functions
|
184
|
+
:param debug_fpath: File path for debug file
|
180
185
|
"""
|
181
186
|
bin_id = re_binary_id(fpath)
|
182
187
|
result = re_hash_check(bin_id)
|
183
188
|
|
189
|
+
end_point = "v1/analyse/"
|
190
|
+
|
184
191
|
if result and duplicate is False:
|
185
192
|
logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
|
186
193
|
bin_id)
|
187
|
-
raise
|
194
|
+
raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
|
188
195
|
|
189
196
|
filename = basename(fpath)
|
190
197
|
|
191
|
-
params = {"file_name": filename, "sha_256_hash": bin_id}
|
198
|
+
params = {"file_name": filename, "size_in_bytes": getsize(fpath), "sha_256_hash": bin_id,}
|
192
199
|
|
200
|
+
if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
|
201
|
+
try:
|
202
|
+
debug = RE_upload(debug_fpath).json()
|
203
|
+
|
204
|
+
if debug["success"]:
|
205
|
+
params["debug_hash"] = debug["sha_256_hash"]
|
206
|
+
except HTTPError:
|
207
|
+
pass
|
208
|
+
|
193
209
|
for p_name in ("model_name", "isa_options", "platform_options", "file_options",
|
194
|
-
"dynamic_execution", "command_line_args", "
|
210
|
+
"dynamic_execution", "command_line_args", "binary_scope", "tags", "priority", "symbols",):
|
195
211
|
p_value = locals()[p_name]
|
196
212
|
|
197
213
|
if p_value:
|
198
214
|
params[p_name] = p_value
|
199
215
|
|
200
|
-
res = reveng_req(requests.post,
|
216
|
+
res: Response = reveng_req(requests.post, end_point, json_data=params)
|
201
217
|
|
202
|
-
if res.
|
203
|
-
logger.info("Successfully submitted binary for analysis. %s - %s", fpath,
|
218
|
+
if res.ok:
|
219
|
+
logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
|
204
220
|
elif res.status_code == 400:
|
205
|
-
|
206
|
-
|
207
|
-
if "error" in response.keys():
|
208
|
-
logger.warning("Error analysing %s - %s", fpath, response["error"])
|
221
|
+
if "error" in res.json().keys():
|
222
|
+
logger.warning("Error analysing %s - %s", fpath, res.json()["error"])
|
209
223
|
|
210
224
|
res.raise_for_status()
|
211
225
|
return res
|
212
226
|
|
213
227
|
|
214
|
-
def RE_upload(fpath: str) -> Response
|
228
|
+
def RE_upload(fpath: str) -> Response:
|
215
229
|
"""
|
216
230
|
Upload binary to Server
|
217
231
|
:param fpath: File path for binary to analyse
|
@@ -220,22 +234,27 @@ def RE_upload(fpath: str) -> Response | bool:
|
|
220
234
|
result = re_hash_check(bin_id)
|
221
235
|
|
222
236
|
if result:
|
223
|
-
logger.info("File %s - %s already
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
237
|
+
logger.info("File %s - %s already uploaded. Skipping upload...", fpath, bin_id)
|
238
|
+
|
239
|
+
res = Response()
|
240
|
+
res.status_code = 200
|
241
|
+
res.url = f"{re_conf['host']}/v1/upload"
|
242
|
+
res._content = ('{0}"success": true,'
|
243
|
+
'"message": "File already uploaded!",'
|
244
|
+
'"sha_256_hash": "{1}"{2}').format("{", bin_id, "}").encode()
|
245
|
+
else:
|
246
|
+
with open(fpath, "rb") as fd:
|
247
|
+
res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
|
248
|
+
|
249
|
+
if res.ok:
|
250
|
+
logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
|
251
|
+
elif res.status_code == 400:
|
252
|
+
if "error" in res.json().keys():
|
253
|
+
logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
|
254
|
+
elif res.status_code == 413:
|
255
|
+
logger.warning("File too large. Please upload files under 10MB.")
|
256
|
+
elif res.status_code == 500:
|
257
|
+
logger.error("Internal Server Error. Please contact support. Skipping upload...")
|
239
258
|
|
240
259
|
res.raise_for_status()
|
241
260
|
return res
|
@@ -250,65 +269,14 @@ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
|
|
250
269
|
bin_id = re_binary_id(fpath)
|
251
270
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
252
271
|
|
253
|
-
|
254
|
-
raise HTTPError(f"No matches found for hash: {bin_id}")
|
255
|
-
|
256
|
-
res = reveng_req(requests.get, f"embeddings/{bid}")
|
257
|
-
|
258
|
-
if res.status_code == 400:
|
259
|
-
logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
|
260
|
-
bin_id)
|
261
|
-
|
262
|
-
res.raise_for_status()
|
263
|
-
return res
|
264
|
-
|
265
|
-
|
266
|
-
def RE_signature(fpath: str, binary_id: int = 0) -> Response:
|
267
|
-
"""
|
268
|
-
Fetch binary BinNet signature
|
269
|
-
:param fpath: File path for binary to analyse
|
270
|
-
:param binary_id: ID of binary
|
271
|
-
"""
|
272
|
-
bin_id = re_binary_id(fpath)
|
273
|
-
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
272
|
+
end_point = f"v1/embeddings/binary/{bid}"
|
274
273
|
|
275
274
|
if bid == -1:
|
276
|
-
raise
|
277
|
-
|
278
|
-
res = reveng_req(requests.get, f"signature/{bid}")
|
279
|
-
|
280
|
-
if res.status_code == 425:
|
281
|
-
logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
|
282
|
-
bin_id)
|
283
|
-
|
284
|
-
res.raise_for_status()
|
285
|
-
return res
|
286
|
-
|
287
|
-
|
288
|
-
def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None,
|
289
|
-
model: str = None) -> Response:
|
290
|
-
"""
|
291
|
-
Fetch embedding for custom symbol range
|
292
|
-
:param fpath: File path for binary to analyse
|
293
|
-
:param start_vaddr: Start virtual address of the function to extract embeddings
|
294
|
-
:param end_vaddr: End virtual address of the function to extract embeddings
|
295
|
-
:param base_vaddr: Base address of the binary
|
296
|
-
:param model: Binary model name
|
297
|
-
"""
|
298
|
-
params = {}
|
299
|
-
|
300
|
-
if end_vaddr:
|
301
|
-
params["end_vaddr"] = end_vaddr
|
302
|
-
if base_vaddr:
|
303
|
-
params["base_vaddr"] = base_vaddr
|
304
|
-
if model:
|
305
|
-
params["models"] = model
|
306
|
-
|
307
|
-
bin_id = re_binary_id(fpath)
|
275
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
308
276
|
|
309
|
-
res = reveng_req(requests.get,
|
277
|
+
res: Response = reveng_req(requests.get, end_point)
|
310
278
|
|
311
|
-
if res.status_code ==
|
279
|
+
if res.status_code == 400:
|
312
280
|
logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
|
313
281
|
bin_id)
|
314
282
|
|
@@ -326,13 +294,15 @@ def RE_logs(fpath: str, binary_id: int = 0, console: bool = True) -> Response:
|
|
326
294
|
bin_id = re_binary_id(fpath)
|
327
295
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
328
296
|
|
297
|
+
end_point = f"v1/logs/{bid}"
|
298
|
+
|
329
299
|
if bid == -1:
|
330
|
-
raise
|
300
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
331
301
|
|
332
|
-
res = reveng_req(requests.get,
|
302
|
+
res: Response = reveng_req(requests.get, end_point)
|
333
303
|
|
334
|
-
if res.
|
335
|
-
logger.info("Logs found for %s:\n%s", bin_id, res.
|
304
|
+
if res.ok and console:
|
305
|
+
logger.info("Logs found for %s:\n%s", bin_id, res.json()["logs"])
|
336
306
|
elif res.status_code == 404:
|
337
307
|
logger.warning("Error, logs not found for %s.", bin_id)
|
338
308
|
|
@@ -349,12 +319,14 @@ def RE_cves(fpath: str, binary_id: int = 0) -> Response:
|
|
349
319
|
bin_id = re_binary_id(fpath)
|
350
320
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
351
321
|
|
322
|
+
end_point = f"cves/{bid}"
|
323
|
+
|
352
324
|
if bid == -1:
|
353
|
-
raise
|
325
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
354
326
|
|
355
|
-
res = reveng_req(requests.get,
|
327
|
+
res: Response = reveng_req(requests.get, end_point)
|
356
328
|
|
357
|
-
if res.
|
329
|
+
if res.ok:
|
358
330
|
cves = json.loads(res.text)
|
359
331
|
logger.info("Checking for known CVEs embedded inside %s", fpath)
|
360
332
|
|
@@ -369,7 +341,7 @@ def RE_cves(fpath: str, binary_id: int = 0) -> Response:
|
|
369
341
|
return res
|
370
342
|
|
371
343
|
|
372
|
-
def RE_status(fpath: str, binary_id: int = 0) -> Response:
|
344
|
+
def RE_status(fpath: str, binary_id: int = 0, console: bool = False) -> Response:
|
373
345
|
"""
|
374
346
|
Get the status of an ongoing binary analysis
|
375
347
|
:param fpath: File path for binary to analyse
|
@@ -378,11 +350,15 @@ def RE_status(fpath: str, binary_id: int = 0) -> Response:
|
|
378
350
|
bin_id = re_binary_id(fpath)
|
379
351
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
380
352
|
|
353
|
+
end_point = f"v1/analyse/status/{bid}"
|
354
|
+
|
381
355
|
if bid == -1:
|
382
|
-
raise
|
356
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
383
357
|
|
384
|
-
res = reveng_req(requests.get,
|
358
|
+
res: Response = reveng_req(requests.get, end_point)
|
385
359
|
|
360
|
+
if res.ok and console:
|
361
|
+
logger.info("Binary analysis status: %s", res.json()["status"])
|
386
362
|
if res.status_code == 400:
|
387
363
|
logger.warning(" Error, status not found for %s.", bin_id)
|
388
364
|
|
@@ -408,62 +384,79 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
|
|
408
384
|
|
409
385
|
# create json similarity object
|
410
386
|
similarities = list(zip(distances, closest_df.index.tolist()))
|
411
|
-
json_sims = [{"similaritiy": float(d[0]),
|
412
|
-
"
|
387
|
+
json_sims = [{"similaritiy": float(d[0]),
|
388
|
+
"vaddr": int(df.iloc[v]["vaddr"]),
|
389
|
+
"name": str(df.iloc[v]["name"]),
|
390
|
+
"size": int(df.iloc[v]["size"]),
|
391
|
+
} for d, v in similarities]
|
413
392
|
return json_sims
|
414
393
|
|
415
394
|
|
416
|
-
def
|
417
|
-
|
418
|
-
distance: float = 0.0, debug_enabled: bool = False) -> Response:
|
395
|
+
def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections: list[str] = None,
|
396
|
+
distance: float = 0.1, debug_enabled: bool = False) -> Response:
|
419
397
|
"""
|
420
|
-
Get
|
421
|
-
:param
|
422
|
-
:param model_name: Binary model name
|
398
|
+
Get nearest functions to a passed function ids
|
399
|
+
:param function_ids: List of function ids
|
423
400
|
:param nns: Number of nearest neighbors
|
424
401
|
:param collections: List of collections RevEng.AI collection names to search through
|
425
|
-
:param ignore_hashes: List[str] SHA-256 hash of binary file to ignore symbols from (usually the current binary)
|
426
402
|
:param distance: How close we want the ANN search to filter for
|
427
403
|
:param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
|
428
404
|
"""
|
429
|
-
params = {"
|
405
|
+
params = {"function_id_list": function_ids,
|
406
|
+
"result_per_function": nns,
|
407
|
+
"debug_mode": debug_enabled,
|
408
|
+
"distance": distance,
|
409
|
+
}
|
430
410
|
|
431
|
-
if collections
|
411
|
+
if collections:
|
432
412
|
# api param is collection, not collections
|
433
|
-
params["collection"] =
|
434
|
-
|
435
|
-
if ignore_hashes and len(ignore_hashes) > 0:
|
436
|
-
params["ignore_hashes"] = ignore_hashes
|
413
|
+
params["collection"] = collections
|
437
414
|
|
438
|
-
|
439
|
-
params["distance"] = distance
|
440
|
-
|
441
|
-
res = reveng_req(requests.post, "ann/symbol", data=json.dumps(embedding), params=params)
|
415
|
+
res: Response = reveng_req(requests.post, "v1/ann/symbol/batch", json_data=params)
|
442
416
|
|
443
417
|
res.raise_for_status()
|
444
418
|
return res
|
445
419
|
|
446
420
|
|
447
|
-
def
|
448
|
-
|
421
|
+
def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
|
422
|
+
distance: float = 0.1, debug_enabled: bool = False) -> Response:
|
449
423
|
"""
|
450
|
-
Get
|
451
|
-
:param
|
452
|
-
:param
|
424
|
+
Get the nearest functions
|
425
|
+
:param fpath: File path for binary to analyse
|
426
|
+
:param binary_id: ID of binary
|
453
427
|
:param nns: Number of nearest neighbors
|
454
|
-
:param
|
455
|
-
:param
|
428
|
+
:param distance: How close we want the ANN search to filter for
|
429
|
+
:param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
|
456
430
|
"""
|
457
|
-
|
431
|
+
bin_id = re_binary_id(fpath)
|
432
|
+
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
458
433
|
|
459
|
-
|
460
|
-
|
461
|
-
|
434
|
+
end_point = f"v1/ann/symbol/{bid}"
|
435
|
+
|
436
|
+
if bid == -1:
|
437
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
438
|
+
|
439
|
+
params = {"result_per_function": nns,
|
440
|
+
"debug_mode": debug_enabled,
|
441
|
+
"distance": distance,
|
442
|
+
}
|
443
|
+
|
444
|
+
res: Response = reveng_req(requests.post, end_point, json_data=params)
|
445
|
+
|
446
|
+
res.raise_for_status()
|
447
|
+
return res
|
462
448
|
|
463
|
-
if ignore_hashes and len(ignore_hashes) > 0:
|
464
|
-
params["ignore_hashes"] = ignore_hashes
|
465
449
|
|
466
|
-
|
450
|
+
def RE_analyze_functions(fpath: str, binary_id: int = 0) -> Response:
|
451
|
+
bin_id = re_binary_id(fpath)
|
452
|
+
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
453
|
+
|
454
|
+
end_point = f"v1/analyse/functions/{bid}"
|
455
|
+
|
456
|
+
if bid == -1:
|
457
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
458
|
+
|
459
|
+
res: Response = reveng_req(requests.get, end_point)
|
467
460
|
|
468
461
|
res.raise_for_status()
|
469
462
|
return res
|
@@ -478,10 +471,12 @@ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
|
|
478
471
|
bin_id = re_binary_id(fpath)
|
479
472
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
480
473
|
|
474
|
+
end_point = f"sboms/{bid}"
|
475
|
+
|
481
476
|
if bid == -1:
|
482
|
-
raise
|
477
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
483
478
|
|
484
|
-
res = reveng_req(requests.get,
|
479
|
+
res: Response = reveng_req(requests.get, end_point)
|
485
480
|
|
486
481
|
logger.info("SBOM for %s:\n%s", fpath, res.text)
|
487
482
|
|
@@ -495,9 +490,10 @@ def RE_functions_rename(function_id: int, new_name: str) -> Response:
|
|
495
490
|
:param function_id: ID of a function
|
496
491
|
:param new_name: New function name
|
497
492
|
"""
|
498
|
-
res = reveng_req(requests.post, f"functions/rename/{function_id}",
|
493
|
+
res: Response = reveng_req(requests.post, f"v1/functions/rename/{function_id}",
|
494
|
+
json_data={"new_name": new_name})
|
499
495
|
|
500
|
-
if res.
|
496
|
+
if res.ok:
|
501
497
|
logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
|
502
498
|
else:
|
503
499
|
logger.warning("Error, cannot rename FunctionId %d. %s", function_id, res.text)
|
@@ -506,66 +502,131 @@ def RE_functions_rename(function_id: int, new_name: str) -> Response:
|
|
506
502
|
return res
|
507
503
|
|
508
504
|
|
505
|
+
def RE_functions_rename_batch(mapping: dict[int, str]) -> Response:
|
506
|
+
"""
|
507
|
+
Send a list of dictionaries, with a corresponding key as function ID and the desired function_name
|
508
|
+
:param mapping: dictionary containing the function_id as key and function_name as value
|
509
|
+
"""
|
510
|
+
params = {"new_name_mapping":
|
511
|
+
[{"function_id": func_id,
|
512
|
+
"function_name": func_name,
|
513
|
+
} for func_id, func_name in mapping.items()]
|
514
|
+
}
|
515
|
+
|
516
|
+
res: Response = reveng_req(requests.post, "v1/functions/batch/rename", json_data=params)
|
517
|
+
|
518
|
+
res.raise_for_status()
|
519
|
+
return res
|
520
|
+
|
521
|
+
def RE_settings() -> Response:
|
522
|
+
"""
|
523
|
+
Get the configuration settings
|
524
|
+
"""
|
525
|
+
res: Response = reveng_req(requests.get, "v1/config")
|
526
|
+
|
527
|
+
res.raise_for_status()
|
528
|
+
return res
|
529
|
+
|
530
|
+
|
531
|
+
def RE_health() -> bool:
|
532
|
+
"""
|
533
|
+
Health check & verify access to the API
|
534
|
+
"""
|
535
|
+
res: Response = reveng_req(requests.get, "v1")
|
536
|
+
|
537
|
+
success = res.json()["success"]
|
538
|
+
|
539
|
+
if success:
|
540
|
+
logger.info(res.json()["message"])
|
541
|
+
else:
|
542
|
+
logger.warning(res.json()["error"])
|
543
|
+
return success
|
544
|
+
|
545
|
+
|
546
|
+
def RE_authentication() -> Response:
|
547
|
+
"""
|
548
|
+
Authentication Check
|
549
|
+
"""
|
550
|
+
res: Response = reveng_req(requests.get, "v1/authenticate")
|
551
|
+
|
552
|
+
res.raise_for_status()
|
553
|
+
return res
|
554
|
+
|
555
|
+
|
509
556
|
def re_binary_id(fpath: str) -> str:
|
510
557
|
"""
|
511
558
|
Take the SHA-256 hash of binary file
|
512
559
|
:param fpath: File path for binary to analyse
|
513
560
|
"""
|
514
|
-
if
|
515
|
-
|
561
|
+
if fpath and isfile(fpath) and access(fpath, R_OK):
|
562
|
+
hf = sha256()
|
516
563
|
|
517
|
-
|
564
|
+
with open(fpath, "rb") as fd:
|
565
|
+
c = fd.read()
|
566
|
+
hf.update(c)
|
518
567
|
|
519
|
-
|
520
|
-
|
521
|
-
|
568
|
+
return hf.hexdigest()
|
569
|
+
else:
|
570
|
+
logger.error("File '%s' doesn't exist or isn't readable", fpath)
|
522
571
|
|
523
|
-
return
|
572
|
+
return "Undefined"
|
524
573
|
|
525
574
|
|
526
|
-
def _binary_isa(
|
575
|
+
def _binary_isa(binary: Binary, exec_type: str) -> str:
|
527
576
|
"""
|
528
577
|
Get ISA format
|
529
578
|
"""
|
530
|
-
if exec_type == "
|
531
|
-
|
579
|
+
if exec_type == "ELF":
|
580
|
+
arch = binary.header.machine_type
|
532
581
|
|
533
|
-
if
|
582
|
+
if arch == ELF.ARCH.i386:
|
534
583
|
return "x86"
|
535
|
-
elif
|
584
|
+
elif arch == ELF.ARCH.x86_64:
|
536
585
|
return "x86_64"
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
586
|
+
elif arch == ELF.ARCH.ARM:
|
587
|
+
return "ARM32"
|
588
|
+
elif arch == ELF.ARCH.AARCH64:
|
589
|
+
return "ARM64"
|
590
|
+
elif exec_type == "PE":
|
591
|
+
machine_type = binary.header.machine
|
592
|
+
|
593
|
+
if machine_type == PE.Header.MACHINE_TYPES.I386:
|
541
594
|
return "x86"
|
542
|
-
elif machine_type == PE.MACHINE_TYPES.AMD64:
|
595
|
+
elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
|
543
596
|
return "x86_64"
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
597
|
+
elif machine_type == PE.Header.MACHINE_TYPES.ARM:
|
598
|
+
return "ARM32"
|
599
|
+
elif machine_type == PE.Header.MACHINE_TYPES.ARM64:
|
600
|
+
return "ARM64"
|
601
|
+
elif exec_type == "Mach-O":
|
602
|
+
cpu_type = binary.header.cpu_type
|
603
|
+
|
604
|
+
if cpu_type == MachO.CPU_TYPES.x86:
|
548
605
|
return "x86"
|
549
|
-
elif
|
606
|
+
elif cpu_type == MachO.CPU_TYPES.x86_64:
|
550
607
|
return "x86_64"
|
608
|
+
elif cpu_type == MachO.CPU_TYPES.ARM:
|
609
|
+
return "ARM32"
|
610
|
+
elif cpu_type == MachO.CPU_TYPES.ARM64:
|
611
|
+
return "ARM64"
|
551
612
|
|
552
|
-
logger.error("Error,
|
553
|
-
raise RuntimeError(f"Error,
|
613
|
+
logger.error("Error, could not determine or unsupported ISA for binary format: %s.", exec_type)
|
614
|
+
raise RuntimeError(f"Error, could not determine or unsupported ISA for binary format: {exec_type}.")
|
554
615
|
|
555
616
|
|
556
|
-
def _binary_format(
|
617
|
+
def _binary_format(binary: Binary) -> str:
|
557
618
|
"""
|
558
619
|
Get executable file format
|
559
620
|
"""
|
560
|
-
if
|
561
|
-
return "
|
562
|
-
if
|
563
|
-
return "
|
564
|
-
if
|
565
|
-
return "
|
621
|
+
if binary.format == Binary.FORMATS.PE:
|
622
|
+
return "PE"
|
623
|
+
if binary.format == Binary.FORMATS.ELF:
|
624
|
+
return "ELF"
|
625
|
+
if binary.format == Binary.FORMATS.MACHO:
|
626
|
+
return "Mach-O"
|
566
627
|
|
567
|
-
logger.error("Error, could not determine binary format: %s.",
|
568
|
-
raise RuntimeError("Error, could not determine binary format.")
|
628
|
+
logger.error("Error, could not determine or unsupported binary format: %s.", binary.format)
|
629
|
+
raise RuntimeError(f"Error, could not determine or unsupported binary format: {binary.format}")
|
569
630
|
|
570
631
|
|
571
632
|
def file_type(fpath: str) -> tuple[str, str]:
|
@@ -575,24 +636,31 @@ def file_type(fpath: str) -> tuple[str, str]:
|
|
575
636
|
"""
|
576
637
|
binary = parse(fpath)
|
577
638
|
|
578
|
-
|
579
|
-
|
580
|
-
|
639
|
+
if not binary:
|
640
|
+
file_format = isa_format = "Unknown format"
|
641
|
+
else:
|
642
|
+
# handle PE and ELF files
|
643
|
+
file_format = _binary_format(binary)
|
644
|
+
isa_format = _binary_isa(binary, file_format)
|
581
645
|
|
582
|
-
return file_format,
|
646
|
+
return file_format, isa_format
|
583
647
|
|
584
648
|
|
585
649
|
def parse_config() -> None:
|
586
650
|
"""
|
587
651
|
Parse ~/.reait.toml config file
|
588
652
|
"""
|
589
|
-
|
590
|
-
with open(expanduser("~/.reait.toml"), "r") as file:
|
591
|
-
config = tomli.loads(file.read())
|
653
|
+
fpath = expanduser("~/.reait.toml")
|
592
654
|
|
593
|
-
|
655
|
+
if isfile(fpath) and access(fpath, R_OK):
|
656
|
+
with open(fpath) as fd:
|
657
|
+
config = tomli.loads(fd.read())
|
658
|
+
|
659
|
+
for key in ("apikey", "host", "model",):
|
594
660
|
if key in config:
|
595
661
|
re_conf[key] = config[key]
|
662
|
+
else:
|
663
|
+
logger.info("File %s doesn't exist or isn't readable", fpath)
|
596
664
|
|
597
665
|
|
598
666
|
def angular_distance(x, y) -> float:
|
@@ -600,5 +668,5 @@ def angular_distance(x, y) -> float:
|
|
600
668
|
Compute angular distance between two embedding vectors
|
601
669
|
Normalised euclidian distance
|
602
670
|
"""
|
603
|
-
cos = dot(x, y) / ((dot(x, x) * dot(y, y))
|
671
|
+
cos = dot(x, y) / ((dot(x, x) * dot(y, y))**0.5)
|
604
672
|
return 1.0 - arccos(cos) / pi
|