reait 0.0.20__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +2 -3
- reait/api.py +296 -246
- reait/main.py +184 -330
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/METADATA +3 -36
- reait-1.0.0.dist-info/RECORD +9 -0
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/WHEEL +1 -1
- reait-0.0.20.dist-info/RECORD +0 -9
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/LICENSE +0 -0
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/entry_points.txt +0 -0
- {reait-0.0.20.dist-info → reait-1.0.0.dist-info}/top_level.txt +0 -0
reait/api.py
CHANGED
@@ -1,33 +1,48 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
1
|
# -*- coding: utf-8 -*-
|
3
2
|
from __future__ import print_function, annotations
|
4
3
|
|
4
|
+
import json
|
5
|
+
import tomli
|
6
|
+
import logging
|
7
|
+
import requests
|
8
|
+
|
5
9
|
from hashlib import sha256
|
10
|
+
from datetime import datetime
|
6
11
|
|
7
12
|
from sklearn.metrics.pairwise import cosine_similarity
|
8
|
-
from os
|
13
|
+
from os import access, R_OK
|
14
|
+
from os.path import basename, isfile, expanduser, getsize
|
9
15
|
from requests import request, Response, HTTPError
|
10
|
-
import requests
|
11
16
|
from numpy import array, vstack, dot, arccos, pi
|
12
17
|
from pandas import DataFrame
|
13
|
-
import
|
14
|
-
import tomli
|
15
|
-
import logging
|
16
|
-
from lief import parse, ELF, PE, MachO
|
18
|
+
from lief import parse, Binary, ELF, PE, MachO
|
17
19
|
|
20
|
+
__version__ = "1.0.0"
|
18
21
|
|
19
22
|
re_conf = {
|
20
23
|
"apikey": "l1br3",
|
21
24
|
"host": "https://api.reveng.ai",
|
22
|
-
"model": "binnet-0.
|
25
|
+
"model": "binnet-0.3-x86",
|
23
26
|
}
|
24
27
|
|
25
28
|
|
26
29
|
logger = logging.getLogger("REAIT")
|
27
30
|
|
28
31
|
|
29
|
-
|
30
|
-
|
32
|
+
class ReaitError(HTTPError):
|
33
|
+
def __init__(self, reason: str, end_point: str = None):
|
34
|
+
response = Response()
|
35
|
+
|
36
|
+
response.reason = reason
|
37
|
+
response.status_code = 404
|
38
|
+
response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
|
39
|
+
response.url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}" if end_point else None
|
40
|
+
|
41
|
+
super().__init__(reason, response=response)
|
42
|
+
|
43
|
+
|
44
|
+
def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict = None,
|
45
|
+
params: dict = None, json_data: dict = None, timeout: int = 60, files: dict = None) -> Response:
|
31
46
|
"""
|
32
47
|
Constructs and sends a Request
|
33
48
|
:param r: Method for the new Request
|
@@ -37,98 +52,85 @@ def reveng_req(r: request, end_point: str, data=None, ex_headers: dict = None, p
|
|
37
52
|
:param params: Dictionary, list of tuples or bytes to send in the query string for the query string
|
38
53
|
:param json_data: A JSON serializable Python object to send in the body
|
39
54
|
:param timeout: Number of seconds to stop waiting for a Response
|
55
|
+
:param files: Dictionary of files to send to the specified URL
|
40
56
|
"""
|
41
|
-
url = f"{re_conf['host']}/{end_point}"
|
57
|
+
url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
|
42
58
|
headers = {"Authorization": re_conf["apikey"]}
|
43
59
|
|
44
60
|
if ex_headers:
|
45
61
|
headers.update(ex_headers)
|
46
62
|
|
47
|
-
logger.debug("Making request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s",
|
48
|
-
url, headers, data, json_data, params)
|
63
|
+
logger.debug("Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
|
64
|
+
r.__name__.upper(), url, headers, data, json_data, params, files)
|
49
65
|
|
50
|
-
response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout)
|
66
|
+
response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout, files=files)
|
51
67
|
|
52
|
-
logger.debug("Making response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
|
53
|
-
url, response.headers, response.status_code, response.text)
|
68
|
+
logger.debug("Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
|
69
|
+
r.__name__.upper(), url, response.headers, response.status_code, response.text)
|
54
70
|
|
55
71
|
return response
|
56
72
|
|
57
73
|
|
58
74
|
def re_hash_check(bin_id: str) -> bool:
|
59
|
-
|
60
|
-
res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All&user_owned=true")
|
75
|
+
res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
|
61
76
|
|
62
|
-
if res.
|
63
|
-
|
64
|
-
status = len(binaries_data) > 0
|
65
|
-
elif res.status_code == 400:
|
66
|
-
logger.warning("Bad Request: %s", res.text)
|
77
|
+
if res.ok:
|
78
|
+
return any(binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"])
|
67
79
|
else:
|
68
|
-
logger.
|
80
|
+
logger.warning("Bad Request: %s", res.text)
|
69
81
|
|
70
|
-
|
71
|
-
return status
|
82
|
+
return False
|
72
83
|
|
73
84
|
|
74
85
|
# Bin_id is referred to as hash in this program - to maintain usage BID = id of a binary bin_id = hash
|
75
86
|
# Assumes a file has been passed, correct hash only
|
76
87
|
# Returns the BID of the binary_id (hash)
|
77
88
|
def re_bid_search(bin_id: str) -> int:
|
78
|
-
res = reveng_req(requests.get,
|
89
|
+
res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
|
79
90
|
|
80
91
|
bid = -1
|
81
92
|
|
82
|
-
|
83
|
-
|
93
|
+
if res.ok:
|
94
|
+
# Filter the result who matches the SHA-256
|
95
|
+
binaries = list(filter(lambda binary: binary["sha_256_hash"] == bin_id, res.json()["query_results"]))
|
96
|
+
|
84
97
|
# Check only one record is returned
|
85
|
-
|
98
|
+
if len(binaries) == 1:
|
99
|
+
binary = binaries[0]
|
100
|
+
bid = binary["binary_id"]
|
101
|
+
|
102
|
+
logger.info("Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
|
103
|
+
bid, binary["binary_name"], binary["creation"], binary["model_name"], binary["status"])
|
104
|
+
elif len(binaries) > 1:
|
105
|
+
binaries.sort(key=lambda binary: datetime.fromisoformat(binary["creation"]).timestamp(), reverse=True)
|
86
106
|
|
87
|
-
|
88
|
-
logger.info("%d matches found for hash: %s.", len(binaries_data), bin_id)
|
107
|
+
logger.info("%d matches found for hash: %s", len(binaries), bin_id)
|
89
108
|
|
90
|
-
|
91
|
-
options_dict = {}
|
109
|
+
options_dict = {}
|
92
110
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
111
|
+
for idx, binary in enumerate(binaries):
|
112
|
+
logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
|
113
|
+
idx, binary["binary_id"], binary["binary_name"], binary["creation"],
|
114
|
+
binary["model_name"], binary["status"])
|
97
115
|
|
98
|
-
|
116
|
+
options_dict[idx] = binary["binary_id"]
|
99
117
|
|
118
|
+
try:
|
100
119
|
user_input = input("[+] Please enter the option you want to use for this operation:")
|
101
120
|
|
102
|
-
|
103
|
-
option_number = int(user_input)
|
121
|
+
option_number = int(user_input)
|
104
122
|
|
105
|
-
|
123
|
+
bid = options_dict.get(option_number, -1)
|
106
124
|
|
107
|
-
|
108
|
-
logger.warning("Invalid option.")
|
109
|
-
except Exception:
|
110
|
-
bid = -1
|
125
|
+
if bid == -1:
|
111
126
|
logger.warning("Invalid option.")
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
bid = binary["binary_id"]
|
116
|
-
else:
|
117
|
-
logger.warning("No matches found for hash: %s.", bin_id)
|
118
|
-
elif len(binaries_data) == 1:
|
119
|
-
binary = binaries_data[0]
|
120
|
-
bid = binary["binary_id"]
|
121
|
-
|
122
|
-
logger.info("Only one record exists, selecting - ID: %d, Name: %s, "
|
123
|
-
"Creation: %s, Model: %s, Owner: %s, Status: %s",
|
124
|
-
bid, binary["binary_name"], binary["creation"],
|
125
|
-
binary["model_name"], binary["owner"], binary["status"])
|
127
|
+
except Exception:
|
128
|
+
bid = options_dict[0]
|
129
|
+
logger.warning("Select the most recent analysis - ID: %d", bid)
|
126
130
|
else:
|
127
|
-
logger.warning("No matches found for hash: %s
|
128
|
-
elif res.status_code == 400:
|
129
|
-
logger.warning("Bad Request: %s", res.text)
|
131
|
+
logger.warning("No matches found for hash: %s", bin_id)
|
130
132
|
else:
|
131
|
-
logger.
|
133
|
+
logger.warning("Bad Request: %s", res.text)
|
132
134
|
|
133
135
|
res.raise_for_status()
|
134
136
|
return bid
|
@@ -143,15 +145,17 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
|
|
143
145
|
bin_id = re_binary_id(fpath)
|
144
146
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
145
147
|
|
148
|
+
end_point = f"v1/analyse/{bid}"
|
149
|
+
|
146
150
|
if bid == -1:
|
147
|
-
raise
|
151
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
148
152
|
|
149
|
-
res = reveng_req(requests.delete,
|
153
|
+
res: Response = reveng_req(requests.delete, end_point)
|
150
154
|
|
151
|
-
if res.
|
152
|
-
logger.info("Securely deleted %s
|
155
|
+
if res.ok:
|
156
|
+
logger.info("Securely deleted analysis ID %s - %s.", bid, bin_id)
|
153
157
|
elif res.status_code == 404:
|
154
|
-
logger.warning("Error analysis not found for %s.", bin_id)
|
158
|
+
logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
|
155
159
|
else:
|
156
160
|
logger.error("Error deleting binary %s under. Server returned %d.", bin_id, res.status_code)
|
157
161
|
|
@@ -159,10 +163,10 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
|
|
159
163
|
return res
|
160
164
|
|
161
165
|
|
162
|
-
def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
163
|
-
|
164
|
-
|
165
|
-
duplicate: bool = False, symbols: dict = None) -> Response:
|
166
|
+
def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
167
|
+
platform_options: str = None, file_options: str = None, dynamic_execution: bool = False,
|
168
|
+
command_line_args: str = None, binary_scope: str = None, tags: list = None, priority: int = 0,
|
169
|
+
duplicate: bool = False, symbols: dict = None, debug_fpath: str = None) -> Response:
|
166
170
|
"""
|
167
171
|
Start analysis job for binary file
|
168
172
|
:param fpath: File path for binary to analyse
|
@@ -172,46 +176,56 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None, plat
|
|
172
176
|
:param platform_options: OS platform
|
173
177
|
:param dynamic_execution: Enable dynamic execution in sandbox during analysis
|
174
178
|
:param command_line_args: Command line arguments to pass when running binary sample in the sandbox
|
175
|
-
:param
|
179
|
+
:param binary_scope: Analysis visibility
|
176
180
|
:param tags: Assign tags to an analysis
|
177
181
|
:param priority: Priority to processing queue
|
178
182
|
:param duplicate: Duplicate an existing binary
|
179
|
-
:param symbols:
|
183
|
+
:param symbols: JSON object containing the base address and the list of functions
|
184
|
+
:param debug_fpath: File path for debug file
|
180
185
|
"""
|
181
186
|
bin_id = re_binary_id(fpath)
|
182
187
|
result = re_hash_check(bin_id)
|
183
188
|
|
189
|
+
end_point = "v1/analyse/"
|
190
|
+
|
184
191
|
if result and duplicate is False:
|
185
192
|
logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
|
186
193
|
bin_id)
|
187
|
-
raise
|
194
|
+
raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
|
188
195
|
|
189
196
|
filename = basename(fpath)
|
190
197
|
|
191
|
-
params = {"file_name": filename, "sha_256_hash": bin_id}
|
198
|
+
params = {"file_name": filename, "size_in_bytes": getsize(fpath), "sha_256_hash": bin_id,}
|
199
|
+
|
200
|
+
if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
|
201
|
+
try:
|
202
|
+
debug = RE_upload(debug_fpath).json()
|
192
203
|
|
204
|
+
if debug["success"]:
|
205
|
+
params["debug_hash"] = debug["sha_256_hash"]
|
206
|
+
except HTTPError:
|
207
|
+
pass
|
208
|
+
|
193
209
|
for p_name in ("model_name", "isa_options", "platform_options", "file_options",
|
194
|
-
"dynamic_execution", "command_line_args", "
|
210
|
+
"dynamic_execution", "command_line_args", "binary_scope", "tags", "priority", "symbols",):
|
195
211
|
p_value = locals()[p_name]
|
196
212
|
|
197
213
|
if p_value:
|
198
214
|
params[p_name] = p_value
|
199
215
|
|
200
|
-
res = reveng_req(requests.post,
|
216
|
+
res: Response = reveng_req(requests.post, end_point, json_data=params)
|
201
217
|
|
202
|
-
if res.
|
203
|
-
logger.info("Successfully submitted binary for analysis. %s - %s", fpath,
|
218
|
+
if res.ok:
|
219
|
+
logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
|
204
220
|
elif res.status_code == 400:
|
205
|
-
|
206
|
-
|
207
|
-
if "error" in response.keys():
|
208
|
-
logger.warning("Error analysing %s - %s", fpath, response["error"])
|
221
|
+
if "error" in res.json().keys():
|
222
|
+
logger.warning("Error analysing %s - %s", fpath, res.json()["error"])
|
209
223
|
|
210
224
|
res.raise_for_status()
|
211
225
|
return res
|
212
226
|
|
213
227
|
|
214
|
-
def RE_upload(fpath: str) -> Response
|
228
|
+
def RE_upload(fpath: str) -> Response:
|
215
229
|
"""
|
216
230
|
Upload binary to Server
|
217
231
|
:param fpath: File path for binary to analyse
|
@@ -220,22 +234,27 @@ def RE_upload(fpath: str) -> Response | bool:
|
|
220
234
|
result = re_hash_check(bin_id)
|
221
235
|
|
222
236
|
if result:
|
223
|
-
logger.info("File %s - %s already
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
237
|
+
logger.info("File %s - %s already uploaded. Skipping upload...", fpath, bin_id)
|
238
|
+
|
239
|
+
res = Response()
|
240
|
+
res.status_code = 200
|
241
|
+
res.url = f"{re_conf['host']}/v1/upload"
|
242
|
+
res._content = ('{0}"success": true,'
|
243
|
+
'"message": "File already uploaded!",'
|
244
|
+
'"sha_256_hash": "{1}"{2}').format("{", bin_id, "}").encode()
|
245
|
+
else:
|
246
|
+
with open(fpath, "rb") as fd:
|
247
|
+
res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
|
248
|
+
|
249
|
+
if res.ok:
|
250
|
+
logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
|
251
|
+
elif res.status_code == 400:
|
252
|
+
if "error" in res.json().keys():
|
253
|
+
logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
|
254
|
+
elif res.status_code == 413:
|
255
|
+
logger.warning("File too large. Please upload files under 10MB.")
|
256
|
+
elif res.status_code == 500:
|
257
|
+
logger.error("Internal Server Error. Please contact support. Skipping upload...")
|
239
258
|
|
240
259
|
res.raise_for_status()
|
241
260
|
return res
|
@@ -250,65 +269,14 @@ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
|
|
250
269
|
bin_id = re_binary_id(fpath)
|
251
270
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
252
271
|
|
253
|
-
|
254
|
-
raise HTTPError(f"No matches found for hash: {bin_id}")
|
255
|
-
|
256
|
-
res = reveng_req(requests.get, f"embeddings/{bid}")
|
257
|
-
|
258
|
-
if res.status_code == 400:
|
259
|
-
logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
|
260
|
-
bin_id)
|
261
|
-
|
262
|
-
res.raise_for_status()
|
263
|
-
return res
|
264
|
-
|
265
|
-
|
266
|
-
def RE_signature(fpath: str, binary_id: int = 0) -> Response:
|
267
|
-
"""
|
268
|
-
Fetch binary BinNet signature
|
269
|
-
:param fpath: File path for binary to analyse
|
270
|
-
:param binary_id: ID of binary
|
271
|
-
"""
|
272
|
-
bin_id = re_binary_id(fpath)
|
273
|
-
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
272
|
+
end_point = f"v1/embeddings/binary/{bid}"
|
274
273
|
|
275
274
|
if bid == -1:
|
276
|
-
raise
|
277
|
-
|
278
|
-
res = reveng_req(requests.get, f"signature/{bid}")
|
279
|
-
|
280
|
-
if res.status_code == 425:
|
281
|
-
logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
|
282
|
-
bin_id)
|
283
|
-
|
284
|
-
res.raise_for_status()
|
285
|
-
return res
|
286
|
-
|
287
|
-
|
288
|
-
def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None,
|
289
|
-
model: str = None) -> Response:
|
290
|
-
"""
|
291
|
-
Fetch embedding for custom symbol range
|
292
|
-
:param fpath: File path for binary to analyse
|
293
|
-
:param start_vaddr: Start virtual address of the function to extract embeddings
|
294
|
-
:param end_vaddr: End virtual address of the function to extract embeddings
|
295
|
-
:param base_vaddr: Base address of the binary
|
296
|
-
:param model: Binary model name
|
297
|
-
"""
|
298
|
-
params = {}
|
275
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
299
276
|
|
300
|
-
|
301
|
-
params["end_vaddr"] = end_vaddr
|
302
|
-
if base_vaddr:
|
303
|
-
params["base_vaddr"] = base_vaddr
|
304
|
-
if model:
|
305
|
-
params["models"] = model
|
277
|
+
res: Response = reveng_req(requests.get, end_point)
|
306
278
|
|
307
|
-
|
308
|
-
|
309
|
-
res = reveng_req(requests.get, f"embedding/{bin_id}/{start_vaddr}", params=params)
|
310
|
-
|
311
|
-
if res.status_code == 425:
|
279
|
+
if res.status_code == 400:
|
312
280
|
logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
|
313
281
|
bin_id)
|
314
282
|
|
@@ -326,13 +294,15 @@ def RE_logs(fpath: str, binary_id: int = 0, console: bool = True) -> Response:
|
|
326
294
|
bin_id = re_binary_id(fpath)
|
327
295
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
328
296
|
|
297
|
+
end_point = f"v1/logs/{bid}"
|
298
|
+
|
329
299
|
if bid == -1:
|
330
|
-
raise
|
300
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
331
301
|
|
332
|
-
res = reveng_req(requests.get,
|
302
|
+
res: Response = reveng_req(requests.get, end_point)
|
333
303
|
|
334
|
-
if res.
|
335
|
-
logger.info("Logs found for %s:\n%s", bin_id, res.
|
304
|
+
if res.ok and console:
|
305
|
+
logger.info("Logs found for %s:\n%s", bin_id, res.json()["logs"])
|
336
306
|
elif res.status_code == 404:
|
337
307
|
logger.warning("Error, logs not found for %s.", bin_id)
|
338
308
|
|
@@ -349,12 +319,14 @@ def RE_cves(fpath: str, binary_id: int = 0) -> Response:
|
|
349
319
|
bin_id = re_binary_id(fpath)
|
350
320
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
351
321
|
|
322
|
+
end_point = f"cves/{bid}"
|
323
|
+
|
352
324
|
if bid == -1:
|
353
|
-
raise
|
325
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
354
326
|
|
355
|
-
res = reveng_req(requests.get,
|
327
|
+
res: Response = reveng_req(requests.get, end_point)
|
356
328
|
|
357
|
-
if res.
|
329
|
+
if res.ok:
|
358
330
|
cves = json.loads(res.text)
|
359
331
|
logger.info("Checking for known CVEs embedded inside %s", fpath)
|
360
332
|
|
@@ -369,7 +341,7 @@ def RE_cves(fpath: str, binary_id: int = 0) -> Response:
|
|
369
341
|
return res
|
370
342
|
|
371
343
|
|
372
|
-
def RE_status(fpath: str, binary_id: int = 0) -> Response:
|
344
|
+
def RE_status(fpath: str, binary_id: int = 0, console: bool = False) -> Response:
|
373
345
|
"""
|
374
346
|
Get the status of an ongoing binary analysis
|
375
347
|
:param fpath: File path for binary to analyse
|
@@ -378,11 +350,15 @@ def RE_status(fpath: str, binary_id: int = 0) -> Response:
|
|
378
350
|
bin_id = re_binary_id(fpath)
|
379
351
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
380
352
|
|
353
|
+
end_point = f"v1/analyse/status/{bid}"
|
354
|
+
|
381
355
|
if bid == -1:
|
382
|
-
raise
|
356
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
383
357
|
|
384
|
-
res = reveng_req(requests.get,
|
358
|
+
res: Response = reveng_req(requests.get, end_point)
|
385
359
|
|
360
|
+
if res.ok and console:
|
361
|
+
logger.info("Binary analysis status: %s", res.json()["status"])
|
386
362
|
if res.status_code == 400:
|
387
363
|
logger.warning(" Error, status not found for %s.", bin_id)
|
388
364
|
|
@@ -408,62 +384,77 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
|
|
408
384
|
|
409
385
|
# create json similarity object
|
410
386
|
similarities = list(zip(distances, closest_df.index.tolist()))
|
411
|
-
json_sims = [{"similaritiy": float(d[0]),
|
412
|
-
"
|
387
|
+
json_sims = [{"similaritiy": float(d[0]),
|
388
|
+
"vaddr": int(df.iloc[v]["vaddr"]),
|
389
|
+
"name": str(df.iloc[v]["name"]),
|
390
|
+
"size": int(df.iloc[v]["size"])
|
391
|
+
} for d, v in similarities]
|
413
392
|
return json_sims
|
414
393
|
|
415
394
|
|
416
|
-
def
|
417
|
-
|
418
|
-
distance: float = 0.0, debug_enabled: bool = False) -> Response:
|
395
|
+
def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections: list[str] = None,
|
396
|
+
distance: float = 0.1, debug_enabled: bool = False) -> Response:
|
419
397
|
"""
|
420
|
-
Get
|
421
|
-
:param
|
422
|
-
:param model_name: Binary model name
|
398
|
+
Get nearest functions to a passed function ids
|
399
|
+
:param function_ids: List of function ids
|
423
400
|
:param nns: Number of nearest neighbors
|
424
401
|
:param collections: List of collections RevEng.AI collection names to search through
|
425
|
-
:param ignore_hashes: List[str] SHA-256 hash of binary file to ignore symbols from (usually the current binary)
|
426
402
|
:param distance: How close we want the ANN search to filter for
|
427
403
|
:param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
|
428
404
|
"""
|
429
|
-
params = {"
|
405
|
+
params = {"function_id_list": function_ids,
|
406
|
+
"result_per_function": nns,
|
407
|
+
"debug_mode": debug_enabled,
|
408
|
+
"distance": distance,}
|
430
409
|
|
431
|
-
if collections
|
410
|
+
if collections:
|
432
411
|
# api param is collection, not collections
|
433
|
-
params["collection"] =
|
434
|
-
|
435
|
-
if ignore_hashes and len(ignore_hashes) > 0:
|
436
|
-
params["ignore_hashes"] = ignore_hashes
|
412
|
+
params["collection"] = collections
|
437
413
|
|
438
|
-
|
439
|
-
params["distance"] = distance
|
440
|
-
|
441
|
-
res = reveng_req(requests.post, "ann/symbol", data=json.dumps(embedding), params=params)
|
414
|
+
res: Response = reveng_req(requests.post, "v1/ann/symbol/batch", json_data=params)
|
442
415
|
|
443
416
|
res.raise_for_status()
|
444
417
|
return res
|
445
418
|
|
446
419
|
|
447
|
-
def
|
448
|
-
|
420
|
+
def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
|
421
|
+
distance: float = 0.1, debug_enabled: bool = False) -> Response:
|
449
422
|
"""
|
450
|
-
Get
|
451
|
-
:param
|
452
|
-
:param
|
423
|
+
Get the nearest functions
|
424
|
+
:param fpath: File path for binary to analyse
|
425
|
+
:param binary_id: ID of binary
|
453
426
|
:param nns: Number of nearest neighbors
|
454
|
-
:param
|
455
|
-
:param
|
427
|
+
:param distance: How close we want the ANN search to filter for
|
428
|
+
:param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
|
456
429
|
"""
|
457
|
-
|
430
|
+
bin_id = re_binary_id(fpath)
|
431
|
+
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
458
432
|
|
459
|
-
|
460
|
-
|
461
|
-
|
433
|
+
end_point = f"v1/ann/symbol/{bid}"
|
434
|
+
|
435
|
+
if bid == -1:
|
436
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
437
|
+
|
438
|
+
params = {"result_per_function": nns,
|
439
|
+
"debug_mode": debug_enabled,
|
440
|
+
"distance": distance, }
|
441
|
+
|
442
|
+
res: Response = reveng_req(requests.post, end_point, json_data=params)
|
443
|
+
|
444
|
+
res.raise_for_status()
|
445
|
+
return res
|
446
|
+
|
447
|
+
|
448
|
+
def RE_analyze_functions(fpath: str, binary_id: int = 0) -> Response:
|
449
|
+
bin_id = re_binary_id(fpath)
|
450
|
+
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
451
|
+
|
452
|
+
end_point = f"v1/analyse/functions/{bid}"
|
462
453
|
|
463
|
-
if
|
464
|
-
|
454
|
+
if bid == -1:
|
455
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
465
456
|
|
466
|
-
res = reveng_req(requests.
|
457
|
+
res: Response = reveng_req(requests.get, end_point)
|
467
458
|
|
468
459
|
res.raise_for_status()
|
469
460
|
return res
|
@@ -478,10 +469,12 @@ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
|
|
478
469
|
bin_id = re_binary_id(fpath)
|
479
470
|
bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
|
480
471
|
|
472
|
+
end_point = f"sboms/{bid}"
|
473
|
+
|
481
474
|
if bid == -1:
|
482
|
-
raise
|
475
|
+
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
483
476
|
|
484
|
-
res = reveng_req(requests.get,
|
477
|
+
res: Response = reveng_req(requests.get, end_point)
|
485
478
|
|
486
479
|
logger.info("SBOM for %s:\n%s", fpath, res.text)
|
487
480
|
|
@@ -495,9 +488,10 @@ def RE_functions_rename(function_id: int, new_name: str) -> Response:
|
|
495
488
|
:param function_id: ID of a function
|
496
489
|
:param new_name: New function name
|
497
490
|
"""
|
498
|
-
res = reveng_req(requests.post, f"functions/rename/{function_id}",
|
491
|
+
res: Response = reveng_req(requests.post, f"v1/functions/rename/{function_id}",
|
492
|
+
json_data={"new_name": new_name})
|
499
493
|
|
500
|
-
if res.
|
494
|
+
if res.ok:
|
501
495
|
logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
|
502
496
|
else:
|
503
497
|
logger.warning("Error, cannot rename FunctionId %d. %s", function_id, res.text)
|
@@ -506,66 +500,115 @@ def RE_functions_rename(function_id: int, new_name: str) -> Response:
|
|
506
500
|
return res
|
507
501
|
|
508
502
|
|
503
|
+
def RE_settings() -> Response:
|
504
|
+
"""
|
505
|
+
Get the configuration settings
|
506
|
+
"""
|
507
|
+
res: Response = reveng_req(requests.get, "v1/config")
|
508
|
+
|
509
|
+
res.raise_for_status()
|
510
|
+
return res
|
511
|
+
|
512
|
+
|
513
|
+
def RE_health() -> bool:
|
514
|
+
"""
|
515
|
+
Health check & verify access to the API
|
516
|
+
"""
|
517
|
+
res: Response = reveng_req(requests.get, "v1")
|
518
|
+
|
519
|
+
success = res.json()["success"]
|
520
|
+
|
521
|
+
if success:
|
522
|
+
logger.info(res.json()["message"])
|
523
|
+
else:
|
524
|
+
logger.warning(res.json()["error"])
|
525
|
+
return success
|
526
|
+
|
527
|
+
|
528
|
+
def RE_authentication() -> Response:
|
529
|
+
"""
|
530
|
+
Authentication Check
|
531
|
+
"""
|
532
|
+
res: Response = reveng_req(requests.get, "v1/authenticate")
|
533
|
+
|
534
|
+
res.raise_for_status()
|
535
|
+
return res
|
536
|
+
|
537
|
+
|
509
538
|
def re_binary_id(fpath: str) -> str:
|
510
539
|
"""
|
511
540
|
Take the SHA-256 hash of binary file
|
512
541
|
:param fpath: File path for binary to analyse
|
513
542
|
"""
|
514
|
-
if
|
515
|
-
|
543
|
+
if fpath and isfile(fpath) and access(fpath, R_OK):
|
544
|
+
hf = sha256()
|
516
545
|
|
517
|
-
|
546
|
+
with open(fpath, "rb") as fd:
|
547
|
+
c = fd.read()
|
548
|
+
hf.update(c)
|
518
549
|
|
519
|
-
|
520
|
-
|
521
|
-
|
550
|
+
return hf.hexdigest()
|
551
|
+
else:
|
552
|
+
logger.error("File '%s' doesn't exist or isn't readable", fpath)
|
522
553
|
|
523
|
-
return
|
554
|
+
return "undefined"
|
524
555
|
|
525
556
|
|
526
|
-
def _binary_isa(
|
557
|
+
def _binary_isa(binary: Binary, exec_type: str) -> str:
|
527
558
|
"""
|
528
559
|
Get ISA format
|
529
560
|
"""
|
530
|
-
if exec_type == "
|
531
|
-
|
561
|
+
if exec_type == "ELF":
|
562
|
+
arch = binary.header.machine_type
|
532
563
|
|
533
|
-
if
|
564
|
+
if arch == ELF.ARCH.i386:
|
534
565
|
return "x86"
|
535
|
-
elif
|
566
|
+
elif arch == ELF.ARCH.x86_64:
|
536
567
|
return "x86_64"
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
568
|
+
elif arch == ELF.ARCH.ARM:
|
569
|
+
return "ARM32"
|
570
|
+
elif arch == ELF.ARCH.AARCH64:
|
571
|
+
return "ARM64"
|
572
|
+
elif exec_type == "PE":
|
573
|
+
machine_type = binary.header.machine
|
574
|
+
|
575
|
+
if machine_type == PE.Header.MACHINE_TYPES.I386:
|
541
576
|
return "x86"
|
542
|
-
elif machine_type == PE.MACHINE_TYPES.AMD64:
|
577
|
+
elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
|
543
578
|
return "x86_64"
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
579
|
+
elif machine_type == PE.Header.MACHINE_TYPES.ARM:
|
580
|
+
return "ARM32"
|
581
|
+
elif machine_type == PE.Header.MACHINE_TYPES.ARM64:
|
582
|
+
return "ARM64"
|
583
|
+
elif exec_type == "Mach-O":
|
584
|
+
cpu_type = binary.header.cpu_type
|
585
|
+
|
586
|
+
if cpu_type == MachO.CPU_TYPES.x86:
|
548
587
|
return "x86"
|
549
|
-
elif
|
588
|
+
elif cpu_type == MachO.CPU_TYPES.x86_64:
|
550
589
|
return "x86_64"
|
590
|
+
elif cpu_type == MachO.CPU_TYPES.ARM:
|
591
|
+
return "ARM32"
|
592
|
+
elif cpu_type == MachO.CPU_TYPES.ARM64:
|
593
|
+
return "ARM64"
|
551
594
|
|
552
|
-
logger.error("Error,
|
553
|
-
raise RuntimeError(f"Error,
|
595
|
+
logger.error("Error, could not determine or unsupported ISA for binary format: %s.", exec_type)
|
596
|
+
raise RuntimeError(f"Error, could not determine or unsupported ISA for binary format: {exec_type}.")
|
554
597
|
|
555
598
|
|
556
|
-
def _binary_format(
|
599
|
+
def _binary_format(binary: Binary) -> str:
|
557
600
|
"""
|
558
601
|
Get executable file format
|
559
602
|
"""
|
560
|
-
if
|
561
|
-
return "
|
562
|
-
if
|
563
|
-
return "
|
564
|
-
if
|
565
|
-
return "
|
603
|
+
if binary.format == Binary.FORMATS.PE:
|
604
|
+
return "PE"
|
605
|
+
if binary.format == Binary.FORMATS.ELF:
|
606
|
+
return "ELF"
|
607
|
+
if binary.format == Binary.FORMATS.MACHO:
|
608
|
+
return "Mach-O"
|
566
609
|
|
567
|
-
logger.error("Error, could not determine binary format: %s.",
|
568
|
-
raise RuntimeError("Error, could not determine binary format.")
|
610
|
+
logger.error("Error, could not determine or unsupported binary format: %s.", binary.format)
|
611
|
+
raise RuntimeError(f"Error, could not determine or unsupported binary format: {binary.format}")
|
569
612
|
|
570
613
|
|
571
614
|
def file_type(fpath: str) -> tuple[str, str]:
|
@@ -575,24 +618,31 @@ def file_type(fpath: str) -> tuple[str, str]:
|
|
575
618
|
"""
|
576
619
|
binary = parse(fpath)
|
577
620
|
|
578
|
-
|
579
|
-
|
580
|
-
|
621
|
+
if not binary:
|
622
|
+
file_format = isa_format = "Unknown format"
|
623
|
+
else:
|
624
|
+
# handle PE and ELF files
|
625
|
+
file_format = _binary_format(binary)
|
626
|
+
isa_format = _binary_isa(binary, file_format)
|
581
627
|
|
582
|
-
return file_format,
|
628
|
+
return file_format, isa_format
|
583
629
|
|
584
630
|
|
585
631
|
def parse_config() -> None:
|
586
632
|
"""
|
587
633
|
Parse ~/.reait.toml config file
|
588
634
|
"""
|
589
|
-
|
590
|
-
|
591
|
-
|
635
|
+
fpath = expanduser("~/.reait.toml")
|
636
|
+
|
637
|
+
if isfile(fpath) and access(fpath, R_OK):
|
638
|
+
with open(fpath) as fd:
|
639
|
+
config = tomli.loads(fd.read())
|
592
640
|
|
593
|
-
for key in ("apikey", "host", "model"):
|
641
|
+
for key in ("apikey", "host", "model",):
|
594
642
|
if key in config:
|
595
643
|
re_conf[key] = config[key]
|
644
|
+
else:
|
645
|
+
logger.info("File %s doesn't exist or isn't readable", fpath)
|
596
646
|
|
597
647
|
|
598
648
|
def angular_distance(x, y) -> float:
|
@@ -600,5 +650,5 @@ def angular_distance(x, y) -> float:
|
|
600
650
|
Compute angular distance between two embedding vectors
|
601
651
|
Normalised euclidian distance
|
602
652
|
"""
|
603
|
-
cos = dot(x, y) / ((dot(x, x) * dot(y, y))
|
653
|
+
cos = dot(x, y) / ((dot(x, x) * dot(y, y))**0.5)
|
604
654
|
return 1.0 - arccos(cos) / pi
|