reait 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +0 -1
- reait/api.py +603 -96
- reait/main.py +353 -109
- {reait-1.0.0.dist-info → reait-1.1.0.dist-info}/METADATA +18 -13
- reait-1.1.0.dist-info/RECORD +9 -0
- {reait-1.0.0.dist-info → reait-1.1.0.dist-info}/WHEEL +1 -1
- reait-1.0.0.dist-info/RECORD +0 -9
- {reait-1.0.0.dist-info → reait-1.1.0.dist-info}/entry_points.txt +0 -0
- {reait-1.0.0.dist-info → reait-1.1.0.dist-info/licenses}/LICENSE +0 -0
- {reait-1.0.0.dist-info → reait-1.1.0.dist-info}/top_level.txt +0 -0
reait/api.py
CHANGED
@@ -1,28 +1,25 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
1
|
from __future__ import print_function, annotations
|
3
2
|
|
3
|
+
from os import access, R_OK, environ
|
4
|
+
from os.path import basename, isfile, expanduser, getsize
|
5
|
+
|
4
6
|
import json
|
5
|
-
import tomli
|
6
7
|
import logging
|
7
8
|
import requests
|
8
|
-
|
9
|
-
from hashlib import sha256
|
9
|
+
import tomli
|
10
10
|
from datetime import datetime
|
11
|
-
|
12
|
-
from
|
13
|
-
from os import access, R_OK
|
14
|
-
from os.path import basename, isfile, expanduser, getsize
|
15
|
-
from requests import request, Response, HTTPError
|
11
|
+
from hashlib import sha256
|
12
|
+
from lief import parse, Binary, ELF, PE, MachO
|
16
13
|
from numpy import array, vstack, dot, arccos, pi
|
17
14
|
from pandas import DataFrame
|
18
|
-
from
|
15
|
+
from requests import request, Response, HTTPError
|
16
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
19
17
|
|
20
|
-
__version__ = "1.
|
18
|
+
__version__ = "1.1.0"
|
21
19
|
|
22
20
|
re_conf = {
|
23
|
-
"apikey": "
|
24
|
-
"host": "https://api.reveng.ai",
|
25
|
-
"model": "binnet-0.3-x86",
|
21
|
+
"apikey": environ.get("REAI_API_KEY", ""),
|
22
|
+
"host": environ.get("REAI_API_HOST", "https://api.reveng.ai"),
|
26
23
|
}
|
27
24
|
|
28
25
|
|
@@ -36,16 +33,28 @@ class ReaitError(HTTPError):
|
|
36
33
|
response.reason = reason
|
37
34
|
response.status_code = 404
|
38
35
|
response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
|
39
|
-
response.url =
|
36
|
+
response.url = (
|
37
|
+
f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
|
38
|
+
if end_point
|
39
|
+
else None
|
40
|
+
)
|
40
41
|
|
41
42
|
super().__init__(reason, response=response)
|
42
43
|
|
43
44
|
|
44
|
-
def reveng_req(
|
45
|
-
|
45
|
+
def reveng_req(
|
46
|
+
req: request,
|
47
|
+
end_point: str,
|
48
|
+
data: dict = None,
|
49
|
+
ex_headers: dict = None,
|
50
|
+
params: dict = None,
|
51
|
+
json_data: dict = None,
|
52
|
+
timeout: int = 60,
|
53
|
+
files: dict = None,
|
54
|
+
) -> Response:
|
46
55
|
"""
|
47
56
|
Constructs and sends a Request
|
48
|
-
:param
|
57
|
+
:param req: Method for the new Request
|
49
58
|
:param end_point: Endpoint to add to the base URL
|
50
59
|
:param ex_headers: Extended HTTP headers to add
|
51
60
|
:param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
|
@@ -60,22 +69,48 @@ def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict =
|
|
60
69
|
if ex_headers:
|
61
70
|
headers.update(ex_headers)
|
62
71
|
|
63
|
-
logger.debug(
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
72
|
+
logger.debug(
|
73
|
+
"Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
|
74
|
+
req.__name__.upper(),
|
75
|
+
url,
|
76
|
+
headers,
|
77
|
+
data,
|
78
|
+
json_data,
|
79
|
+
params,
|
80
|
+
files,
|
81
|
+
)
|
82
|
+
|
83
|
+
response: Response = req(
|
84
|
+
url,
|
85
|
+
headers=headers,
|
86
|
+
json=json_data,
|
87
|
+
data=data,
|
88
|
+
params=params,
|
89
|
+
timeout=timeout,
|
90
|
+
files=files,
|
91
|
+
)
|
92
|
+
|
93
|
+
logger.debug(
|
94
|
+
"Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
|
95
|
+
req.__name__.upper(),
|
96
|
+
url,
|
97
|
+
response.headers,
|
98
|
+
response.status_code,
|
99
|
+
response.text,
|
100
|
+
)
|
70
101
|
|
71
102
|
return response
|
72
103
|
|
73
104
|
|
74
105
|
def re_hash_check(bin_id: str) -> bool:
|
75
|
-
res: Response = reveng_req(
|
106
|
+
res: Response = reveng_req(
|
107
|
+
requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
|
108
|
+
)
|
76
109
|
|
77
110
|
if res.ok:
|
78
|
-
return any(
|
111
|
+
return any(
|
112
|
+
binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"]
|
113
|
+
)
|
79
114
|
else:
|
80
115
|
logger.warning("Bad Request: %s", res.text)
|
81
116
|
|
@@ -86,37 +121,63 @@ def re_hash_check(bin_id: str) -> bool:
|
|
86
121
|
# Assumes a file has been passed, correct hash only
|
87
122
|
# Returns the BID of the binary_id (hash)
|
88
123
|
def re_bid_search(bin_id: str) -> int:
|
89
|
-
res: Response = reveng_req(
|
124
|
+
res: Response = reveng_req(
|
125
|
+
requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
|
126
|
+
)
|
90
127
|
|
91
128
|
bid = -1
|
92
129
|
|
93
130
|
if res.ok:
|
94
131
|
# Filter the result who matches the SHA-256
|
95
|
-
binaries = list(
|
132
|
+
binaries = list(
|
133
|
+
filter(
|
134
|
+
lambda binary: binary["sha_256_hash"] == bin_id,
|
135
|
+
res.json()["query_results"],
|
136
|
+
)
|
137
|
+
)
|
96
138
|
|
97
139
|
# Check only one record is returned
|
98
140
|
if len(binaries) == 1:
|
99
141
|
binary = binaries[0]
|
100
142
|
bid = binary["binary_id"]
|
101
143
|
|
102
|
-
logger.info(
|
103
|
-
|
144
|
+
logger.info(
|
145
|
+
"Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
|
146
|
+
bid,
|
147
|
+
binary["binary_name"],
|
148
|
+
binary["creation"],
|
149
|
+
binary["model_name"],
|
150
|
+
binary["status"],
|
151
|
+
)
|
104
152
|
elif len(binaries) > 1:
|
105
|
-
binaries.sort(
|
153
|
+
binaries.sort(
|
154
|
+
key=lambda binary: datetime.fromisoformat(
|
155
|
+
binary["creation"]
|
156
|
+
).timestamp(),
|
157
|
+
reverse=True,
|
158
|
+
)
|
106
159
|
|
107
160
|
logger.info("%d matches found for hash: %s", len(binaries), bin_id)
|
108
161
|
|
109
162
|
options_dict = {}
|
110
163
|
|
111
164
|
for idx, binary in enumerate(binaries):
|
112
|
-
logger.info(
|
113
|
-
|
114
|
-
|
165
|
+
logger.info(
|
166
|
+
"[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
|
167
|
+
idx,
|
168
|
+
binary["binary_id"],
|
169
|
+
binary["binary_name"],
|
170
|
+
binary["creation"],
|
171
|
+
binary["model_name"],
|
172
|
+
binary["status"],
|
173
|
+
)
|
115
174
|
|
116
175
|
options_dict[idx] = binary["binary_id"]
|
117
176
|
|
118
177
|
try:
|
119
|
-
user_input = input(
|
178
|
+
user_input = input(
|
179
|
+
"[+] Please enter the option you want to use for this operation:"
|
180
|
+
)
|
120
181
|
|
121
182
|
option_number = int(user_input)
|
122
183
|
|
@@ -157,16 +218,32 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
|
|
157
218
|
elif res.status_code == 404:
|
158
219
|
logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
|
159
220
|
else:
|
160
|
-
logger.error(
|
221
|
+
logger.error(
|
222
|
+
"Error deleting binary %s under. Server returned %d.",
|
223
|
+
bin_id,
|
224
|
+
res.status_code,
|
225
|
+
)
|
161
226
|
|
162
227
|
res.raise_for_status()
|
163
228
|
return res
|
164
229
|
|
165
230
|
|
166
|
-
def RE_analyse(
|
167
|
-
|
168
|
-
|
169
|
-
|
231
|
+
def RE_analyse(
|
232
|
+
fpath: str,
|
233
|
+
model_name: str = None,
|
234
|
+
isa_options: str = None,
|
235
|
+
platform_options: str = None,
|
236
|
+
file_options: str = None,
|
237
|
+
dynamic_execution: bool = False,
|
238
|
+
command_line_args: str = None,
|
239
|
+
binary_scope: str = None,
|
240
|
+
tags: list = None,
|
241
|
+
priority: int = 0,
|
242
|
+
duplicate: bool = False,
|
243
|
+
symbols: dict = None,
|
244
|
+
debug_fpath: str = None,
|
245
|
+
skip_scraping: bool = False,
|
246
|
+
) -> Response:
|
170
247
|
"""
|
171
248
|
Start analysis job for binary file
|
172
249
|
:param fpath: File path for binary to analyse
|
@@ -182,6 +259,7 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
|
182
259
|
:param duplicate: Duplicate an existing binary
|
183
260
|
:param symbols: JSON object containing the base address and the list of functions
|
184
261
|
:param debug_fpath: File path for debug file
|
262
|
+
:param skip_scraping: Disable/Enable auto-tagging of binary sample in relevant APIs
|
185
263
|
"""
|
186
264
|
bin_id = re_binary_id(fpath)
|
187
265
|
result = re_hash_check(bin_id)
|
@@ -189,13 +267,19 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
|
189
267
|
end_point = "v1/analyse/"
|
190
268
|
|
191
269
|
if result and duplicate is False:
|
192
|
-
logger.error(
|
193
|
-
|
270
|
+
logger.error(
|
271
|
+
"Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
|
272
|
+
bin_id,
|
273
|
+
)
|
194
274
|
raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
|
195
275
|
|
196
276
|
filename = basename(fpath)
|
197
277
|
|
198
|
-
params = {
|
278
|
+
params = {
|
279
|
+
"file_name": filename,
|
280
|
+
"size_in_bytes": getsize(fpath),
|
281
|
+
"sha_256_hash": bin_id,
|
282
|
+
}
|
199
283
|
|
200
284
|
if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
|
201
285
|
try:
|
@@ -205,18 +289,30 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
|
205
289
|
params["debug_hash"] = debug["sha_256_hash"]
|
206
290
|
except HTTPError:
|
207
291
|
pass
|
208
|
-
|
209
|
-
for p_name in (
|
210
|
-
|
292
|
+
|
293
|
+
for p_name in (
|
294
|
+
"model_name",
|
295
|
+
"isa_options",
|
296
|
+
"platform_options",
|
297
|
+
"file_options",
|
298
|
+
"dynamic_execution",
|
299
|
+
"command_line_args",
|
300
|
+
"binary_scope",
|
301
|
+
"tags",
|
302
|
+
"priority",
|
303
|
+
"symbols",
|
304
|
+
"skip_scraping",
|
305
|
+
):
|
211
306
|
p_value = locals()[p_name]
|
212
307
|
|
213
308
|
if p_value:
|
214
309
|
params[p_name] = p_value
|
215
310
|
|
216
311
|
res: Response = reveng_req(requests.post, end_point, json_data=params)
|
217
|
-
|
218
312
|
if res.ok:
|
219
|
-
logger.info(
|
313
|
+
logger.info(
|
314
|
+
"Successfully submitted binary for analysis. %s - %s", fpath, bin_id
|
315
|
+
)
|
220
316
|
elif res.status_code == 400:
|
221
317
|
if "error" in res.json().keys():
|
222
318
|
logger.warning("Error analysing %s - %s", fpath, res.json()["error"])
|
@@ -239,22 +335,32 @@ def RE_upload(fpath: str) -> Response:
|
|
239
335
|
res = Response()
|
240
336
|
res.status_code = 200
|
241
337
|
res.url = f"{re_conf['host']}/v1/upload"
|
242
|
-
res._content = (
|
243
|
-
|
244
|
-
|
338
|
+
res._content = (
|
339
|
+
(
|
340
|
+
'{0}"success": true,'
|
341
|
+
'"message": "File already uploaded!",'
|
342
|
+
'"sha_256_hash": "{1}"{2}'
|
343
|
+
)
|
344
|
+
.format("{", bin_id, "}")
|
345
|
+
.encode()
|
346
|
+
)
|
245
347
|
else:
|
246
348
|
with open(fpath, "rb") as fd:
|
247
349
|
res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
|
248
350
|
|
249
351
|
if res.ok:
|
250
|
-
logger.info(
|
352
|
+
logger.info(
|
353
|
+
"Successfully uploaded binary to your account. %s - %s", fpath, bin_id
|
354
|
+
)
|
251
355
|
elif res.status_code == 400:
|
252
356
|
if "error" in res.json().keys():
|
253
357
|
logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
|
254
358
|
elif res.status_code == 413:
|
255
359
|
logger.warning("File too large. Please upload files under 10MB.")
|
256
360
|
elif res.status_code == 500:
|
257
|
-
logger.error(
|
361
|
+
logger.error(
|
362
|
+
"Internal Server Error. Please contact support. Skipping upload..."
|
363
|
+
)
|
258
364
|
|
259
365
|
res.raise_for_status()
|
260
366
|
return res
|
@@ -277,8 +383,10 @@ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
|
|
277
383
|
res: Response = reveng_req(requests.get, end_point)
|
278
384
|
|
279
385
|
if res.status_code == 400:
|
280
|
-
logger.warning(
|
281
|
-
|
386
|
+
logger.warning(
|
387
|
+
"Analysis for %s still in progress. Please check the logs (-l) and try again later.",
|
388
|
+
bin_id,
|
389
|
+
)
|
282
390
|
|
283
391
|
res.raise_for_status()
|
284
392
|
return res
|
@@ -376,7 +484,11 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
|
|
376
484
|
df = DataFrame(data=embeddings)
|
377
485
|
np_embedding = array(embedding).reshape(1, -1)
|
378
486
|
source_embeddings = vstack(df["embedding"].values)
|
379
|
-
closest =
|
487
|
+
closest = (
|
488
|
+
cosine_similarity(source_embeddings, np_embedding)
|
489
|
+
.squeeze()
|
490
|
+
.argsort()[::-1][:nns]
|
491
|
+
)
|
380
492
|
distances = cosine_similarity(source_embeddings[closest], np_embedding)
|
381
493
|
|
382
494
|
# match closest embeddings with similarity
|
@@ -384,16 +496,25 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
|
|
384
496
|
|
385
497
|
# create json similarity object
|
386
498
|
similarities = list(zip(distances, closest_df.index.tolist()))
|
387
|
-
json_sims = [
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
499
|
+
json_sims = [
|
500
|
+
{
|
501
|
+
"similaritiy": float(d[0]),
|
502
|
+
"vaddr": int(df.iloc[v]["vaddr"]),
|
503
|
+
"name": str(df.iloc[v]["name"]),
|
504
|
+
"size": int(df.iloc[v]["size"]),
|
505
|
+
}
|
506
|
+
for d, v in similarities
|
507
|
+
]
|
392
508
|
return json_sims
|
393
509
|
|
394
510
|
|
395
|
-
def RE_nearest_symbols_batch(
|
396
|
-
|
511
|
+
def RE_nearest_symbols_batch(
|
512
|
+
function_ids: list[int],
|
513
|
+
nns: int = 5,
|
514
|
+
collections: list[str] = None,
|
515
|
+
distance: float = 0.1,
|
516
|
+
debug_enabled: bool = False,
|
517
|
+
) -> Response:
|
397
518
|
"""
|
398
519
|
Get nearest functions to a passed function ids
|
399
520
|
:param function_ids: List of function ids
|
@@ -402,10 +523,12 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
|
|
402
523
|
:param distance: How close we want the ANN search to filter for
|
403
524
|
:param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
|
404
525
|
"""
|
405
|
-
params = {
|
406
|
-
|
407
|
-
|
408
|
-
|
526
|
+
params = {
|
527
|
+
"function_id_list": function_ids,
|
528
|
+
"result_per_function": nns,
|
529
|
+
"debug_mode": debug_enabled,
|
530
|
+
"distance": distance,
|
531
|
+
}
|
409
532
|
|
410
533
|
if collections:
|
411
534
|
# api param is collection, not collections
|
@@ -417,8 +540,13 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
|
|
417
540
|
return res
|
418
541
|
|
419
542
|
|
420
|
-
def RE_nearest_functions(
|
421
|
-
|
543
|
+
def RE_nearest_functions(
|
544
|
+
fpath: str,
|
545
|
+
binary_id: int = 0,
|
546
|
+
nns: int = 5,
|
547
|
+
distance: float = 0.1,
|
548
|
+
debug_enabled: bool = False,
|
549
|
+
) -> Response:
|
422
550
|
"""
|
423
551
|
Get the nearest functions
|
424
552
|
:param fpath: File path for binary to analyse
|
@@ -435,9 +563,11 @@ def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
|
|
435
563
|
if bid == -1:
|
436
564
|
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
437
565
|
|
438
|
-
params = {
|
439
|
-
|
440
|
-
|
566
|
+
params = {
|
567
|
+
"result_per_function": nns,
|
568
|
+
"debug_mode": debug_enabled,
|
569
|
+
"distance": distance,
|
570
|
+
}
|
441
571
|
|
442
572
|
res: Response = reveng_req(requests.post, end_point, json_data=params)
|
443
573
|
|
@@ -482,14 +612,47 @@ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
|
|
482
612
|
return res
|
483
613
|
|
484
614
|
|
615
|
+
def RE_binary_additonal_details(fpath: str, binary_id: int = None) -> Response:
    """
    Fetch the additional details of a binary and log them as indented JSON
    :param fpath: File path of the binary, hashed to identify it
    :param binary_id: ID of binary; when None it is looked up from the file hash
    """
    sha_256 = re_binary_id(fpath)

    if binary_id is None:
        resolved_id = re_bid_search(sha_256)
    else:
        resolved_id = binary_id

    # -1 signals that no binary could be resolved for the hash
    if resolved_id == -1:
        raise ReaitError(f"No matches found for hash: {sha_256}")

    response: Response = reveng_req(
        requests.get, f"v2/binaries/{resolved_id}/additional-details"
    )
    response.raise_for_status()

    logger.info(f"Additional Details Info({fpath}):\n")
    logger.info(f"\n{json.dumps(response.json(), indent=4)}")
    return response
|
628
|
+
|
629
|
+
|
630
|
+
def RE_binary_details(fpath: str, binary_id: int = None) -> Response:
    """
    Fetch the details of a binary and log them as indented JSON
    :param fpath: File path of the binary, hashed to identify it
    :param binary_id: ID of binary; when None it is looked up from the file hash
    """
    sha_256 = re_binary_id(fpath)

    if binary_id is None:
        resolved_id = re_bid_search(sha_256)
    else:
        resolved_id = binary_id

    # -1 signals that no binary could be resolved for the hash
    if resolved_id == -1:
        raise ReaitError(f"No matches found for hash: {sha_256}")

    response: Response = reveng_req(
        requests.get, f"v2/binaries/{resolved_id}/details"
    )
    response.raise_for_status()

    logger.info(f"Details Info({fpath}):\n")
    logger.info(f"\n{json.dumps(response.json(), indent=4)}")
    return response
|
643
|
+
|
644
|
+
|
485
645
|
def RE_functions_rename(function_id: int, new_name: str) -> Response:
|
486
646
|
"""
|
487
647
|
Send the new name of a function to C2
|
488
648
|
:param function_id: ID of a function
|
489
649
|
:param new_name: New function name
|
490
650
|
"""
|
491
|
-
res: Response = reveng_req(
|
492
|
-
|
651
|
+
res: Response = reveng_req(
|
652
|
+
requests.post,
|
653
|
+
f"v1/functions/rename/{function_id}",
|
654
|
+
json_data={"new_name": new_name},
|
655
|
+
)
|
493
656
|
|
494
657
|
if res.ok:
|
495
658
|
logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
|
@@ -500,6 +663,29 @@ def RE_functions_rename(function_id: int, new_name: str) -> Response:
|
|
500
663
|
return res
|
501
664
|
|
502
665
|
|
666
|
+
def RE_functions_rename_batch(mapping: dict[int, str]) -> Response:
    """
    Rename several functions with a single request
    :param mapping: dictionary containing the function_id as key and function_name as value
    """
    # The request body carries one {"function_id", "function_name"} entry per mapping item
    renames = []
    for function_id, function_name in mapping.items():
        renames.append(
            {
                "function_id": function_id,
                "function_name": function_name,
            }
        )

    response: Response = reveng_req(
        requests.post,
        "v1/functions/batch/rename",
        json_data={"new_name_mapping": renames},
    )

    response.raise_for_status()
    return response
|
687
|
+
|
688
|
+
|
503
689
|
def RE_settings() -> Response:
|
504
690
|
"""
|
505
691
|
Get the configuration settings
|
@@ -535,6 +721,57 @@ def RE_authentication() -> Response:
|
|
535
721
|
return res
|
536
722
|
|
537
723
|
|
724
|
+
def RE_functions_list(
    analysis_id: int,
    search_term: str = "",
    min_v_address: int = 0,
    max_v_address: int = 0,
) -> Response:
    """
    Get the functions of an analysis
    :param analysis_id: Analysis ID used to build the request path
    :param search_term: Optional search term; omitted from the query when empty
    :param min_v_address: Optional lower address bound; omitted from the query when 0
    :param max_v_address: Optional upper address bound; omitted from the query when 0
    """
    query = {}

    if search_term:
        query["search_term"] = search_term

    # 0 means "no bound": such values are left out of the query string
    for bound_name, bound_value in (
        ("min_v_address", min_v_address),
        ("max_v_address", max_v_address),
    ):
        if bound_value != 0:
            query[bound_name] = bound_value

    end_point = f"v2/analyses/{analysis_id}/info/functions/list"
    response: Response = reveng_req(requests.get, end_point, params=query)

    response.raise_for_status()
    return response
|
751
|
+
|
752
|
+
|
753
|
+
def RE_function_callers_callees(function: int) -> Response:
    """
    Get the callers and callees of a function
    :param function: Function ID
    """
    end_point = f"v2/functions/{function}/callees_callers"
    response: Response = reveng_req(requests.get, end_point)

    # Surface HTTP errors to the caller instead of returning a failed response
    response.raise_for_status()
    return response
|
762
|
+
|
763
|
+
|
764
|
+
def RE_analysis_info(analysis_id: int) -> Response:
    """
    Get the basic information of an analysis
    :param analysis_id: Analysis ID
    """
    end_point = f"v2/analyses/{analysis_id}/info/basic"
    response: Response = reveng_req(requests.get, end_point)

    # Surface HTTP errors to the caller instead of returning a failed response
    response.raise_for_status()
    return response
|
773
|
+
|
774
|
+
|
538
775
|
def re_binary_id(fpath: str) -> str:
|
539
776
|
"""
|
540
777
|
Take the SHA-256 hash of binary file
|
@@ -549,9 +786,7 @@ def re_binary_id(fpath: str) -> str:
|
|
549
786
|
|
550
787
|
return hf.hexdigest()
|
551
788
|
else:
|
552
|
-
|
553
|
-
|
554
|
-
return "undefined"
|
789
|
+
return fpath
|
555
790
|
|
556
791
|
|
557
792
|
def _binary_isa(binary: Binary, exec_type: str) -> str:
|
@@ -560,10 +795,9 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
|
|
560
795
|
"""
|
561
796
|
if exec_type == "ELF":
|
562
797
|
arch = binary.header.machine_type
|
563
|
-
|
564
|
-
if arch == ELF.ARCH.i386:
|
798
|
+
if arch == ELF.ARCH.I386:
|
565
799
|
return "x86"
|
566
|
-
elif arch == ELF.ARCH.
|
800
|
+
elif arch == ELF.ARCH.X86_64:
|
567
801
|
return "x86_64"
|
568
802
|
elif arch == ELF.ARCH.ARM:
|
569
803
|
return "ARM32"
|
@@ -571,7 +805,6 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
|
|
571
805
|
return "ARM64"
|
572
806
|
elif exec_type == "PE":
|
573
807
|
machine_type = binary.header.machine
|
574
|
-
|
575
808
|
if machine_type == PE.Header.MACHINE_TYPES.I386:
|
576
809
|
return "x86"
|
577
810
|
elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
|
@@ -583,17 +816,23 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
|
|
583
816
|
elif exec_type == "Mach-O":
|
584
817
|
cpu_type = binary.header.cpu_type
|
585
818
|
|
586
|
-
if cpu_type == MachO.
|
819
|
+
if cpu_type == MachO.Header.CPU_TYPE.X86:
|
587
820
|
return "x86"
|
588
|
-
elif cpu_type == MachO.
|
821
|
+
elif cpu_type == MachO.Header.CPU_TYPE.X86_64:
|
589
822
|
return "x86_64"
|
590
|
-
elif cpu_type == MachO.
|
823
|
+
elif cpu_type == MachO.Header.CPU_TYPE.ARM:
|
591
824
|
return "ARM32"
|
592
|
-
elif cpu_type == MachO.
|
825
|
+
elif cpu_type == MachO.Header.CPU_TYPE.ARM64:
|
593
826
|
return "ARM64"
|
594
827
|
|
595
|
-
logger.error(
|
596
|
-
|
828
|
+
logger.error(
|
829
|
+
"Error, could not determine or unsupported "
|
830
|
+
f"ISA for binary format: {exec_type}."
|
831
|
+
)
|
832
|
+
raise RuntimeError(
|
833
|
+
"Error, could not determine or unsupported "
|
834
|
+
f"ISA for binary format: {exec_type}."
|
835
|
+
)
|
597
836
|
|
598
837
|
|
599
838
|
def _binary_format(binary: Binary) -> str:
|
@@ -607,8 +846,12 @@ def _binary_format(binary: Binary) -> str:
|
|
607
846
|
if binary.format == Binary.FORMATS.MACHO:
|
608
847
|
return "Mach-O"
|
609
848
|
|
610
|
-
logger.error(
|
611
|
-
|
849
|
+
logger.error(
|
850
|
+
"Error, could not determine or unsupported" f" binary format: {binary.format}."
|
851
|
+
)
|
852
|
+
raise RuntimeError(
|
853
|
+
"Error, could not determine or " f"unsupported binary format: {binary.format}"
|
854
|
+
)
|
612
855
|
|
613
856
|
|
614
857
|
def file_type(fpath: str) -> tuple[str, str]:
|
@@ -638,17 +881,281 @@ def parse_config() -> None:
|
|
638
881
|
with open(fpath) as fd:
|
639
882
|
config = tomli.loads(fd.read())
|
640
883
|
|
641
|
-
for key in (
|
884
|
+
for key in (
|
885
|
+
"apikey",
|
886
|
+
"host",
|
887
|
+
"model",
|
888
|
+
):
|
642
889
|
if key in config:
|
643
890
|
re_conf[key] = config[key]
|
644
891
|
else:
|
645
892
|
logger.info("File %s doesn't exist or isn't readable", fpath)
|
646
893
|
|
647
894
|
|
648
|
-
def
|
895
|
+
def RE_analysis_id(fpath: str, binary_id: int = 0) -> Response:
    """
    Look up the Analysis ID that belongs to a Binary ID
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means "look it up from the file hash"
    """
    sha_256 = re_binary_id(fpath)

    if binary_id == 0:
        resolved_id = re_bid_search(sha_256)
    else:
        resolved_id = binary_id

    # Built before the check below so the error can carry the end point
    end_point = f"v2/analyses/lookup/{resolved_id}"

    if resolved_id == -1:
        raise ReaitError(f"No matches found for hash: {sha_256}", end_point)

    response: Response = reveng_req(requests.get, end_point)

    # The body is logged before the status check, so failed responses are logged too
    logger.info("Analysis ID for %s:\n%s", fpath, response.text)

    response.raise_for_status()
    return response
|
915
|
+
|
916
|
+
|
917
|
+
def RE_generate_data_types(analysis_id: int, function_ids: list[int]) -> Response:
    """
    Generate data types for functions of an analysis
    :param analysis_id: Analysis ID
    :param function_ids: List of function IDs sent in the request body
    """
    payload = {"function_ids": function_ids}

    response: Response = reveng_req(
        requests.post,
        f"/v2/analyses/{analysis_id}/info/functions/data_types",
        json_data=payload,
    )
    response.raise_for_status()
    return response
|
929
|
+
|
930
|
+
|
931
|
+
def RE_list_data_types(analysis_id: int, function_ids: list[int]) -> Response:
    """
    List data types for functions of an analysis
    :param analysis_id: Analysis ID
    :param function_ids: List of function IDs
    """
    # The function IDs travel as a JSON body even though this is a GET request
    payload = {"function_ids": function_ids}

    response: Response = reveng_req(
        requests.get,
        f"/v2/analyses/{analysis_id}/info/functions/data_types",
        json_data=payload,
    )
    response.raise_for_status()
    return response
|
944
|
+
|
945
|
+
|
946
|
+
def RE_begin_ai_decompilation(function_id: int) -> Response:
    """
    Start the AI decompilation of a function
    :param function_id: Function ID
    """
    # POST without a body: reveng_req already defaults data to None
    response: Response = reveng_req(
        requests.post, f"/v2/functions/{function_id}/ai-decompilation"
    )
    response.raise_for_status()
    return response
|
960
|
+
|
961
|
+
|
962
|
+
def RE_poll_ai_decompilation(function_id: int) -> Response:
    """
    Poll the AI decompilation of a function
    :param function_id: Function ID
    """
    # Same end point as RE_begin_ai_decompilation, queried with GET
    response: Response = reveng_req(
        requests.get, f"/v2/functions/{function_id}/ai-decompilation"
    )
    response.raise_for_status()
    return response
|
975
|
+
|
976
|
+
|
977
|
+
def RE_analysis_lookup(binary_id: int) -> Response:
    """
    Look up the Analysis ID that belongs to a Binary ID
    :param binary_id: Binary ID
    """
    response: Response = reveng_req(
        requests.get, f"/v2/analyses/lookup/{binary_id}"
    )

    # Surface HTTP errors to the caller instead of returning a failed response
    response.raise_for_status()
    return response
|
986
|
+
|
987
|
+
|
988
|
+
def RE_collections_search(
    page: int = 1,
    page_size: int = 10,
    partial_collection_name: str = "",
    partial_binary_name: str = "",
    partial_binary_sha256: str = "",
    tags: list[str] | str = "",
    model_name: str = "",
) -> Response:
    """
    Search the collections, one page at a time
    :param page: Page number to request
    :param page_size: Number of results per page
    :param partial_collection_name: Filter on part of the collection name (always sent)
    :param partial_binary_name: Filter on part of a binary name (sent only when set)
    :param partial_binary_sha256: Filter on part of a binary SHA-256 (sent only when set)
    :param tags: Tag or list of tags to filter on (sent only when set)
    :param model_name: Filter on the model name (sent only when set)
    """
    end_point = "/v2/search/collections"

    params = {
        "page": page,
        "page_size": page_size,
        "partial_collection_name": partial_collection_name,
    }

    # These filters used to be accepted but never forwarded. They are added only
    # when the caller supplied a value, so a call relying on the defaults sends
    # exactly the same query as before.
    # NOTE(review): query names mirror the argument names - confirm against the API
    optional_filters = {
        "partial_binary_name": partial_binary_name,
        "partial_binary_sha256": partial_binary_sha256,
        "tags": tags,
        "model_name": model_name,
    }
    params.update(
        {name: value for name, value in optional_filters.items() if value}
    )

    res: Response = reveng_req(requests.get, end_point, params=params)
    res.raise_for_status()
    return res
|
1007
|
+
|
1008
|
+
|
1009
|
+
# Naming used in this program: bin_id is the SHA-256 hash of a binary,
# BID (bid) is the ID of a binary.
def RE_latest_bid(bin_id: str) -> int:
    """
    Get the ID of the most recently created binary matching a SHA-256 hash
    Unlike re_bid_search, no user input is requested when several binaries match:
    the one with the latest creation date is selected.
    :param bin_id: SHA-256 hash of the binary
    :return: The binary ID, or -1 when no binary matches the hash
    :raises HTTPError: when the search request failed (raised by raise_for_status)
    """
    res: Response = reveng_req(
        requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
    )

    bid = -1

    if res.ok:
        # Keep only the results whose hash is exactly the requested one
        binaries = [
            binary
            for binary in res.json()["query_results"]
            if binary["sha_256_hash"] == bin_id
        ]

        if len(binaries) == 1:
            binary = binaries[0]
            bid = binary["binary_id"]

            logger.info(
                "Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
                bid,
                binary["binary_name"],
                binary["creation"],
                binary["model_name"],
                binary["status"],
            )
        elif len(binaries) > 1:
            # Newest first, so index 0 is the most recent record
            binaries.sort(
                key=lambda binary: datetime.fromisoformat(
                    binary["creation"]
                ).timestamp(),
                reverse=True,
            )

            logger.info("%d matches found for hash: %s", len(binaries), bin_id)

            for idx, binary in enumerate(binaries):
                logger.info(
                    "[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
                    idx,
                    binary["binary_id"],
                    binary["binary_name"],
                    binary["creation"],
                    binary["model_name"],
                    binary["status"],
                )

            # The list is non-empty in this branch, so the first entry always
            # exists; no fallback handler is needed.
            bid = binaries[0]["binary_id"]
            logger.warning("Select the most recent analysis - ID: %d", bid)
        else:
            logger.warning("No matches found for hash: %s", bin_id)
    else:
        logger.warning("Bad Request: %s", res.text)

    res.raise_for_status()
    return bid
|
1078
|
+
|
1079
|
+
|
1080
|
+
# NOTE: newest API as per documentation still using /v1/ prefix
|
1081
|
+
def RE_models() -> Response:
    """
    Query the v1/models end point (presumably the list of available models)
    """
    response: Response = reveng_req(requests.get, "v1/models")

    # Surface HTTP errors to the caller instead of returning a failed response
    response.raise_for_status()
    return response
|
1086
|
+
|
1087
|
+
|
1088
|
+
# NOTE: newest API as per documentation still using /v1/ prefix
|
1089
|
+
def RE_functions_dump(function_ids: list[int]) -> Response:
    """
    Request a dump for a list of functions
    :param function_ids: List of function IDs sent in the request body
    """
    payload = {"function_id_list": function_ids}
    response: Response = reveng_req(
        requests.post, "v1/functions/dump", json_data=payload
    )

    response.raise_for_status()
    return response
|
1096
|
+
|
1097
|
+
|
1098
|
+
# NOTE: this API endpoint does not actually exist
|
1099
|
+
def RE_generate_summaries(function_id: int) -> Response:
    """
    Query the blocks_comments end point of a function
    :param function_id: Function ID
    """
    end_point = f"v1/functions/blocks_comments/{function_id}"
    response: Response = reveng_req(requests.get, end_point)

    response.raise_for_status()
    return response
|
1106
|
+
|
1107
|
+
|
1108
|
+
def RE_collection_search(search: str) -> Response:
    """
    Quick search over the collections
    :param search: Search term; any falsy value is sent as an empty string
    """
    if search:
        term = search
    else:
        term = ""

    response: Response = reveng_req(
        requests.get,
        "v1/collections/quick/search",
        params={"search_term": term},
    )

    response.raise_for_status()
    return response
|
1117
|
+
|
1118
|
+
|
1119
|
+
def RE_recent_analysis(
    status: str = "All",
    scope: str = "ALL",
    nb_analysis: int = 50,
) -> Response:
    """
    Get the most recent analyses
    :param status: Value sent as "status" in the request body
    :param scope: Value sent as "scope" in the request body
    :param nb_analysis: Value sent as "n" in the request body
    """
    # The filters travel as a JSON body even though this is a GET request
    payload = {"status": status, "scope": scope, "n": nb_analysis}

    response: Response = reveng_req(
        requests.get, "v1/analyse/recent", json_data=payload
    )

    response.raise_for_status()
    return response
|
1130
|
+
|
1131
|
+
|
1132
|
+
def RE_search(fpath: str) -> Response:
    """
    Search for the binaries matching the identifier of a file
    :param fpath: File path of the binary; its identifier comes from re_binary_id
    """
    payload = {"sha_256_hash": re_binary_id(fpath)}

    response: Response = reveng_req(requests.get, "v1/search", json_data=payload)

    response.raise_for_status()
    return response
|
1141
|
+
|
1142
|
+
|
1143
|
+
# NOTE: this uses a newer API version
|
1144
|
+
def RE_similar_functions(
    function_id: int,
    limit: int = 20,
    distance: int | float = 0.09999999999999998,
    debug: bool = False,
):
    """
    Get the functions similar to a given function
    :param function_id: Function ID used to build the request path
    :param limit: Value sent as the "limit" query parameter
    :param distance: Value sent as the "distance" query parameter
    :param debug: Value sent as the "debug" query parameter
    """
    end_point = f"v2/functions/{function_id}/similar-functions"
    query = {"distance": distance, "limit": limit, "debug": debug}

    response: Response = reveng_req(requests.get, end_point, params=query)

    response.raise_for_status()
    return response
|