reait 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reait/__init__.py +0 -1
- reait/api.py +594 -105
- reait/main.py +353 -109
- {reait-1.0.1.dist-info → reait-1.1.0.dist-info}/METADATA +18 -13
- reait-1.1.0.dist-info/RECORD +9 -0
- {reait-1.0.1.dist-info → reait-1.1.0.dist-info}/WHEEL +1 -1
- reait-1.0.1.dist-info/RECORD +0 -9
- {reait-1.0.1.dist-info → reait-1.1.0.dist-info}/entry_points.txt +0 -0
- {reait-1.0.1.dist-info → reait-1.1.0.dist-info/licenses}/LICENSE +0 -0
- {reait-1.0.1.dist-info → reait-1.1.0.dist-info}/top_level.txt +0 -0
reait/api.py
CHANGED
@@ -1,28 +1,25 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
1
|
from __future__ import print_function, annotations
|
3
2
|
|
3
|
+
from os import access, R_OK, environ
|
4
|
+
from os.path import basename, isfile, expanduser, getsize
|
5
|
+
|
4
6
|
import json
|
5
|
-
import tomli
|
6
7
|
import logging
|
7
8
|
import requests
|
8
|
-
|
9
|
-
from hashlib import sha256
|
9
|
+
import tomli
|
10
10
|
from datetime import datetime
|
11
|
-
|
12
|
-
from
|
13
|
-
from os import access, R_OK
|
14
|
-
from os.path import basename, isfile, expanduser, getsize
|
15
|
-
from requests import request, Response, HTTPError
|
11
|
+
from hashlib import sha256
|
12
|
+
from lief import parse, Binary, ELF, PE, MachO
|
16
13
|
from numpy import array, vstack, dot, arccos, pi
|
17
14
|
from pandas import DataFrame
|
18
|
-
from
|
15
|
+
from requests import request, Response, HTTPError
|
16
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
19
17
|
|
20
|
-
__version__ = "1.0
|
18
|
+
__version__ = "1.1.0"
|
21
19
|
|
22
20
|
re_conf = {
|
23
|
-
"apikey": "
|
24
|
-
"host": "https://api.reveng.ai",
|
25
|
-
"model": "binnet-0.3-x86",
|
21
|
+
"apikey": environ.get("REAI_API_KEY", ""),
|
22
|
+
"host": environ.get("REAI_API_HOST", "https://api.reveng.ai"),
|
26
23
|
}
|
27
24
|
|
28
25
|
|
@@ -36,16 +33,28 @@ class ReaitError(HTTPError):
|
|
36
33
|
response.reason = reason
|
37
34
|
response.status_code = 404
|
38
35
|
response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
|
39
|
-
response.url =
|
36
|
+
response.url = (
|
37
|
+
f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
|
38
|
+
if end_point
|
39
|
+
else None
|
40
|
+
)
|
40
41
|
|
41
42
|
super().__init__(reason, response=response)
|
42
43
|
|
43
44
|
|
44
|
-
def reveng_req(
|
45
|
-
|
45
|
+
def reveng_req(
|
46
|
+
req: request,
|
47
|
+
end_point: str,
|
48
|
+
data: dict = None,
|
49
|
+
ex_headers: dict = None,
|
50
|
+
params: dict = None,
|
51
|
+
json_data: dict = None,
|
52
|
+
timeout: int = 60,
|
53
|
+
files: dict = None,
|
54
|
+
) -> Response:
|
46
55
|
"""
|
47
56
|
Constructs and sends a Request
|
48
|
-
:param
|
57
|
+
:param req: Method for the new Request
|
49
58
|
:param end_point: Endpoint to add to the base URL
|
50
59
|
:param ex_headers: Extended HTTP headers to add
|
51
60
|
:param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
|
@@ -60,22 +69,48 @@ def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict =
|
|
60
69
|
if ex_headers:
|
61
70
|
headers.update(ex_headers)
|
62
71
|
|
63
|
-
logger.debug(
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
72
|
+
logger.debug(
|
73
|
+
"Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
|
74
|
+
req.__name__.upper(),
|
75
|
+
url,
|
76
|
+
headers,
|
77
|
+
data,
|
78
|
+
json_data,
|
79
|
+
params,
|
80
|
+
files,
|
81
|
+
)
|
82
|
+
|
83
|
+
response: Response = req(
|
84
|
+
url,
|
85
|
+
headers=headers,
|
86
|
+
json=json_data,
|
87
|
+
data=data,
|
88
|
+
params=params,
|
89
|
+
timeout=timeout,
|
90
|
+
files=files,
|
91
|
+
)
|
92
|
+
|
93
|
+
logger.debug(
|
94
|
+
"Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
|
95
|
+
req.__name__.upper(),
|
96
|
+
url,
|
97
|
+
response.headers,
|
98
|
+
response.status_code,
|
99
|
+
response.text,
|
100
|
+
)
|
70
101
|
|
71
102
|
return response
|
72
103
|
|
73
104
|
|
74
105
|
def re_hash_check(bin_id: str) -> bool:
|
75
|
-
res: Response = reveng_req(
|
106
|
+
res: Response = reveng_req(
|
107
|
+
requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
|
108
|
+
)
|
76
109
|
|
77
110
|
if res.ok:
|
78
|
-
return any(
|
111
|
+
return any(
|
112
|
+
binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"]
|
113
|
+
)
|
79
114
|
else:
|
80
115
|
logger.warning("Bad Request: %s", res.text)
|
81
116
|
|
@@ -86,37 +121,63 @@ def re_hash_check(bin_id: str) -> bool:
|
|
86
121
|
# Assumes a file has been passed, correct hash only
|
87
122
|
# Returns the BID of the binary_id (hash)
|
88
123
|
def re_bid_search(bin_id: str) -> int:
|
89
|
-
res: Response = reveng_req(
|
124
|
+
res: Response = reveng_req(
|
125
|
+
requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
|
126
|
+
)
|
90
127
|
|
91
128
|
bid = -1
|
92
129
|
|
93
130
|
if res.ok:
|
94
131
|
# Filter the result who matches the SHA-256
|
95
|
-
binaries = list(
|
132
|
+
binaries = list(
|
133
|
+
filter(
|
134
|
+
lambda binary: binary["sha_256_hash"] == bin_id,
|
135
|
+
res.json()["query_results"],
|
136
|
+
)
|
137
|
+
)
|
96
138
|
|
97
139
|
# Check only one record is returned
|
98
140
|
if len(binaries) == 1:
|
99
141
|
binary = binaries[0]
|
100
142
|
bid = binary["binary_id"]
|
101
143
|
|
102
|
-
logger.info(
|
103
|
-
|
144
|
+
logger.info(
|
145
|
+
"Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
|
146
|
+
bid,
|
147
|
+
binary["binary_name"],
|
148
|
+
binary["creation"],
|
149
|
+
binary["model_name"],
|
150
|
+
binary["status"],
|
151
|
+
)
|
104
152
|
elif len(binaries) > 1:
|
105
|
-
binaries.sort(
|
153
|
+
binaries.sort(
|
154
|
+
key=lambda binary: datetime.fromisoformat(
|
155
|
+
binary["creation"]
|
156
|
+
).timestamp(),
|
157
|
+
reverse=True,
|
158
|
+
)
|
106
159
|
|
107
160
|
logger.info("%d matches found for hash: %s", len(binaries), bin_id)
|
108
161
|
|
109
162
|
options_dict = {}
|
110
163
|
|
111
164
|
for idx, binary in enumerate(binaries):
|
112
|
-
logger.info(
|
113
|
-
|
114
|
-
|
165
|
+
logger.info(
|
166
|
+
"[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
|
167
|
+
idx,
|
168
|
+
binary["binary_id"],
|
169
|
+
binary["binary_name"],
|
170
|
+
binary["creation"],
|
171
|
+
binary["model_name"],
|
172
|
+
binary["status"],
|
173
|
+
)
|
115
174
|
|
116
175
|
options_dict[idx] = binary["binary_id"]
|
117
176
|
|
118
177
|
try:
|
119
|
-
user_input = input(
|
178
|
+
user_input = input(
|
179
|
+
"[+] Please enter the option you want to use for this operation:"
|
180
|
+
)
|
120
181
|
|
121
182
|
option_number = int(user_input)
|
122
183
|
|
@@ -157,16 +218,32 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
|
|
157
218
|
elif res.status_code == 404:
|
158
219
|
logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
|
159
220
|
else:
|
160
|
-
logger.error(
|
221
|
+
logger.error(
|
222
|
+
"Error deleting binary %s under. Server returned %d.",
|
223
|
+
bin_id,
|
224
|
+
res.status_code,
|
225
|
+
)
|
161
226
|
|
162
227
|
res.raise_for_status()
|
163
228
|
return res
|
164
229
|
|
165
230
|
|
166
|
-
def RE_analyse(
|
167
|
-
|
168
|
-
|
169
|
-
|
231
|
+
def RE_analyse(
|
232
|
+
fpath: str,
|
233
|
+
model_name: str = None,
|
234
|
+
isa_options: str = None,
|
235
|
+
platform_options: str = None,
|
236
|
+
file_options: str = None,
|
237
|
+
dynamic_execution: bool = False,
|
238
|
+
command_line_args: str = None,
|
239
|
+
binary_scope: str = None,
|
240
|
+
tags: list = None,
|
241
|
+
priority: int = 0,
|
242
|
+
duplicate: bool = False,
|
243
|
+
symbols: dict = None,
|
244
|
+
debug_fpath: str = None,
|
245
|
+
skip_scraping: bool = False,
|
246
|
+
) -> Response:
|
170
247
|
"""
|
171
248
|
Start analysis job for binary file
|
172
249
|
:param fpath: File path for binary to analyse
|
@@ -182,6 +259,7 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
|
182
259
|
:param duplicate: Duplicate an existing binary
|
183
260
|
:param symbols: JSON object containing the base address and the list of functions
|
184
261
|
:param debug_fpath: File path for debug file
|
262
|
+
:param skip_scraping: Disable/Enable auto-tagging of binary sample in relevant APIs
|
185
263
|
"""
|
186
264
|
bin_id = re_binary_id(fpath)
|
187
265
|
result = re_hash_check(bin_id)
|
@@ -189,13 +267,19 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
|
189
267
|
end_point = "v1/analyse/"
|
190
268
|
|
191
269
|
if result and duplicate is False:
|
192
|
-
logger.error(
|
193
|
-
|
270
|
+
logger.error(
|
271
|
+
"Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
|
272
|
+
bin_id,
|
273
|
+
)
|
194
274
|
raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
|
195
275
|
|
196
276
|
filename = basename(fpath)
|
197
277
|
|
198
|
-
params = {
|
278
|
+
params = {
|
279
|
+
"file_name": filename,
|
280
|
+
"size_in_bytes": getsize(fpath),
|
281
|
+
"sha_256_hash": bin_id,
|
282
|
+
}
|
199
283
|
|
200
284
|
if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
|
201
285
|
try:
|
@@ -205,18 +289,30 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
|
|
205
289
|
params["debug_hash"] = debug["sha_256_hash"]
|
206
290
|
except HTTPError:
|
207
291
|
pass
|
208
|
-
|
209
|
-
for p_name in (
|
210
|
-
|
292
|
+
|
293
|
+
for p_name in (
|
294
|
+
"model_name",
|
295
|
+
"isa_options",
|
296
|
+
"platform_options",
|
297
|
+
"file_options",
|
298
|
+
"dynamic_execution",
|
299
|
+
"command_line_args",
|
300
|
+
"binary_scope",
|
301
|
+
"tags",
|
302
|
+
"priority",
|
303
|
+
"symbols",
|
304
|
+
"skip_scraping",
|
305
|
+
):
|
211
306
|
p_value = locals()[p_name]
|
212
307
|
|
213
308
|
if p_value:
|
214
309
|
params[p_name] = p_value
|
215
310
|
|
216
311
|
res: Response = reveng_req(requests.post, end_point, json_data=params)
|
217
|
-
|
218
312
|
if res.ok:
|
219
|
-
logger.info(
|
313
|
+
logger.info(
|
314
|
+
"Successfully submitted binary for analysis. %s - %s", fpath, bin_id
|
315
|
+
)
|
220
316
|
elif res.status_code == 400:
|
221
317
|
if "error" in res.json().keys():
|
222
318
|
logger.warning("Error analysing %s - %s", fpath, res.json()["error"])
|
@@ -239,22 +335,32 @@ def RE_upload(fpath: str) -> Response:
|
|
239
335
|
res = Response()
|
240
336
|
res.status_code = 200
|
241
337
|
res.url = f"{re_conf['host']}/v1/upload"
|
242
|
-
res._content = (
|
243
|
-
|
244
|
-
|
338
|
+
res._content = (
|
339
|
+
(
|
340
|
+
'{0}"success": true,'
|
341
|
+
'"message": "File already uploaded!",'
|
342
|
+
'"sha_256_hash": "{1}"{2}'
|
343
|
+
)
|
344
|
+
.format("{", bin_id, "}")
|
345
|
+
.encode()
|
346
|
+
)
|
245
347
|
else:
|
246
348
|
with open(fpath, "rb") as fd:
|
247
349
|
res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
|
248
350
|
|
249
351
|
if res.ok:
|
250
|
-
logger.info(
|
352
|
+
logger.info(
|
353
|
+
"Successfully uploaded binary to your account. %s - %s", fpath, bin_id
|
354
|
+
)
|
251
355
|
elif res.status_code == 400:
|
252
356
|
if "error" in res.json().keys():
|
253
357
|
logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
|
254
358
|
elif res.status_code == 413:
|
255
359
|
logger.warning("File too large. Please upload files under 10MB.")
|
256
360
|
elif res.status_code == 500:
|
257
|
-
logger.error(
|
361
|
+
logger.error(
|
362
|
+
"Internal Server Error. Please contact support. Skipping upload..."
|
363
|
+
)
|
258
364
|
|
259
365
|
res.raise_for_status()
|
260
366
|
return res
|
@@ -277,8 +383,10 @@ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
|
|
277
383
|
res: Response = reveng_req(requests.get, end_point)
|
278
384
|
|
279
385
|
if res.status_code == 400:
|
280
|
-
logger.warning(
|
281
|
-
|
386
|
+
logger.warning(
|
387
|
+
"Analysis for %s still in progress. Please check the logs (-l) and try again later.",
|
388
|
+
bin_id,
|
389
|
+
)
|
282
390
|
|
283
391
|
res.raise_for_status()
|
284
392
|
return res
|
@@ -376,7 +484,11 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
|
|
376
484
|
df = DataFrame(data=embeddings)
|
377
485
|
np_embedding = array(embedding).reshape(1, -1)
|
378
486
|
source_embeddings = vstack(df["embedding"].values)
|
379
|
-
closest =
|
487
|
+
closest = (
|
488
|
+
cosine_similarity(source_embeddings, np_embedding)
|
489
|
+
.squeeze()
|
490
|
+
.argsort()[::-1][:nns]
|
491
|
+
)
|
380
492
|
distances = cosine_similarity(source_embeddings[closest], np_embedding)
|
381
493
|
|
382
494
|
# match closest embeddings with similarity
|
@@ -384,16 +496,25 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
|
|
384
496
|
|
385
497
|
# create json similarity object
|
386
498
|
similarities = list(zip(distances, closest_df.index.tolist()))
|
387
|
-
json_sims = [
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
499
|
+
json_sims = [
|
500
|
+
{
|
501
|
+
"similaritiy": float(d[0]),
|
502
|
+
"vaddr": int(df.iloc[v]["vaddr"]),
|
503
|
+
"name": str(df.iloc[v]["name"]),
|
504
|
+
"size": int(df.iloc[v]["size"]),
|
505
|
+
}
|
506
|
+
for d, v in similarities
|
507
|
+
]
|
392
508
|
return json_sims
|
393
509
|
|
394
510
|
|
395
|
-
def RE_nearest_symbols_batch(
|
396
|
-
|
511
|
+
def RE_nearest_symbols_batch(
|
512
|
+
function_ids: list[int],
|
513
|
+
nns: int = 5,
|
514
|
+
collections: list[str] = None,
|
515
|
+
distance: float = 0.1,
|
516
|
+
debug_enabled: bool = False,
|
517
|
+
) -> Response:
|
397
518
|
"""
|
398
519
|
Get nearest functions to a passed function ids
|
399
520
|
:param function_ids: List of function ids
|
@@ -402,11 +523,12 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
|
|
402
523
|
:param distance: How close we want the ANN search to filter for
|
403
524
|
:param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
|
404
525
|
"""
|
405
|
-
params = {
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
526
|
+
params = {
|
527
|
+
"function_id_list": function_ids,
|
528
|
+
"result_per_function": nns,
|
529
|
+
"debug_mode": debug_enabled,
|
530
|
+
"distance": distance,
|
531
|
+
}
|
410
532
|
|
411
533
|
if collections:
|
412
534
|
# api param is collection, not collections
|
@@ -418,8 +540,13 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
|
|
418
540
|
return res
|
419
541
|
|
420
542
|
|
421
|
-
def RE_nearest_functions(
|
422
|
-
|
543
|
+
def RE_nearest_functions(
|
544
|
+
fpath: str,
|
545
|
+
binary_id: int = 0,
|
546
|
+
nns: int = 5,
|
547
|
+
distance: float = 0.1,
|
548
|
+
debug_enabled: bool = False,
|
549
|
+
) -> Response:
|
423
550
|
"""
|
424
551
|
Get the nearest functions
|
425
552
|
:param fpath: File path for binary to analyse
|
@@ -436,10 +563,11 @@ def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
|
|
436
563
|
if bid == -1:
|
437
564
|
raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
|
438
565
|
|
439
|
-
params = {
|
440
|
-
|
441
|
-
|
442
|
-
|
566
|
+
params = {
|
567
|
+
"result_per_function": nns,
|
568
|
+
"debug_mode": debug_enabled,
|
569
|
+
"distance": distance,
|
570
|
+
}
|
443
571
|
|
444
572
|
res: Response = reveng_req(requests.post, end_point, json_data=params)
|
445
573
|
|
@@ -484,14 +612,47 @@ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
|
|
484
612
|
return res
|
485
613
|
|
486
614
|
|
615
|
+
def RE_binary_additonal_details(fpath: str, binary_id: int = None) -> Response:
    """
    Fetch and log the additional details stored for a binary
    :param fpath: File path for the binary
    :param binary_id: ID of binary; when None it is searched for by the file hash
    """
    file_hash = re_binary_id(fpath)

    if binary_id is None:
        resolved_id = re_bid_search(file_hash)
    else:
        resolved_id = binary_id

    # -1 is the "not found" marker compared against after the search
    if resolved_id == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}")

    response: Response = reveng_req(
        requests.get, f"v2/binaries/{resolved_id}/additional-details"
    )
    response.raise_for_status()

    logger.info(f"Additional Details Info({fpath}):\n")
    pretty = json.dumps(response.json(), indent=4)
    logger.info(f"\n{pretty}")
    return response
|
628
|
+
|
629
|
+
|
630
|
+
def RE_binary_details(fpath: str, binary_id: int = None) -> Response:
    """
    Fetch and log the details stored for a binary
    :param fpath: File path for the binary
    :param binary_id: ID of binary; when None it is searched for by the file hash
    """
    file_hash = re_binary_id(fpath)

    if binary_id is None:
        resolved_id = re_bid_search(file_hash)
    else:
        resolved_id = binary_id

    # -1 is the "not found" marker compared against after the search
    if resolved_id == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}")

    response: Response = reveng_req(
        requests.get, f"v2/binaries/{resolved_id}/details"
    )
    response.raise_for_status()

    logger.info(f"Details Info({fpath}):\n")
    pretty = json.dumps(response.json(), indent=4)
    logger.info(f"\n{pretty}")
    return response
|
643
|
+
|
644
|
+
|
487
645
|
def RE_functions_rename(function_id: int, new_name: str) -> Response:
|
488
646
|
"""
|
489
647
|
Send the new name of a function to C2
|
490
648
|
:param function_id: ID of a function
|
491
649
|
:param new_name: New function name
|
492
650
|
"""
|
493
|
-
res: Response = reveng_req(
|
494
|
-
|
651
|
+
res: Response = reveng_req(
|
652
|
+
requests.post,
|
653
|
+
f"v1/functions/rename/{function_id}",
|
654
|
+
json_data={"new_name": new_name},
|
655
|
+
)
|
495
656
|
|
496
657
|
if res.ok:
|
497
658
|
logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
|
@@ -507,17 +668,24 @@ def RE_functions_rename_batch(mapping: dict[int, str]) -> Response:
|
|
507
668
|
Send a list of dictionaries, with a corresponding key as function ID and the desired function_name
|
508
669
|
:param mapping: dictionary containing the function_id as key and function_name as value
|
509
670
|
"""
|
510
|
-
params = {
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
671
|
+
params = {
|
672
|
+
"new_name_mapping": [
|
673
|
+
{
|
674
|
+
"function_id": func_id,
|
675
|
+
"function_name": func_name,
|
676
|
+
}
|
677
|
+
for func_id, func_name in mapping.items()
|
678
|
+
]
|
679
|
+
}
|
680
|
+
|
681
|
+
res: Response = reveng_req(
|
682
|
+
requests.post, "v1/functions/batch/rename", json_data=params
|
683
|
+
)
|
517
684
|
|
518
685
|
res.raise_for_status()
|
519
686
|
return res
|
520
687
|
|
688
|
+
|
521
689
|
def RE_settings() -> Response:
|
522
690
|
"""
|
523
691
|
Get the configuration settings
|
@@ -553,6 +721,57 @@ def RE_authentication() -> Response:
|
|
553
721
|
return res
|
554
722
|
|
555
723
|
|
724
|
+
def RE_functions_list(
    analysis_id: int,
    search_term: str = "",
    min_v_address: int = 0,
    max_v_address: int = 0,
) -> Response:
    """
    List the functions of an analysis, optionally filtered
    :param analysis_id: Analysis ID placed in the endpoint path
    :param search_term: Sent as the search_term query parameter when non-empty
    :param min_v_address: Sent as the min_v_address query parameter unless it is 0
    :param max_v_address: Sent as the max_v_address query parameter unless it is 0
    """
    query = {"search_term": search_term} if search_term else {}

    # An address bound of 0 means "no bound", so it is left out of the query
    for name, address in (
        ("min_v_address", min_v_address),
        ("max_v_address", max_v_address),
    ):
        if address != 0:
            query[name] = address

    end_point = f"v2/analyses/{analysis_id}/info/functions/list"
    response: Response = reveng_req(requests.get, end_point, params=query)

    response.raise_for_status()
    return response
|
751
|
+
|
752
|
+
|
753
|
+
def RE_function_callers_callees(function: int) -> Response:
    """
    Get the callers and callees of a function
    :param function: Function ID
    """
    end_point = f"v2/functions/{function}/callees_callers"
    response: Response = reveng_req(requests.get, end_point)

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
762
|
+
|
763
|
+
|
764
|
+
def RE_analysis_info(analysis_id: int) -> Response:
    """
    Get the basic information of an analysis
    :param analysis_id: Analysis ID
    """
    end_point = f"v2/analyses/{analysis_id}/info/basic"
    response: Response = reveng_req(requests.get, end_point)

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
773
|
+
|
774
|
+
|
556
775
|
def re_binary_id(fpath: str) -> str:
|
557
776
|
"""
|
558
777
|
Take the SHA-256 hash of binary file
|
@@ -567,9 +786,7 @@ def re_binary_id(fpath: str) -> str:
|
|
567
786
|
|
568
787
|
return hf.hexdigest()
|
569
788
|
else:
|
570
|
-
|
571
|
-
|
572
|
-
return "Undefined"
|
789
|
+
return fpath
|
573
790
|
|
574
791
|
|
575
792
|
def _binary_isa(binary: Binary, exec_type: str) -> str:
|
@@ -578,10 +795,9 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
|
|
578
795
|
"""
|
579
796
|
if exec_type == "ELF":
|
580
797
|
arch = binary.header.machine_type
|
581
|
-
|
582
|
-
if arch == ELF.ARCH.i386:
|
798
|
+
if arch == ELF.ARCH.I386:
|
583
799
|
return "x86"
|
584
|
-
elif arch == ELF.ARCH.
|
800
|
+
elif arch == ELF.ARCH.X86_64:
|
585
801
|
return "x86_64"
|
586
802
|
elif arch == ELF.ARCH.ARM:
|
587
803
|
return "ARM32"
|
@@ -589,7 +805,6 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
|
|
589
805
|
return "ARM64"
|
590
806
|
elif exec_type == "PE":
|
591
807
|
machine_type = binary.header.machine
|
592
|
-
|
593
808
|
if machine_type == PE.Header.MACHINE_TYPES.I386:
|
594
809
|
return "x86"
|
595
810
|
elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
|
@@ -601,17 +816,23 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
|
|
601
816
|
elif exec_type == "Mach-O":
|
602
817
|
cpu_type = binary.header.cpu_type
|
603
818
|
|
604
|
-
if cpu_type == MachO.
|
819
|
+
if cpu_type == MachO.Header.CPU_TYPE.X86:
|
605
820
|
return "x86"
|
606
|
-
elif cpu_type == MachO.
|
821
|
+
elif cpu_type == MachO.Header.CPU_TYPE.X86_64:
|
607
822
|
return "x86_64"
|
608
|
-
elif cpu_type == MachO.
|
823
|
+
elif cpu_type == MachO.Header.CPU_TYPE.ARM:
|
609
824
|
return "ARM32"
|
610
|
-
elif cpu_type == MachO.
|
825
|
+
elif cpu_type == MachO.Header.CPU_TYPE.ARM64:
|
611
826
|
return "ARM64"
|
612
827
|
|
613
|
-
logger.error(
|
614
|
-
|
828
|
+
logger.error(
|
829
|
+
"Error, could not determine or unsupported "
|
830
|
+
f"ISA for binary format: {exec_type}."
|
831
|
+
)
|
832
|
+
raise RuntimeError(
|
833
|
+
"Error, could not determine or unsupported "
|
834
|
+
f"ISA for binary format: {exec_type}."
|
835
|
+
)
|
615
836
|
|
616
837
|
|
617
838
|
def _binary_format(binary: Binary) -> str:
|
@@ -625,8 +846,12 @@ def _binary_format(binary: Binary) -> str:
|
|
625
846
|
if binary.format == Binary.FORMATS.MACHO:
|
626
847
|
return "Mach-O"
|
627
848
|
|
628
|
-
logger.error(
|
629
|
-
|
849
|
+
logger.error(
|
850
|
+
"Error, could not determine or unsupported" f" binary format: {binary.format}."
|
851
|
+
)
|
852
|
+
raise RuntimeError(
|
853
|
+
"Error, could not determine or " f"unsupported binary format: {binary.format}"
|
854
|
+
)
|
630
855
|
|
631
856
|
|
632
857
|
def file_type(fpath: str) -> tuple[str, str]:
|
@@ -656,17 +881,281 @@ def parse_config() -> None:
|
|
656
881
|
with open(fpath) as fd:
|
657
882
|
config = tomli.loads(fd.read())
|
658
883
|
|
659
|
-
for key in (
|
884
|
+
for key in (
|
885
|
+
"apikey",
|
886
|
+
"host",
|
887
|
+
"model",
|
888
|
+
):
|
660
889
|
if key in config:
|
661
890
|
re_conf[key] = config[key]
|
662
891
|
else:
|
663
892
|
logger.info("File %s doesn't exist or isn't readable", fpath)
|
664
893
|
|
665
894
|
|
666
|
-
def
|
895
|
+
def RE_analysis_id(fpath: str, binary_id: int = 0) -> Response:
    """
    Look up the analysis that belongs to a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; 0 means search for it by the file hash
    """
    file_hash = re_binary_id(fpath)

    if binary_id == 0:
        resolved_id = re_bid_search(file_hash)
    else:
        resolved_id = binary_id

    end_point = f"v2/analyses/lookup/{resolved_id}"

    # -1 is the "not found" marker compared against after the search
    if resolved_id == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}", end_point)

    response: Response = reveng_req(requests.get, end_point)

    # Logged before the status check, so an error body is shown as well
    logger.info("Analysis ID for %s:\n%s", fpath, response.text)

    response.raise_for_status()
    return response
|
915
|
+
|
916
|
+
|
917
|
+
def RE_generate_data_types(analysis_id: int, function_ids: list[int]) -> Response:
    """
    Request data type generation for functions of an analysis
    :param analysis_id: Analysis ID
    :param function_ids: List of function IDs, sent in the JSON body
    """
    payload = {"function_ids": function_ids}
    response: Response = reveng_req(
        requests.post,
        f"/v2/analyses/{analysis_id}/info/functions/data_types",
        json_data=payload,
    )

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
929
|
+
|
930
|
+
|
931
|
+
def RE_list_data_types(analysis_id: int, function_ids: list[int]) -> Response:
    """
    List data types for functions of an analysis
    :param analysis_id: Analysis ID
    :param function_ids: List of function IDs, sent in the JSON body
    """
    # The function IDs travel in a JSON body even though this is a GET
    payload = {"function_ids": function_ids}
    response: Response = reveng_req(
        requests.get,
        f"/v2/analyses/{analysis_id}/info/functions/data_types",
        json_data=payload,
    )

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
944
|
+
|
945
|
+
|
946
|
+
def RE_begin_ai_decompilation(function_id: int) -> Response:
    """
    Start AI decompilation of a function
    :param function_id: Function ID
    """
    # POST with an explicitly empty body
    response: Response = reveng_req(
        requests.post,
        f"/v2/functions/{function_id}/ai-decompilation",
        data=None,
    )

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
960
|
+
|
961
|
+
|
962
|
+
def RE_poll_ai_decompilation(function_id: int) -> Response:
    """
    Query the AI decompilation endpoint of a function
    :param function_id: Function ID
    """
    response: Response = reveng_req(
        requests.get, f"/v2/functions/{function_id}/ai-decompilation"
    )

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
975
|
+
|
976
|
+
|
977
|
+
def RE_analysis_lookup(binary_id: int) -> Response:
    """
    Look up the analysis for a Binary ID
    :param binary_id: Binary ID
    """
    response: Response = reveng_req(
        requests.get, f"/v2/analyses/lookup/{binary_id}"
    )

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
986
|
+
|
987
|
+
|
988
|
+
def RE_collections_search(
    page: int = 1,
    page_size: int = 10,
    partial_collection_name: str = "",
    partial_binary_name: str = "",
    partial_binary_sha256: str = "",
    tags: list[str] | str = "",
    model_name: str = "",
) -> Response:
    """
    Search collections, one page of results at a time
    :param page: Page number to request
    :param page_size: Number of results per page
    :param partial_collection_name: Collection name filter (always sent, even when empty)
    :param partial_binary_name: Binary name filter, sent only when non-empty
    :param partial_binary_sha256: Binary SHA-256 filter, sent only when non-empty
    :param tags: Tag filter, sent only when non-empty
    :param model_name: Model name filter, sent only when non-empty
    """
    params = {
        "page": page,
        "page_size": page_size,
        "partial_collection_name": partial_collection_name,
    }

    # These filters used to be accepted and then silently dropped. They are
    # forwarded only when set, so a call relying on the defaults sends exactly
    # the same request as before.
    # NOTE(review): query parameter names mirror the argument names - confirm
    # against the v2 search API reference.
    optional_filters = {
        "partial_binary_name": partial_binary_name,
        "partial_binary_sha256": partial_binary_sha256,
        "tags": tags,
        "model_name": model_name,
    }
    params.update({name: value for name, value in optional_filters.items() if value})

    res: Response = reveng_req(requests.get, "/v2/search/collections", params=params)

    # Raises HTTPError for a 4xx/5xx reply
    res.raise_for_status()
    return res
|
1007
|
+
|
1008
|
+
|
1009
|
+
# Bin_id is referred to as hash in this program - to maintain usage BID = id
|
1010
|
+
# of a binary bin_id = hash
|
1011
|
+
# Assumes a file has been passed, correct hash only
|
1012
|
+
# Returns the BID of the binary_id (hash)
|
1013
|
+
def RE_latest_bid(bin_id: str) -> int:
    """
    Find the binary ID of the most recently created record for a hash
    Unlike an interactive selection, the newest record is chosen automatically
    when several records share the hash.
    :param bin_id: SHA-256 hash of the binary
    :return: Binary ID of the selected record, or -1 when no record matches
    :raises HTTPError: when the search request returns an error status
    """
    res: Response = reveng_req(
        requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
    )

    bid = -1

    if res.ok:
        # Keep only the results whose hash matches exactly
        binaries = [
            binary
            for binary in res.json()["query_results"]
            if binary["sha_256_hash"] == bin_id
        ]

        if len(binaries) == 1:
            binary = binaries[0]
            bid = binary["binary_id"]

            logger.info(
                "Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
                bid,
                binary["binary_name"],
                binary["creation"],
                binary["model_name"],
                binary["status"],
            )
        elif len(binaries) > 1:
            # Newest first, so index 0 is the most recent record
            binaries.sort(
                key=lambda binary: datetime.fromisoformat(
                    binary["creation"]
                ).timestamp(),
                reverse=True,
            )

            logger.info("%d matches found for hash: %s", len(binaries), bin_id)

            for idx, binary in enumerate(binaries):
                logger.info(
                    "[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
                    idx,
                    binary["binary_id"],
                    binary["binary_name"],
                    binary["creation"],
                    binary["model_name"],
                    binary["status"],
                )

            # The list is non-empty here, so no fallback handler is needed
            bid = binaries[0]["binary_id"]
            logger.warning("Select the most recent analysis - ID: %d", bid)
        else:
            logger.warning("No matches found for hash: %s", bin_id)
    else:
        logger.warning("Bad Request: %s", res.text)

    res.raise_for_status()
    return bid
|
1078
|
+
|
1079
|
+
|
1080
|
+
# NOTE: newest API as per documentation still using /v1/ prefix
|
1081
|
+
def RE_models() -> Response:
    """
    Query the v1/models endpoint
    """
    response: Response = reveng_req(requests.get, "v1/models")

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
1086
|
+
|
1087
|
+
|
1088
|
+
# NOTE: newest API as per documentation still using /v1/ prefix
|
1089
|
+
def RE_functions_dump(function_ids: list[int]) -> Response:
    """
    Post a list of function IDs to the v1/functions/dump endpoint
    :param function_ids: List of function IDs
    """
    payload = {"function_id_list": function_ids}
    response: Response = reveng_req(
        requests.post, "v1/functions/dump", json_data=payload
    )

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
1096
|
+
|
1097
|
+
|
1098
|
+
# NOTE: this API endpoint does not actually exist
|
1099
|
+
def RE_generate_summaries(function_id: int) -> Response:
    """
    Query the blocks_comments endpoint of a function
    :param function_id: Function ID
    """
    end_point = f"v1/functions/blocks_comments/{function_id}"
    response: Response = reveng_req(requests.get, end_point)

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
1106
|
+
|
1107
|
+
|
1108
|
+
def RE_collection_search(search: str) -> Response:
    """
    Quick search over collections
    :param search: Search term; a falsy value is sent as an empty string
    """
    query = {"search_term": search or ""}
    response: Response = reveng_req(
        requests.get, "v1/collections/quick/search", params=query
    )

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
1117
|
+
|
1118
|
+
|
1119
|
+
def RE_recent_analysis(
    status: str = "All", scope: str = "ALL", nb_analysis: int = 50
) -> Response:
    """
    Query the recent analyses endpoint
    :param status: Sent as "status" in the JSON body
    :param scope: Sent as "scope" in the JSON body
    :param nb_analysis: Sent as "n" in the JSON body
    """
    payload = {"status": status, "scope": scope, "n": nb_analysis}
    response: Response = reveng_req(
        requests.get, "v1/analyse/recent", json_data=payload
    )

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
1130
|
+
|
1131
|
+
|
1132
|
+
def RE_search(fpath: str) -> Response:
    """
    Search for records matching the hash of a file
    :param fpath: File path for the binary whose hash is searched
    """
    payload = {"sha_256_hash": re_binary_id(fpath)}
    response: Response = reveng_req(requests.get, "v1/search", json_data=payload)

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|
1141
|
+
|
1142
|
+
|
1143
|
+
# NOTE: this uses a newer API version
|
1144
|
+
def RE_similar_functions(
    function_id: int,
    limit: int = 20,
    distance: int | float = 0.09999999999999998,
    debug: bool = False,
):
    """
    Query the similar-functions endpoint of a function
    :param function_id: Function ID placed in the endpoint path
    :param limit: Sent as the limit query parameter
    :param distance: Sent as the distance query parameter
    :param debug: Sent as the debug query parameter
    """
    end_point = f"v2/functions/{function_id}/similar-functions"
    query = {"distance": distance, "limit": limit, "debug": debug}

    response: Response = reveng_req(requests.get, end_point, params=query)

    # Raises HTTPError for a 4xx/5xx reply
    response.raise_for_status()
    return response
|