reait 1.0.1__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reait/api.py CHANGED
@@ -1,28 +1,25 @@
1
- # -*- coding: utf-8 -*-
2
1
  from __future__ import print_function, annotations
3
2
 
3
+ from os import access, R_OK, environ
4
+ from os.path import basename, isfile, expanduser, getsize
5
+
4
6
  import json
5
- import tomli
6
7
  import logging
7
8
  import requests
8
-
9
- from hashlib import sha256
9
+ import tomli
10
10
  from datetime import datetime
11
-
12
- from sklearn.metrics.pairwise import cosine_similarity
13
- from os import access, R_OK
14
- from os.path import basename, isfile, expanduser, getsize
15
- from requests import request, Response, HTTPError
11
+ from hashlib import sha256
12
+ from lief import parse, Binary, ELF, PE, MachO
16
13
  from numpy import array, vstack, dot, arccos, pi
17
14
  from pandas import DataFrame
18
- from lief import parse, Binary, ELF, PE, MachO
15
+ from requests import request, Response, HTTPError
16
+ from sklearn.metrics.pairwise import cosine_similarity
19
17
 
20
- __version__ = "1.0.1"
18
+ __version__ = "1.1.1"
21
19
 
22
20
  re_conf = {
23
- "apikey": "l1br3",
24
- "host": "https://api.reveng.ai",
25
- "model": "binnet-0.3-x86",
21
+ "apikey": environ.get("REAI_API_KEY", ""),
22
+ "host": environ.get("REAI_API_HOST", "https://api.reveng.ai"),
26
23
  }
27
24
 
28
25
 
@@ -35,17 +32,30 @@ class ReaitError(HTTPError):
35
32
 
36
33
  response.reason = reason
37
34
  response.status_code = 404
38
- response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
39
- response.url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}" if end_point else None
35
+ response._content = b'{"success": false, "error": "' + \
36
+ reason.encode() + b'"}'
37
+ response.url = (
38
+ f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
39
+ if end_point
40
+ else None
41
+ )
40
42
 
41
43
  super().__init__(reason, response=response)
42
44
 
43
45
 
44
- def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict = None,
45
- params: dict = None, json_data: dict = None, timeout: int = 60, files: dict = None) -> Response:
46
+ def reveng_req(
47
+ req: request,
48
+ end_point: str,
49
+ data: dict = None,
50
+ ex_headers: dict = None,
51
+ params: dict = None,
52
+ json_data: dict = None,
53
+ timeout: int = 60,
54
+ files: dict = None,
55
+ ) -> Response:
46
56
  """
47
57
  Constructs and sends a Request
48
- :param r: Method for the new Request
58
+ :param req: Method for the new Request
49
59
  :param end_point: Endpoint to add to the base URL
50
60
  :param ex_headers: Extended HTTP headers to add
51
61
  :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
@@ -60,22 +70,48 @@ def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict =
60
70
  if ex_headers:
61
71
  headers.update(ex_headers)
62
72
 
63
- logger.debug("Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
64
- r.__name__.upper(), url, headers, data, json_data, params, files)
65
-
66
- response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout, files=files)
67
-
68
- logger.debug("Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
69
- r.__name__.upper(), url, response.headers, response.status_code, response.text)
73
+ logger.debug(
74
+ "Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
75
+ req.__name__.upper(),
76
+ url,
77
+ headers,
78
+ data,
79
+ json_data,
80
+ params,
81
+ files,
82
+ )
83
+
84
+ response: Response = req(
85
+ url,
86
+ headers=headers,
87
+ json=json_data,
88
+ data=data,
89
+ params=params,
90
+ timeout=timeout,
91
+ files=files,
92
+ )
93
+
94
+ logger.debug(
95
+ "Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
96
+ req.__name__.upper(),
97
+ url,
98
+ response.headers,
99
+ response.status_code,
100
+ response.text,
101
+ )
70
102
 
71
103
  return response
72
104
 
73
105
 
74
106
  def re_hash_check(bin_id: str) -> bool:
75
- res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
107
+ res: Response = reveng_req(
108
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
109
+ )
76
110
 
77
111
  if res.ok:
78
- return any(binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"])
112
+ return any(
113
+ binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"]
114
+ )
79
115
  else:
80
116
  logger.warning("Bad Request: %s", res.text)
81
117
 
@@ -86,37 +122,63 @@ def re_hash_check(bin_id: str) -> bool:
86
122
  # Assumes a file has been passed, correct hash only
87
123
  # Returns the BID of the binary_id (hash)
88
124
  def re_bid_search(bin_id: str) -> int:
89
- res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
125
+ res: Response = reveng_req(
126
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
127
+ )
90
128
 
91
129
  bid = -1
92
130
 
93
131
  if res.ok:
94
132
  # Filter the result who matches the SHA-256
95
- binaries = list(filter(lambda binary: binary["sha_256_hash"] == bin_id, res.json()["query_results"]))
133
+ binaries = list(
134
+ filter(
135
+ lambda binary: binary["sha_256_hash"] == bin_id,
136
+ res.json()["query_results"],
137
+ )
138
+ )
96
139
 
97
140
  # Check only one record is returned
98
141
  if len(binaries) == 1:
99
142
  binary = binaries[0]
100
143
  bid = binary["binary_id"]
101
144
 
102
- logger.info("Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
103
- bid, binary["binary_name"], binary["creation"], binary["model_name"], binary["status"])
145
+ logger.info(
146
+ "Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
147
+ bid,
148
+ binary["binary_name"],
149
+ binary["creation"],
150
+ binary["model_name"],
151
+ binary["status"],
152
+ )
104
153
  elif len(binaries) > 1:
105
- binaries.sort(key=lambda binary: datetime.fromisoformat(binary["creation"]).timestamp(), reverse=True)
154
+ binaries.sort(
155
+ key=lambda binary: datetime.fromisoformat(
156
+ binary["creation"]
157
+ ).timestamp(),
158
+ reverse=True,
159
+ )
106
160
 
107
161
  logger.info("%d matches found for hash: %s", len(binaries), bin_id)
108
162
 
109
163
  options_dict = {}
110
164
 
111
165
  for idx, binary in enumerate(binaries):
112
- logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
113
- idx, binary["binary_id"], binary["binary_name"], binary["creation"],
114
- binary["model_name"], binary["status"])
166
+ logger.info(
167
+ "[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
168
+ idx,
169
+ binary["binary_id"],
170
+ binary["binary_name"],
171
+ binary["creation"],
172
+ binary["model_name"],
173
+ binary["status"],
174
+ )
115
175
 
116
176
  options_dict[idx] = binary["binary_id"]
117
177
 
118
178
  try:
119
- user_input = input("[+] Please enter the option you want to use for this operation:")
179
+ user_input = input(
180
+ "[+] Please enter the option you want to use for this operation:"
181
+ )
120
182
 
121
183
  option_number = int(user_input)
122
184
 
@@ -157,16 +219,35 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
157
219
  elif res.status_code == 404:
158
220
  logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
159
221
  else:
160
- logger.error("Error deleting binary %s under. Server returned %d.", bin_id, res.status_code)
222
+ logger.error(
223
+ "Error deleting binary %s under. Server returned %d.",
224
+ bin_id,
225
+ res.status_code,
226
+ )
161
227
 
162
228
  res.raise_for_status()
163
229
  return res
164
230
 
165
231
 
166
- def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
167
- platform_options: str = None, file_options: str = None, dynamic_execution: bool = False,
168
- command_line_args: str = None, binary_scope: str = None, tags: list = None, priority: int = 0,
169
- duplicate: bool = False, symbols: dict = None, debug_fpath: str = None) -> Response:
232
+ def RE_analyse(
233
+ fpath: str,
234
+ model_name: str = None,
235
+ isa_options: str = None,
236
+ platform_options: str = None,
237
+ file_options: str = None,
238
+ dynamic_execution: bool = False,
239
+ command_line_args: str = None,
240
+ binary_scope: str = None,
241
+ tags: list = None,
242
+ priority: int = 0,
243
+ duplicate: bool = False,
244
+ symbols: dict = None,
245
+ debug_fpath: str = None,
246
+ skip_scraping: bool = False,
247
+ skip_capabilities: bool = False,
248
+ skip_sbom: bool = False,
249
+ advanced_analysis: bool = False
250
+ ) -> Response:
170
251
  """
171
252
  Start analysis job for binary file
172
253
  :param fpath: File path for binary to analyse
@@ -182,6 +263,7 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
182
263
  :param duplicate: Duplicate an existing binary
183
264
  :param symbols: JSON object containing the base address and the list of functions
184
265
  :param debug_fpath: File path for debug file
266
+ :param skip_scraping: Disable/Enable auto-tagging of binary sample in relevant APIs
185
267
  """
186
268
  bin_id = re_binary_id(fpath)
187
269
  result = re_hash_check(bin_id)
@@ -189,13 +271,19 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
189
271
  end_point = "v1/analyse/"
190
272
 
191
273
  if result and duplicate is False:
192
- logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
193
- bin_id)
274
+ logger.error(
275
+ "Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
276
+ bin_id,
277
+ )
194
278
  raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
195
279
 
196
280
  filename = basename(fpath)
197
281
 
198
- params = {"file_name": filename, "size_in_bytes": getsize(fpath), "sha_256_hash": bin_id,}
282
+ params = {
283
+ "file_name": filename,
284
+ "size_in_bytes": getsize(fpath),
285
+ "sha_256_hash": bin_id,
286
+ }
199
287
 
200
288
  if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
201
289
  try:
@@ -205,21 +293,37 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
205
293
  params["debug_hash"] = debug["sha_256_hash"]
206
294
  except HTTPError:
207
295
  pass
208
-
209
- for p_name in ("model_name", "isa_options", "platform_options", "file_options",
210
- "dynamic_execution", "command_line_args", "binary_scope", "tags", "priority", "symbols",):
296
+
297
+ for p_name in (
298
+ "model_name",
299
+ "isa_options",
300
+ "platform_options",
301
+ "file_options",
302
+ "dynamic_execution",
303
+ "command_line_args",
304
+ "binary_scope",
305
+ "tags",
306
+ "priority",
307
+ "symbols",
308
+ "skip_scraping",
309
+ "skip_capabilities",
310
+ "skip_sbom",
311
+ "advanced_analysis"
312
+ ):
211
313
  p_value = locals()[p_name]
212
314
 
213
315
  if p_value:
214
316
  params[p_name] = p_value
215
317
 
216
318
  res: Response = reveng_req(requests.post, end_point, json_data=params)
217
-
218
319
  if res.ok:
219
- logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
320
+ logger.info(
321
+ "Successfully submitted binary for analysis. %s - %s", fpath, bin_id
322
+ )
220
323
  elif res.status_code == 400:
221
324
  if "error" in res.json().keys():
222
- logger.warning("Error analysing %s - %s", fpath, res.json()["error"])
325
+ logger.warning("Error analysing %s - %s",
326
+ fpath, res.json()["error"])
223
327
 
224
328
  res.raise_for_status()
225
329
  return res
@@ -234,27 +338,40 @@ def RE_upload(fpath: str) -> Response:
234
338
  result = re_hash_check(bin_id)
235
339
 
236
340
  if result:
237
- logger.info("File %s - %s already uploaded. Skipping upload...", fpath, bin_id)
341
+ logger.info(
342
+ "File %s - %s already uploaded. Skipping upload...", fpath, bin_id)
238
343
 
239
344
  res = Response()
240
345
  res.status_code = 200
241
346
  res.url = f"{re_conf['host']}/v1/upload"
242
- res._content = ('{0}"success": true,'
243
- '"message": "File already uploaded!",'
244
- '"sha_256_hash": "{1}"{2}').format("{", bin_id, "}").encode()
347
+ res._content = (
348
+ (
349
+ '{0}"success": true,'
350
+ '"message": "File already uploaded!",'
351
+ '"sha_256_hash": "{1}"{2}'
352
+ )
353
+ .format("{", bin_id, "}")
354
+ .encode()
355
+ )
245
356
  else:
246
357
  with open(fpath, "rb") as fd:
247
- res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
358
+ res: Response = reveng_req(
359
+ requests.post, "v1/upload", files={"file": fd})
248
360
 
249
361
  if res.ok:
250
- logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
362
+ logger.info(
363
+ "Successfully uploaded binary to your account. %s - %s", fpath, bin_id
364
+ )
251
365
  elif res.status_code == 400:
252
366
  if "error" in res.json().keys():
253
- logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
367
+ logger.warning("Error uploading %s - %s",
368
+ fpath, res.json()["error"])
254
369
  elif res.status_code == 413:
255
370
  logger.warning("File too large. Please upload files under 10MB.")
256
371
  elif res.status_code == 500:
257
- logger.error("Internal Server Error. Please contact support. Skipping upload...")
372
+ logger.error(
373
+ "Internal Server Error. Please contact support. Skipping upload..."
374
+ )
258
375
 
259
376
  res.raise_for_status()
260
377
  return res
@@ -277,8 +394,10 @@ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
277
394
  res: Response = reveng_req(requests.get, end_point)
278
395
 
279
396
  if res.status_code == 400:
280
- logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
281
- bin_id)
397
+ logger.warning(
398
+ "Analysis for %s still in progress. Please check the logs (-l) and try again later.",
399
+ bin_id,
400
+ )
282
401
 
283
402
  res.raise_for_status()
284
403
  return res
@@ -376,7 +495,11 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
376
495
  df = DataFrame(data=embeddings)
377
496
  np_embedding = array(embedding).reshape(1, -1)
378
497
  source_embeddings = vstack(df["embedding"].values)
379
- closest = cosine_similarity(source_embeddings, np_embedding).squeeze().argsort()[::-1][:nns]
498
+ closest = (
499
+ cosine_similarity(source_embeddings, np_embedding)
500
+ .squeeze()
501
+ .argsort()[::-1][:nns]
502
+ )
380
503
  distances = cosine_similarity(source_embeddings[closest], np_embedding)
381
504
 
382
505
  # match closest embeddings with similarity
@@ -384,16 +507,26 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
384
507
 
385
508
  # create json similarity object
386
509
  similarities = list(zip(distances, closest_df.index.tolist()))
387
- json_sims = [{"similaritiy": float(d[0]),
388
- "vaddr": int(df.iloc[v]["vaddr"]),
389
- "name": str(df.iloc[v]["name"]),
390
- "size": int(df.iloc[v]["size"]),
391
- } for d, v in similarities]
510
+ json_sims = [
511
+ {
512
+ "similaritiy": float(d[0]),
513
+ "vaddr": int(df.iloc[v]["vaddr"]),
514
+ "name": str(df.iloc[v]["name"]),
515
+ "size": int(df.iloc[v]["size"]),
516
+ }
517
+ for d, v in similarities
518
+ ]
392
519
  return json_sims
393
520
 
394
521
 
395
- def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections: list[str] = None,
396
- distance: float = 0.1, debug_enabled: bool = False) -> Response:
522
+ def RE_nearest_symbols_batch(
523
+ function_ids: list[int],
524
+ nns: int = 5,
525
+ collections: list[int] = None,
526
+ binaries: list[int] = None,
527
+ distance: float = 0.1,
528
+ debug_enabled: bool = False,
529
+ ) -> Response:
397
530
  """
398
531
  Get nearest functions to a passed function ids
399
532
  :param function_ids: List of function ids
@@ -402,24 +535,33 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
402
535
  :param distance: How close we want the ANN search to filter for
403
536
  :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
404
537
  """
405
- params = {"function_id_list": function_ids,
406
- "result_per_function": nns,
407
- "debug_mode": debug_enabled,
408
- "distance": distance,
409
- }
538
+ params = {
539
+ "function_id_list": function_ids,
540
+ "result_per_function": nns,
541
+ "debug_mode": debug_enabled,
542
+ "distance": distance,
543
+ }
410
544
 
411
545
  if collections:
412
- # api param is collection, not collections
413
- params["collection"] = collections
546
+ params["collection_search_list"] = collections
547
+
548
+ if binaries:
549
+ params["binaries_search_list"] = binaries
414
550
 
415
- res: Response = reveng_req(requests.post, "v1/ann/symbol/batch", json_data=params)
551
+ res: Response = reveng_req(
552
+ requests.post, "v1/ann/symbol/batch", json_data=params)
416
553
 
417
554
  res.raise_for_status()
418
555
  return res
419
556
 
420
557
 
421
- def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
422
- distance: float = 0.1, debug_enabled: bool = False) -> Response:
558
+ def RE_nearest_functions(
559
+ fpath: str,
560
+ binary_id: int = 0,
561
+ nns: int = 5,
562
+ distance: float = 0.1,
563
+ debug_enabled: bool = False,
564
+ ) -> Response:
423
565
  """
424
566
  Get the nearest functions
425
567
  :param fpath: File path for binary to analyse
@@ -436,10 +578,11 @@ def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
436
578
  if bid == -1:
437
579
  raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
438
580
 
439
- params = {"result_per_function": nns,
440
- "debug_mode": debug_enabled,
441
- "distance": distance,
442
- }
581
+ params = {
582
+ "result_per_function": nns,
583
+ "debug_mode": debug_enabled,
584
+ "distance": distance,
585
+ }
443
586
 
444
587
  res: Response = reveng_req(requests.post, end_point, json_data=params)
445
588
 
@@ -484,19 +627,54 @@ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
484
627
  return res
485
628
 
486
629
 
630
+ def RE_binary_additonal_details(fpath: str, binary_id: int = None) -> Response:
631
+ bin_id = re_binary_id(fpath)
632
+ bid = re_bid_search(bin_id) if binary_id is None else binary_id
633
+ if bid == -1:
634
+ raise ReaitError(f"No matches found for hash: {bin_id}")
635
+
636
+ endpoint = f"v2/binaries/{bid}/additional-details"
637
+ res: Response = reveng_req(requests.get, endpoint)
638
+ res.raise_for_status()
639
+
640
+ logger.info(f"Additional Details Info({fpath}):\n")
641
+ logger.info(f"\n{json.dumps(res.json(), indent=4)}")
642
+ return res
643
+
644
+
645
+ def RE_binary_details(fpath: str, binary_id: int = None) -> Response:
646
+ bin_id = re_binary_id(fpath)
647
+ bid = re_bid_search(bin_id) if binary_id is None else binary_id
648
+ if bid == -1:
649
+ raise ReaitError(f"No matches found for hash: {bin_id}")
650
+
651
+ endpoint = f"v2/binaries/{bid}/details"
652
+ res: Response = reveng_req(requests.get, endpoint)
653
+ res.raise_for_status()
654
+
655
+ logger.info(f"Details Info({fpath}):\n")
656
+ logger.info(f"\n{json.dumps(res.json(), indent=4)}")
657
+ return res
658
+
659
+
487
660
  def RE_functions_rename(function_id: int, new_name: str) -> Response:
488
661
  """
489
662
  Send the new name of a function to C2
490
663
  :param function_id: ID of a function
491
664
  :param new_name: New function name
492
665
  """
493
- res: Response = reveng_req(requests.post, f"v1/functions/rename/{function_id}",
494
- json_data={"new_name": new_name})
666
+ res: Response = reveng_req(
667
+ requests.post,
668
+ f"v1/functions/rename/{function_id}",
669
+ json_data={"new_name": new_name},
670
+ )
495
671
 
496
672
  if res.ok:
497
- logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
673
+ logger.info("FunctionId %d has been renamed with '%s'.",
674
+ function_id, new_name)
498
675
  else:
499
- logger.warning("Error, cannot rename FunctionId %d. %s", function_id, res.text)
676
+ logger.warning("Error, cannot rename FunctionId %d. %s",
677
+ function_id, res.text)
500
678
 
501
679
  res.raise_for_status()
502
680
  return res
@@ -507,17 +685,24 @@ def RE_functions_rename_batch(mapping: dict[int, str]) -> Response:
507
685
  Send a list of dictionaries, with a corresponding key as function ID and the desired function_name
508
686
  :param mapping: dictionary containing the function_id as key and function_name as value
509
687
  """
510
- params = {"new_name_mapping":
511
- [{"function_id": func_id,
512
- "function_name": func_name,
513
- } for func_id, func_name in mapping.items()]
514
- }
515
-
516
- res: Response = reveng_req(requests.post, "v1/functions/batch/rename", json_data=params)
688
+ params = {
689
+ "new_name_mapping": [
690
+ {
691
+ "function_id": func_id,
692
+ "function_name": func_name,
693
+ }
694
+ for func_id, func_name in mapping.items()
695
+ ]
696
+ }
697
+
698
+ res: Response = reveng_req(
699
+ requests.post, "v1/functions/batch/rename", json_data=params
700
+ )
517
701
 
518
702
  res.raise_for_status()
519
703
  return res
520
704
 
705
+
521
706
  def RE_settings() -> Response:
522
707
  """
523
708
  Get the configuration settings
@@ -553,6 +738,59 @@ def RE_authentication() -> Response:
553
738
  return res
554
739
 
555
740
 
741
+ def RE_functions_list(
742
+ analysis_id: int,
743
+ search_term: str = "",
744
+ min_v_address: int = 0,
745
+ max_v_address: int = 0,
746
+ ) -> Response:
747
+ """
748
+ Get the functions of a binary
749
+ :param binary_id: Binary ID
750
+ """
751
+ params = {}
752
+ if search_term:
753
+ params["search_term"] = search_term
754
+
755
+ if min_v_address != 0:
756
+ params["min_v_address"] = min_v_address
757
+
758
+ if max_v_address != 0:
759
+ params["max_v_address"] = max_v_address
760
+
761
+ res: Response = reveng_req(
762
+ requests.get, f"v2/analyses/{analysis_id}/info/functions/list", params=params
763
+ )
764
+
765
+ res.raise_for_status()
766
+
767
+ return res
768
+
769
+
770
+ def RE_function_callers_callees(function: int) -> Response:
771
+ """
772
+ Get the callers and callees of a functions
773
+ :param function: Function ID
774
+ """
775
+ res: Response = reveng_req(
776
+ requests.get, f"v2/functions/{function}/callees_callers")
777
+
778
+ res.raise_for_status()
779
+ return res
780
+
781
+
782
+ def RE_analysis_info(analysis_id: int) -> Response:
783
+ """
784
+ Get the analysis information
785
+ :param analysis_id: Analysis ID
786
+ """
787
+ res: Response = reveng_req(
788
+ requests.get, f"v2/analyses/{analysis_id}/info/basic")
789
+
790
+ res.raise_for_status()
791
+ return res
792
+
793
+
556
794
  def re_binary_id(fpath: str) -> str:
557
795
  """
558
796
  Take the SHA-256 hash of binary file
@@ -567,9 +805,7 @@ def re_binary_id(fpath: str) -> str:
567
805
 
568
806
  return hf.hexdigest()
569
807
  else:
570
- logger.error("File '%s' doesn't exist or isn't readable", fpath)
571
-
572
- return "Undefined"
808
+ return fpath
573
809
 
574
810
 
575
811
  def _binary_isa(binary: Binary, exec_type: str) -> str:
@@ -578,10 +814,9 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
578
814
  """
579
815
  if exec_type == "ELF":
580
816
  arch = binary.header.machine_type
581
-
582
- if arch == ELF.ARCH.i386:
817
+ if arch == ELF.ARCH.I386:
583
818
  return "x86"
584
- elif arch == ELF.ARCH.x86_64:
819
+ elif arch == ELF.ARCH.X86_64:
585
820
  return "x86_64"
586
821
  elif arch == ELF.ARCH.ARM:
587
822
  return "ARM32"
@@ -589,7 +824,6 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
589
824
  return "ARM64"
590
825
  elif exec_type == "PE":
591
826
  machine_type = binary.header.machine
592
-
593
827
  if machine_type == PE.Header.MACHINE_TYPES.I386:
594
828
  return "x86"
595
829
  elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
@@ -601,17 +835,23 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
601
835
  elif exec_type == "Mach-O":
602
836
  cpu_type = binary.header.cpu_type
603
837
 
604
- if cpu_type == MachO.CPU_TYPES.x86:
838
+ if cpu_type == MachO.Header.CPU_TYPE.X86:
605
839
  return "x86"
606
- elif cpu_type == MachO.CPU_TYPES.x86_64:
840
+ elif cpu_type == MachO.Header.CPU_TYPE.X86_64:
607
841
  return "x86_64"
608
- elif cpu_type == MachO.CPU_TYPES.ARM:
842
+ elif cpu_type == MachO.Header.CPU_TYPE.ARM:
609
843
  return "ARM32"
610
- elif cpu_type == MachO.CPU_TYPES.ARM64:
844
+ elif cpu_type == MachO.Header.CPU_TYPE.ARM64:
611
845
  return "ARM64"
612
846
 
613
- logger.error("Error, could not determine or unsupported ISA for binary format: %s.", exec_type)
614
- raise RuntimeError(f"Error, could not determine or unsupported ISA for binary format: {exec_type}.")
847
+ logger.error(
848
+ "Error, could not determine or unsupported "
849
+ f"ISA for binary format: {exec_type}."
850
+ )
851
+ raise RuntimeError(
852
+ "Error, could not determine or unsupported "
853
+ f"ISA for binary format: {exec_type}."
854
+ )
615
855
 
616
856
 
617
857
  def _binary_format(binary: Binary) -> str:
@@ -625,8 +865,12 @@ def _binary_format(binary: Binary) -> str:
625
865
  if binary.format == Binary.FORMATS.MACHO:
626
866
  return "Mach-O"
627
867
 
628
- logger.error("Error, could not determine or unsupported binary format: %s.", binary.format)
629
- raise RuntimeError(f"Error, could not determine or unsupported binary format: {binary.format}")
868
+ logger.error(
869
+ "Error, could not determine or unsupported" f" binary format: {binary.format}."
870
+ )
871
+ raise RuntimeError(
872
+ "Error, could not determine or " f"unsupported binary format: {binary.format}"
873
+ )
630
874
 
631
875
 
632
876
  def file_type(fpath: str) -> tuple[str, str]:
@@ -656,17 +900,294 @@ def parse_config() -> None:
656
900
  with open(fpath) as fd:
657
901
  config = tomli.loads(fd.read())
658
902
 
659
- for key in ("apikey", "host", "model",):
903
+ for key in (
904
+ "apikey",
905
+ "host",
906
+ "model",
907
+ ):
660
908
  if key in config:
661
909
  re_conf[key] = config[key]
662
910
  else:
663
911
  logger.info("File %s doesn't exist or isn't readable", fpath)
664
912
 
665
913
 
666
- def angular_distance(x, y) -> float:
914
+ def RE_analysis_id(fpath: str, binary_id: int = 0) -> Response:
915
+ """
916
+ Get the Analysis ID for the Binary ID
917
+ :param fpath: File path for binary to analyse
918
+ :param binary_id: ID of binary
919
+ """
920
+ bin_id = re_binary_id(fpath)
921
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
922
+
923
+ end_point = f"v2/analyses/lookup/{bid}"
924
+
925
+ if bid == -1:
926
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
927
+
928
+ res: Response = reveng_req(requests.get, end_point)
929
+
930
+ logger.info("Analysis ID for %s:\n%s", fpath, res.text)
931
+
932
+ res.raise_for_status()
933
+ return res
934
+
935
+
936
+ def RE_generate_data_types(analysis_id: int, function_ids: list[int]) -> Response:
667
937
  """
668
- Compute angular distance between two embedding vectors
669
- Normalised euclidian distance
938
+ Generate data types for the analysis
939
+ :param aid: Analysis ID
670
940
  """
671
- cos = dot(x, y) / ((dot(x, x) * dot(y, y))**0.5)
672
- return 1.0 - arccos(cos) / pi
941
+ end_point = f"/v2/analyses/{analysis_id}/functions/data_types"
942
+
943
+ res: Response = reveng_req(
944
+ requests.post, end_point, json_data={"function_ids": function_ids}
945
+ )
946
+ res.raise_for_status()
947
+ return res
948
+
949
+
950
+ def RE_list_data_types(analysis_id: int, function_ids: list[int]) -> Response:
951
+ """
952
+ List data types for the analysis
953
+ :param aid: Analysis ID
954
+ :param function_ids: List of function IDs
955
+ """
956
+ end_point = f"/v2/analyses/{analysis_id}/functions/data_types"
957
+
958
+ res: Response = reveng_req(
959
+ requests.get, end_point, json_data={"function_ids": function_ids}
960
+ )
961
+ res.raise_for_status()
962
+ return res
963
+
964
+
965
+ def RE_begin_ai_decompilation(function_id: int) -> Response:
966
+ """
967
+ Begin AI decompilation for the function
968
+ :param function_id: Function ID
969
+ """
970
+ end_point = f"/v2/functions/{function_id}/ai-decompilation"
971
+
972
+ res: Response = reveng_req(
973
+ requests.post,
974
+ end_point,
975
+ data=None,
976
+ )
977
+ res.raise_for_status()
978
+ return res
979
+
980
+
981
+ def RE_poll_ai_decompilation(function_id: int) -> Response:
982
+ """
983
+ Poll AI decompilation for the function
984
+ :param function_id: Function ID
985
+ """
986
+ end_point = f"/v2/functions/{function_id}/ai-decompilation"
987
+
988
+ res: Response = reveng_req(
989
+ requests.get,
990
+ end_point,
991
+ )
992
+ res.raise_for_status()
993
+ return res
994
+
995
+
996
+ def RE_analysis_lookup(binary_id: int) -> Response:
997
+ """
998
+ Get the Analysis ID from a Binary ID
999
+ :param binary_id: Binary ID
1000
+ """
1001
+ end_point = f"/v2/analyses/lookup/{binary_id}"
1002
+ res: Response = reveng_req(requests.get, end_point)
1003
+ res.raise_for_status()
1004
+ return res
1005
+
1006
+
1007
+ def RE_collections_search(
1008
+ page: int = 1,
1009
+ page_size: int = 10,
1010
+ search: str = "",
1011
+ ) -> Response:
1012
+ """
1013
+ """
1014
+ end_point = "/v2/search/collections"
1015
+ res: Response = reveng_req(requests.get, end_point, params={
1016
+ "page": page,
1017
+ "page_size": page_size,
1018
+ "partial_collection_name": search,
1019
+ })
1020
+ res.raise_for_status()
1021
+ return res
1022
+
1023
+
1024
+ def RE_binaries_search(
1025
+ page: int = 1,
1026
+ page_size: int = 10,
1027
+ search: str = "",
1028
+ ) -> Response:
1029
+ """
1030
+ """
1031
+ end_point = "/v2/search/binaries"
1032
+ res: Response = reveng_req(requests.get, end_point, params={
1033
+ "page": page,
1034
+ "page_size": page_size,
1035
+ "partial_name": search,
1036
+ })
1037
+ res.raise_for_status()
1038
+ return res
1039
+
1040
+
1041
+ # Bin_id is referred to as hash in this program - to maintain usage BID = id
1042
+ # of a binary bin_id = hash
1043
+ # Assumes a file has been passed, correct hash only
1044
+ # Returns the BID of the binary_id (hash)
1045
+ def RE_latest_bid(bin_id: str) -> int:
1046
+ res: Response = reveng_req(
1047
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
1048
+ )
1049
+
1050
+ bid = -1
1051
+
1052
+ if res.ok:
1053
+ # Filter the result who matches the SHA-256
1054
+ binaries = list(
1055
+ filter(
1056
+ lambda binary: binary["sha_256_hash"] == bin_id,
1057
+ res.json()["query_results"],
1058
+ )
1059
+ )
1060
+
1061
+ # Check only one record is returned
1062
+ if len(binaries) == 1:
1063
+ binary = binaries[0]
1064
+ bid = binary["binary_id"]
1065
+
1066
+ logger.info(
1067
+ "Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
1068
+ bid,
1069
+ binary["binary_name"],
1070
+ binary["creation"],
1071
+ binary["model_name"],
1072
+ binary["status"],
1073
+ )
1074
+ elif len(binaries) > 1:
1075
+ binaries.sort(
1076
+ key=lambda binary: datetime.fromisoformat(
1077
+ binary["creation"]
1078
+ ).timestamp(),
1079
+ reverse=True,
1080
+ )
1081
+
1082
+ logger.info("%d matches found for hash: %s", len(binaries), bin_id)
1083
+
1084
+ options_dict = {}
1085
+
1086
+ for idx, binary in enumerate(binaries):
1087
+ logger.info(
1088
+ "[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
1089
+ idx,
1090
+ binary["binary_id"],
1091
+ binary["binary_name"],
1092
+ binary["creation"],
1093
+ binary["model_name"],
1094
+ binary["status"],
1095
+ )
1096
+
1097
+ options_dict[idx] = binary["binary_id"]
1098
+ try:
1099
+ bid = options_dict[0]
1100
+ except Exception:
1101
+ bid = options_dict[0]
1102
+ logger.warning("Select the most recent analysis - ID: %d", bid)
1103
+ else:
1104
+ logger.warning("No matches found for hash: %s", bin_id)
1105
+ else:
1106
+ logger.warning("Bad Request: %s", res.text)
1107
+
1108
+ res.raise_for_status()
1109
+ return bid
1110
+
1111
+
1112
+ # NOTE: newest API as per documentation still using /v1/ prefix
1113
+ def RE_models() -> Response:
1114
+ res: Response = reveng_req(requests.get, "v1/models")
1115
+
1116
+ res.raise_for_status()
1117
+ return res
1118
+
1119
+
1120
+ # NOTE: newest API as per documentation still using /v1/ prefix
1121
+ def RE_functions_dump(function_ids: list[int]) -> Response:
1122
+ res: Response = reveng_req(
1123
+ requests.post, "v1/functions/dump", json_data={"function_id_list": function_ids}
1124
+ )
1125
+
1126
+ res.raise_for_status()
1127
+ return res
1128
+
1129
+
1130
+ # NOTE: this API endpoint does not actually exist
1131
+ def RE_generate_summaries(function_id: int) -> Response:
1132
+ res: Response = reveng_req(
1133
+ requests.get, f"v1/functions/blocks_comments/{function_id}"
1134
+ )
1135
+
1136
+ res.raise_for_status()
1137
+ return res
1138
+
1139
+
1140
+ def RE_collection_search(search: str) -> Response:
1141
+ res: Response = reveng_req(
1142
+ requests.get,
1143
+ "v1/collections/quick/search",
1144
+ params={"search_term": search if search else ""},
1145
+ )
1146
+
1147
+ res.raise_for_status()
1148
+ return res
1149
+
1150
+
1151
+ def RE_recent_analysis(
1152
+ status: str = "All", scope: str = "ALL", nb_analysis: int = 50
1153
+ ) -> Response:
1154
+ res: Response = reveng_req(
1155
+ requests.get,
1156
+ "v1/analyse/recent",
1157
+ json_data={"status": status, "scope": scope, "n": nb_analysis},
1158
+ )
1159
+
1160
+ res.raise_for_status()
1161
+ return res
1162
+
1163
+
1164
+ def RE_search(fpath: str) -> Response:
1165
+ bin_id = re_binary_id(fpath)
1166
+
1167
+ res: Response = reveng_req(
1168
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
1169
+ )
1170
+
1171
+ res.raise_for_status()
1172
+ return res
1173
+
1174
+
1175
+ # NOTE: this uses a newer API version
1176
+ def RE_similar_functions(
1177
+ function_id: int,
1178
+ limit: int = 20,
1179
+ distance: int | float = 0.09999999999999998,
1180
+ debug: bool = False,
1181
+ ):
1182
+ params = {
1183
+ "distance": distance,
1184
+ "limit": limit,
1185
+ "debug": debug,
1186
+ }
1187
+
1188
+ res: Response = reveng_req(
1189
+ requests.get, f"v2/functions/{function_id}/similar-functions", params=params
1190
+ )
1191
+
1192
+ res.raise_for_status()
1193
+ return res