reait 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reait/api.py CHANGED
@@ -1,28 +1,25 @@
1
- # -*- coding: utf-8 -*-
2
1
  from __future__ import print_function, annotations
3
2
 
3
+ from os import access, R_OK, environ
4
+ from os.path import basename, isfile, expanduser, getsize
5
+
4
6
  import json
5
- import tomli
6
7
  import logging
7
8
  import requests
8
-
9
- from hashlib import sha256
9
+ import tomli
10
10
  from datetime import datetime
11
-
12
- from sklearn.metrics.pairwise import cosine_similarity
13
- from os import access, R_OK
14
- from os.path import basename, isfile, expanduser, getsize
15
- from requests import request, Response, HTTPError
11
+ from hashlib import sha256
12
+ from lief import parse, Binary, ELF, PE, MachO
16
13
  from numpy import array, vstack, dot, arccos, pi
17
14
  from pandas import DataFrame
18
- from lief import parse, Binary, ELF, PE, MachO
15
+ from requests import request, Response, HTTPError
16
+ from sklearn.metrics.pairwise import cosine_similarity
19
17
 
20
- __version__ = "1.0.0"
18
+ __version__ = "1.1.0"
21
19
 
22
20
  re_conf = {
23
- "apikey": "l1br3",
24
- "host": "https://api.reveng.ai",
25
- "model": "binnet-0.3-x86",
21
+ "apikey": environ.get("REAI_API_KEY", ""),
22
+ "host": environ.get("REAI_API_HOST", "https://api.reveng.ai"),
26
23
  }
27
24
 
28
25
 
@@ -36,16 +33,28 @@ class ReaitError(HTTPError):
36
33
  response.reason = reason
37
34
  response.status_code = 404
38
35
  response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
39
- response.url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}" if end_point else None
36
+ response.url = (
37
+ f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
38
+ if end_point
39
+ else None
40
+ )
40
41
 
41
42
  super().__init__(reason, response=response)
42
43
 
43
44
 
44
- def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict = None,
45
- params: dict = None, json_data: dict = None, timeout: int = 60, files: dict = None) -> Response:
45
+ def reveng_req(
46
+ req: request,
47
+ end_point: str,
48
+ data: dict = None,
49
+ ex_headers: dict = None,
50
+ params: dict = None,
51
+ json_data: dict = None,
52
+ timeout: int = 60,
53
+ files: dict = None,
54
+ ) -> Response:
46
55
  """
47
56
  Constructs and sends a Request
48
- :param r: Method for the new Request
57
+ :param req: Method for the new Request
49
58
  :param end_point: Endpoint to add to the base URL
50
59
  :param ex_headers: Extended HTTP headers to add
51
60
  :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
@@ -60,22 +69,48 @@ def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict =
60
69
  if ex_headers:
61
70
  headers.update(ex_headers)
62
71
 
63
- logger.debug("Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
64
- r.__name__.upper(), url, headers, data, json_data, params, files)
65
-
66
- response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout, files=files)
67
-
68
- logger.debug("Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
69
- r.__name__.upper(), url, response.headers, response.status_code, response.text)
72
+ logger.debug(
73
+ "Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
74
+ req.__name__.upper(),
75
+ url,
76
+ headers,
77
+ data,
78
+ json_data,
79
+ params,
80
+ files,
81
+ )
82
+
83
+ response: Response = req(
84
+ url,
85
+ headers=headers,
86
+ json=json_data,
87
+ data=data,
88
+ params=params,
89
+ timeout=timeout,
90
+ files=files,
91
+ )
92
+
93
+ logger.debug(
94
+ "Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
95
+ req.__name__.upper(),
96
+ url,
97
+ response.headers,
98
+ response.status_code,
99
+ response.text,
100
+ )
70
101
 
71
102
  return response
72
103
 
73
104
 
74
105
  def re_hash_check(bin_id: str) -> bool:
75
- res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
106
+ res: Response = reveng_req(
107
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
108
+ )
76
109
 
77
110
  if res.ok:
78
- return any(binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"])
111
+ return any(
112
+ binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"]
113
+ )
79
114
  else:
80
115
  logger.warning("Bad Request: %s", res.text)
81
116
 
@@ -86,37 +121,63 @@ def re_hash_check(bin_id: str) -> bool:
86
121
  # Assumes a file has been passed, correct hash only
87
122
  # Returns the BID of the binary_id (hash)
88
123
  def re_bid_search(bin_id: str) -> int:
89
- res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
124
+ res: Response = reveng_req(
125
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
126
+ )
90
127
 
91
128
  bid = -1
92
129
 
93
130
  if res.ok:
94
131
  # Filter the result who matches the SHA-256
95
- binaries = list(filter(lambda binary: binary["sha_256_hash"] == bin_id, res.json()["query_results"]))
132
+ binaries = list(
133
+ filter(
134
+ lambda binary: binary["sha_256_hash"] == bin_id,
135
+ res.json()["query_results"],
136
+ )
137
+ )
96
138
 
97
139
  # Check only one record is returned
98
140
  if len(binaries) == 1:
99
141
  binary = binaries[0]
100
142
  bid = binary["binary_id"]
101
143
 
102
- logger.info("Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
103
- bid, binary["binary_name"], binary["creation"], binary["model_name"], binary["status"])
144
+ logger.info(
145
+ "Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
146
+ bid,
147
+ binary["binary_name"],
148
+ binary["creation"],
149
+ binary["model_name"],
150
+ binary["status"],
151
+ )
104
152
  elif len(binaries) > 1:
105
- binaries.sort(key=lambda binary: datetime.fromisoformat(binary["creation"]).timestamp(), reverse=True)
153
+ binaries.sort(
154
+ key=lambda binary: datetime.fromisoformat(
155
+ binary["creation"]
156
+ ).timestamp(),
157
+ reverse=True,
158
+ )
106
159
 
107
160
  logger.info("%d matches found for hash: %s", len(binaries), bin_id)
108
161
 
109
162
  options_dict = {}
110
163
 
111
164
  for idx, binary in enumerate(binaries):
112
- logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
113
- idx, binary["binary_id"], binary["binary_name"], binary["creation"],
114
- binary["model_name"], binary["status"])
165
+ logger.info(
166
+ "[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
167
+ idx,
168
+ binary["binary_id"],
169
+ binary["binary_name"],
170
+ binary["creation"],
171
+ binary["model_name"],
172
+ binary["status"],
173
+ )
115
174
 
116
175
  options_dict[idx] = binary["binary_id"]
117
176
 
118
177
  try:
119
- user_input = input("[+] Please enter the option you want to use for this operation:")
178
+ user_input = input(
179
+ "[+] Please enter the option you want to use for this operation:"
180
+ )
120
181
 
121
182
  option_number = int(user_input)
122
183
 
@@ -157,16 +218,32 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
157
218
  elif res.status_code == 404:
158
219
  logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
159
220
  else:
160
- logger.error("Error deleting binary %s under. Server returned %d.", bin_id, res.status_code)
221
+ logger.error(
222
+ "Error deleting binary %s under. Server returned %d.",
223
+ bin_id,
224
+ res.status_code,
225
+ )
161
226
 
162
227
  res.raise_for_status()
163
228
  return res
164
229
 
165
230
 
166
- def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
167
- platform_options: str = None, file_options: str = None, dynamic_execution: bool = False,
168
- command_line_args: str = None, binary_scope: str = None, tags: list = None, priority: int = 0,
169
- duplicate: bool = False, symbols: dict = None, debug_fpath: str = None) -> Response:
231
+ def RE_analyse(
232
+ fpath: str,
233
+ model_name: str = None,
234
+ isa_options: str = None,
235
+ platform_options: str = None,
236
+ file_options: str = None,
237
+ dynamic_execution: bool = False,
238
+ command_line_args: str = None,
239
+ binary_scope: str = None,
240
+ tags: list = None,
241
+ priority: int = 0,
242
+ duplicate: bool = False,
243
+ symbols: dict = None,
244
+ debug_fpath: str = None,
245
+ skip_scraping: bool = False,
246
+ ) -> Response:
170
247
  """
171
248
  Start analysis job for binary file
172
249
  :param fpath: File path for binary to analyse
@@ -182,6 +259,7 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
182
259
  :param duplicate: Duplicate an existing binary
183
260
  :param symbols: JSON object containing the base address and the list of functions
184
261
  :param debug_fpath: File path for debug file
262
+ :param skip_scraping: Disable/Enable auto-tagging of binary sample in relevant APIs
185
263
  """
186
264
  bin_id = re_binary_id(fpath)
187
265
  result = re_hash_check(bin_id)
@@ -189,13 +267,19 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
189
267
  end_point = "v1/analyse/"
190
268
 
191
269
  if result and duplicate is False:
192
- logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
193
- bin_id)
270
+ logger.error(
271
+ "Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
272
+ bin_id,
273
+ )
194
274
  raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
195
275
 
196
276
  filename = basename(fpath)
197
277
 
198
- params = {"file_name": filename, "size_in_bytes": getsize(fpath), "sha_256_hash": bin_id,}
278
+ params = {
279
+ "file_name": filename,
280
+ "size_in_bytes": getsize(fpath),
281
+ "sha_256_hash": bin_id,
282
+ }
199
283
 
200
284
  if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
201
285
  try:
@@ -205,18 +289,30 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
205
289
  params["debug_hash"] = debug["sha_256_hash"]
206
290
  except HTTPError:
207
291
  pass
208
-
209
- for p_name in ("model_name", "isa_options", "platform_options", "file_options",
210
- "dynamic_execution", "command_line_args", "binary_scope", "tags", "priority", "symbols",):
292
+
293
+ for p_name in (
294
+ "model_name",
295
+ "isa_options",
296
+ "platform_options",
297
+ "file_options",
298
+ "dynamic_execution",
299
+ "command_line_args",
300
+ "binary_scope",
301
+ "tags",
302
+ "priority",
303
+ "symbols",
304
+ "skip_scraping",
305
+ ):
211
306
  p_value = locals()[p_name]
212
307
 
213
308
  if p_value:
214
309
  params[p_name] = p_value
215
310
 
216
311
  res: Response = reveng_req(requests.post, end_point, json_data=params)
217
-
218
312
  if res.ok:
219
- logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
313
+ logger.info(
314
+ "Successfully submitted binary for analysis. %s - %s", fpath, bin_id
315
+ )
220
316
  elif res.status_code == 400:
221
317
  if "error" in res.json().keys():
222
318
  logger.warning("Error analysing %s - %s", fpath, res.json()["error"])
@@ -239,22 +335,32 @@ def RE_upload(fpath: str) -> Response:
239
335
  res = Response()
240
336
  res.status_code = 200
241
337
  res.url = f"{re_conf['host']}/v1/upload"
242
- res._content = ('{0}"success": true,'
243
- '"message": "File already uploaded!",'
244
- '"sha_256_hash": "{1}"{2}').format("{", bin_id, "}").encode()
338
+ res._content = (
339
+ (
340
+ '{0}"success": true,'
341
+ '"message": "File already uploaded!",'
342
+ '"sha_256_hash": "{1}"{2}'
343
+ )
344
+ .format("{", bin_id, "}")
345
+ .encode()
346
+ )
245
347
  else:
246
348
  with open(fpath, "rb") as fd:
247
349
  res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
248
350
 
249
351
  if res.ok:
250
- logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
352
+ logger.info(
353
+ "Successfully uploaded binary to your account. %s - %s", fpath, bin_id
354
+ )
251
355
  elif res.status_code == 400:
252
356
  if "error" in res.json().keys():
253
357
  logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
254
358
  elif res.status_code == 413:
255
359
  logger.warning("File too large. Please upload files under 10MB.")
256
360
  elif res.status_code == 500:
257
- logger.error("Internal Server Error. Please contact support. Skipping upload...")
361
+ logger.error(
362
+ "Internal Server Error. Please contact support. Skipping upload..."
363
+ )
258
364
 
259
365
  res.raise_for_status()
260
366
  return res
@@ -277,8 +383,10 @@ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
277
383
  res: Response = reveng_req(requests.get, end_point)
278
384
 
279
385
  if res.status_code == 400:
280
- logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
281
- bin_id)
386
+ logger.warning(
387
+ "Analysis for %s still in progress. Please check the logs (-l) and try again later.",
388
+ bin_id,
389
+ )
282
390
 
283
391
  res.raise_for_status()
284
392
  return res
@@ -376,7 +484,11 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
376
484
  df = DataFrame(data=embeddings)
377
485
  np_embedding = array(embedding).reshape(1, -1)
378
486
  source_embeddings = vstack(df["embedding"].values)
379
- closest = cosine_similarity(source_embeddings, np_embedding).squeeze().argsort()[::-1][:nns]
487
+ closest = (
488
+ cosine_similarity(source_embeddings, np_embedding)
489
+ .squeeze()
490
+ .argsort()[::-1][:nns]
491
+ )
380
492
  distances = cosine_similarity(source_embeddings[closest], np_embedding)
381
493
 
382
494
  # match closest embeddings with similarity
@@ -384,16 +496,25 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
384
496
 
385
497
  # create json similarity object
386
498
  similarities = list(zip(distances, closest_df.index.tolist()))
387
- json_sims = [{"similaritiy": float(d[0]),
388
- "vaddr": int(df.iloc[v]["vaddr"]),
389
- "name": str(df.iloc[v]["name"]),
390
- "size": int(df.iloc[v]["size"])
391
- } for d, v in similarities]
499
+ json_sims = [
500
+ {
501
+ "similaritiy": float(d[0]),
502
+ "vaddr": int(df.iloc[v]["vaddr"]),
503
+ "name": str(df.iloc[v]["name"]),
504
+ "size": int(df.iloc[v]["size"]),
505
+ }
506
+ for d, v in similarities
507
+ ]
392
508
  return json_sims
393
509
 
394
510
 
395
- def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections: list[str] = None,
396
- distance: float = 0.1, debug_enabled: bool = False) -> Response:
511
+ def RE_nearest_symbols_batch(
512
+ function_ids: list[int],
513
+ nns: int = 5,
514
+ collections: list[str] = None,
515
+ distance: float = 0.1,
516
+ debug_enabled: bool = False,
517
+ ) -> Response:
397
518
  """
398
519
  Get nearest functions to a passed function ids
399
520
  :param function_ids: List of function ids
@@ -402,10 +523,12 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
402
523
  :param distance: How close we want the ANN search to filter for
403
524
  :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
404
525
  """
405
- params = {"function_id_list": function_ids,
406
- "result_per_function": nns,
407
- "debug_mode": debug_enabled,
408
- "distance": distance,}
526
+ params = {
527
+ "function_id_list": function_ids,
528
+ "result_per_function": nns,
529
+ "debug_mode": debug_enabled,
530
+ "distance": distance,
531
+ }
409
532
 
410
533
  if collections:
411
534
  # api param is collection, not collections
@@ -417,8 +540,13 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
417
540
  return res
418
541
 
419
542
 
420
- def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
421
- distance: float = 0.1, debug_enabled: bool = False) -> Response:
543
+ def RE_nearest_functions(
544
+ fpath: str,
545
+ binary_id: int = 0,
546
+ nns: int = 5,
547
+ distance: float = 0.1,
548
+ debug_enabled: bool = False,
549
+ ) -> Response:
422
550
  """
423
551
  Get the nearest functions
424
552
  :param fpath: File path for binary to analyse
@@ -435,9 +563,11 @@ def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
435
563
  if bid == -1:
436
564
  raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
437
565
 
438
- params = {"result_per_function": nns,
439
- "debug_mode": debug_enabled,
440
- "distance": distance, }
566
+ params = {
567
+ "result_per_function": nns,
568
+ "debug_mode": debug_enabled,
569
+ "distance": distance,
570
+ }
441
571
 
442
572
  res: Response = reveng_req(requests.post, end_point, json_data=params)
443
573
 
@@ -482,14 +612,47 @@ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
482
612
  return res
483
613
 
484
614
 
615
def RE_binary_additonal_details(fpath: str, binary_id: int = None) -> Response:
    """
    Get additional details for a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary (resolved from the file's SHA-256 when omitted)
    """
    # NOTE(review): the name keeps the historical "additonal" typo so existing
    # callers are not broken.
    bin_id = re_binary_id(fpath)
    bid = re_bid_search(bin_id) if binary_id is None else binary_id

    end_point = f"v2/binaries/{bid}/additional-details"

    if bid == -1:
        # Pass the endpoint so the synthesized 404 carries a useful URL,
        # consistent with the other RE_* helpers.
        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)

    res: Response = reveng_req(requests.get, end_point)
    res.raise_for_status()

    # Lazy %-formatting to match the module's logging convention
    logger.info("Additional Details Info(%s):\n", fpath)
    logger.info("\n%s", json.dumps(res.json(), indent=4))
    return res
628
+
629
+
630
def RE_binary_details(fpath: str, binary_id: int = None) -> Response:
    """
    Get details for a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary (resolved from the file's SHA-256 when omitted)
    """
    bin_id = re_binary_id(fpath)
    bid = re_bid_search(bin_id) if binary_id is None else binary_id

    end_point = f"v2/binaries/{bid}/details"

    if bid == -1:
        # Pass the endpoint so the synthesized 404 carries a useful URL,
        # consistent with the other RE_* helpers.
        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)

    res: Response = reveng_req(requests.get, end_point)
    res.raise_for_status()

    # Lazy %-formatting to match the module's logging convention
    logger.info("Details Info(%s):\n", fpath)
    logger.info("\n%s", json.dumps(res.json(), indent=4))
    return res
643
+
644
+
485
645
  def RE_functions_rename(function_id: int, new_name: str) -> Response:
486
646
  """
487
647
  Send the new name of a function to C2
488
648
  :param function_id: ID of a function
489
649
  :param new_name: New function name
490
650
  """
491
- res: Response = reveng_req(requests.post, f"v1/functions/rename/{function_id}",
492
- json_data={"new_name": new_name})
651
+ res: Response = reveng_req(
652
+ requests.post,
653
+ f"v1/functions/rename/{function_id}",
654
+ json_data={"new_name": new_name},
655
+ )
493
656
 
494
657
  if res.ok:
495
658
  logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
@@ -500,6 +663,29 @@ def RE_functions_rename(function_id: int, new_name: str) -> Response:
500
663
  return res
501
664
 
502
665
 
666
def RE_functions_rename_batch(mapping: dict[int, str]) -> Response:
    """
    Send a list of dictionaries, with a corresponding key as function ID and the desired function_name
    :param mapping: dictionary containing the function_id as key and function_name as value
    """
    # Build the rename payload entry by entry
    renames = []
    for func_id, func_name in mapping.items():
        renames.append(
            {
                "function_id": func_id,
                "function_name": func_name,
            }
        )

    res: Response = reveng_req(
        requests.post,
        "v1/functions/batch/rename",
        json_data={"new_name_mapping": renames},
    )

    res.raise_for_status()
    return res
687
+
688
+
503
689
  def RE_settings() -> Response:
504
690
  """
505
691
  Get the configuration settings
@@ -535,6 +721,57 @@ def RE_authentication() -> Response:
535
721
  return res
536
722
 
537
723
 
724
def RE_functions_list(
    analysis_id: int,
    search_term: str = "",
    min_v_address: int = 0,
    max_v_address: int = 0,
) -> Response:
    """
    Get the functions of an analysis
    :param analysis_id: Analysis ID to list functions for
    :param search_term: Optional substring filter on function names
    :param min_v_address: Lower bound on virtual address (0 = no bound)
    :param max_v_address: Upper bound on virtual address (0 = no bound)
    """
    # Only forward filters the caller actually supplied
    params = {}
    if search_term:
        params["search_term"] = search_term

    if min_v_address != 0:
        params["min_v_address"] = min_v_address

    if max_v_address != 0:
        params["max_v_address"] = max_v_address

    res: Response = reveng_req(
        requests.get, f"v2/analyses/{analysis_id}/info/functions/list", params=params
    )

    res.raise_for_status()

    return res
751
+
752
+
753
def RE_function_callers_callees(function: int) -> Response:
    """
    Get the callers and callees of a function
    :param function: Function ID
    """
    res: Response = reveng_req(requests.get, f"v2/functions/{function}/callees_callers")

    res.raise_for_status()
    return res
762
+
763
+
764
def RE_analysis_info(analysis_id: int) -> Response:
    """
    Get the analysis information
    :param analysis_id: Analysis ID
    """
    end_point = f"v2/analyses/{analysis_id}/info/basic"

    res: Response = reveng_req(requests.get, end_point)

    res.raise_for_status()
    return res
773
+
774
+
538
775
  def re_binary_id(fpath: str) -> str:
539
776
  """
540
777
  Take the SHA-256 hash of binary file
@@ -549,9 +786,7 @@ def re_binary_id(fpath: str) -> str:
549
786
 
550
787
  return hf.hexdigest()
551
788
  else:
552
- logger.error("File '%s' doesn't exist or isn't readable", fpath)
553
-
554
- return "undefined"
789
+ return fpath
555
790
 
556
791
 
557
792
  def _binary_isa(binary: Binary, exec_type: str) -> str:
@@ -560,10 +795,9 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
560
795
  """
561
796
  if exec_type == "ELF":
562
797
  arch = binary.header.machine_type
563
-
564
- if arch == ELF.ARCH.i386:
798
+ if arch == ELF.ARCH.I386:
565
799
  return "x86"
566
- elif arch == ELF.ARCH.x86_64:
800
+ elif arch == ELF.ARCH.X86_64:
567
801
  return "x86_64"
568
802
  elif arch == ELF.ARCH.ARM:
569
803
  return "ARM32"
@@ -571,7 +805,6 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
571
805
  return "ARM64"
572
806
  elif exec_type == "PE":
573
807
  machine_type = binary.header.machine
574
-
575
808
  if machine_type == PE.Header.MACHINE_TYPES.I386:
576
809
  return "x86"
577
810
  elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
@@ -583,17 +816,23 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
583
816
  elif exec_type == "Mach-O":
584
817
  cpu_type = binary.header.cpu_type
585
818
 
586
- if cpu_type == MachO.CPU_TYPES.x86:
819
+ if cpu_type == MachO.Header.CPU_TYPE.X86:
587
820
  return "x86"
588
- elif cpu_type == MachO.CPU_TYPES.x86_64:
821
+ elif cpu_type == MachO.Header.CPU_TYPE.X86_64:
589
822
  return "x86_64"
590
- elif cpu_type == MachO.CPU_TYPES.ARM:
823
+ elif cpu_type == MachO.Header.CPU_TYPE.ARM:
591
824
  return "ARM32"
592
- elif cpu_type == MachO.CPU_TYPES.ARM64:
825
+ elif cpu_type == MachO.Header.CPU_TYPE.ARM64:
593
826
  return "ARM64"
594
827
 
595
- logger.error("Error, could not determine or unsupported ISA for binary format: %s.", exec_type)
596
- raise RuntimeError(f"Error, could not determine or unsupported ISA for binary format: {exec_type}.")
828
+ logger.error(
829
+ "Error, could not determine or unsupported "
830
+ f"ISA for binary format: {exec_type}."
831
+ )
832
+ raise RuntimeError(
833
+ "Error, could not determine or unsupported "
834
+ f"ISA for binary format: {exec_type}."
835
+ )
597
836
 
598
837
 
599
838
  def _binary_format(binary: Binary) -> str:
@@ -607,8 +846,12 @@ def _binary_format(binary: Binary) -> str:
607
846
  if binary.format == Binary.FORMATS.MACHO:
608
847
  return "Mach-O"
609
848
 
610
- logger.error("Error, could not determine or unsupported binary format: %s.", binary.format)
611
- raise RuntimeError(f"Error, could not determine or unsupported binary format: {binary.format}")
849
+ logger.error(
850
+ "Error, could not determine or unsupported" f" binary format: {binary.format}."
851
+ )
852
+ raise RuntimeError(
853
+ "Error, could not determine or " f"unsupported binary format: {binary.format}"
854
+ )
612
855
 
613
856
 
614
857
  def file_type(fpath: str) -> tuple[str, str]:
@@ -638,17 +881,281 @@ def parse_config() -> None:
638
881
  with open(fpath) as fd:
639
882
  config = tomli.loads(fd.read())
640
883
 
641
- for key in ("apikey", "host", "model",):
884
+ for key in (
885
+ "apikey",
886
+ "host",
887
+ "model",
888
+ ):
642
889
  if key in config:
643
890
  re_conf[key] = config[key]
644
891
  else:
645
892
  logger.info("File %s doesn't exist or isn't readable", fpath)
646
893
 
647
894
 
648
- def angular_distance(x, y) -> float:
895
def RE_analysis_id(fpath: str, binary_id: int = 0) -> Response:
    """
    Get the Analysis ID for the Binary ID
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary
    """
    bin_id = re_binary_id(fpath)

    # Resolve the binary ID from the hash unless the caller provided one
    bid = binary_id if binary_id != 0 else re_bid_search(bin_id)

    end_point = f"v2/analyses/lookup/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)

    res: Response = reveng_req(requests.get, end_point)

    logger.info("Analysis ID for %s:\n%s", fpath, res.text)

    res.raise_for_status()
    return res
915
+
916
+
917
def RE_generate_data_types(analysis_id: int, function_ids: list[int]) -> Response:
    """
    Generate data types for the analysis
    :param analysis_id: Analysis ID
    :param function_ids: List of function IDs to generate data types for
    """
    end_point = f"/v2/analyses/{analysis_id}/info/functions/data_types"

    res: Response = reveng_req(
        requests.post, end_point, json_data={"function_ids": function_ids}
    )
    res.raise_for_status()
    return res
929
+
930
+
931
def RE_list_data_types(analysis_id: int, function_ids: list[int]) -> Response:
    """
    List data types for the analysis
    :param analysis_id: Analysis ID
    :param function_ids: List of function IDs
    """
    end_point = f"/v2/analyses/{analysis_id}/info/functions/data_types"

    # NOTE(review): this sends a JSON body on a GET request, matching the
    # module's existing convention for this API — confirm the server accepts it.
    res: Response = reveng_req(
        requests.get, end_point, json_data={"function_ids": function_ids}
    )
    res.raise_for_status()
    return res
944
+
945
+
946
def RE_begin_ai_decompilation(function_id: int) -> Response:
    """
    Begin AI decompilation for the function
    :param function_id: Function ID
    """
    res: Response = reveng_req(
        requests.post,
        f"/v2/functions/{function_id}/ai-decompilation",
        data=None,
    )
    res.raise_for_status()
    return res
960
+
961
+
962
def RE_poll_ai_decompilation(function_id: int) -> Response:
    """
    Poll AI decompilation for the function
    :param function_id: Function ID
    """
    res: Response = reveng_req(
        requests.get,
        f"/v2/functions/{function_id}/ai-decompilation",
    )
    res.raise_for_status()
    return res
975
+
976
+
977
def RE_analysis_lookup(binary_id: int) -> Response:
    """
    Get the Analysis ID from a Binary ID
    :param binary_id: Binary ID
    """
    res: Response = reveng_req(requests.get, f"/v2/analyses/lookup/{binary_id}")
    res.raise_for_status()
    return res
986
+
987
+
988
def RE_collections_search(
    page: int = 1,
    page_size: int = 10,
    partial_collection_name: str = "",
    partial_binary_name: str = "",
    partial_binary_sha256: str = "",
    tags: list[str] | str = "",
    model_name: str = "",
) -> Response:
    """
    Search collections on the server
    :param page: Page number (1-based)
    :param page_size: Number of results per page
    :param partial_collection_name: Substring filter on collection name
    :param partial_binary_name: Substring filter on binary name
    :param partial_binary_sha256: Substring filter on binary SHA-256
    :param tags: Tag(s) to filter by
    :param model_name: Model name to filter by
    """
    end_point = "/v2/search/collections"

    params = {
        "page": page,
        "page_size": page_size,
        "partial_collection_name": partial_collection_name,
    }

    # Fix: these filters were previously accepted but silently never sent.
    # Forward them only when supplied; parameter names mirror the function
    # signature — TODO confirm the exact names against the API documentation.
    if partial_binary_name:
        params["partial_binary_name"] = partial_binary_name
    if partial_binary_sha256:
        params["partial_binary_sha256"] = partial_binary_sha256
    if tags:
        params["tags"] = tags
    if model_name:
        params["model_name"] = model_name

    res: Response = reveng_req(requests.get, end_point, params=params)
    res.raise_for_status()
    return res
1007
+
1008
+
1009
+ # Bin_id is referred to as hash in this program - to maintain usage BID = id
1010
+ # of a binary bin_id = hash
1011
+ # Assumes a file has been passed, correct hash only
1012
+ # Returns the BID of the binary_id (hash)
1013
def RE_latest_bid(bin_id: str) -> int:
    """
    Get the binary ID (BID) of the most recent analysis matching a SHA-256 hash
    :param bin_id: SHA-256 hash of the binary
    :return: binary ID of the latest matching analysis, or -1 if none found
    """
    res: Response = reveng_req(
        requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
    )

    bid = -1

    if res.ok:
        # Filter the result who matches the SHA-256
        binaries = [
            binary
            for binary in res.json()["query_results"]
            if binary["sha_256_hash"] == bin_id
        ]

        # Check only one record is returned
        if len(binaries) == 1:
            binary = binaries[0]
            bid = binary["binary_id"]

            logger.info(
                "Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
                bid,
                binary["binary_name"],
                binary["creation"],
                binary["model_name"],
                binary["status"],
            )
        elif len(binaries) > 1:
            # Most recent analysis first
            binaries.sort(
                key=lambda binary: datetime.fromisoformat(
                    binary["creation"]
                ).timestamp(),
                reverse=True,
            )

            logger.info("%d matches found for hash: %s", len(binaries), bin_id)

            for idx, binary in enumerate(binaries):
                logger.info(
                    "[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
                    idx,
                    binary["binary_id"],
                    binary["binary_name"],
                    binary["creation"],
                    binary["model_name"],
                    binary["status"],
                )

            # Fix: the original wrapped this lookup in a try/except whose
            # handler re-ran the exact same failing expression. After the sort
            # above, index 0 is always present and is the most recent analysis.
            bid = binaries[0]["binary_id"]
            logger.warning("Select the most recent analysis - ID: %d", bid)
        else:
            logger.warning("No matches found for hash: %s", bin_id)
    else:
        logger.warning("Bad Request: %s", res.text)

    res.raise_for_status()
    return bid
1078
+
1079
+
1080
+ # NOTE: newest API as per documentation still using /v1/ prefix
1081
def RE_models() -> Response:
    """Fetch the list of available analysis models."""
    res: Response = reveng_req(requests.get, "v1/models")
    res.raise_for_status()
    return res
1086
+
1087
+
1088
+ # NOTE: newest API as per documentation still using /v1/ prefix
1089
def RE_functions_dump(function_ids: list[int]) -> Response:
    """Dump details for the given function IDs."""
    payload = {"function_id_list": function_ids}

    res: Response = reveng_req(requests.post, "v1/functions/dump", json_data=payload)

    res.raise_for_status()
    return res
1096
+
1097
+
1098
+ # NOTE: this API endpoint does not actually exist
1099
def RE_generate_summaries(function_id: int) -> Response:
    """Request block comments / summaries for a function."""
    end_point = f"v1/functions/blocks_comments/{function_id}"

    res: Response = reveng_req(requests.get, end_point)

    res.raise_for_status()
    return res
1106
+
1107
+
1108
def RE_collection_search(search: str) -> Response:
    """Quick search for collections matching the given term."""
    # Falsy search values (None, "") collapse to an empty search term
    res: Response = reveng_req(
        requests.get,
        "v1/collections/quick/search",
        params={"search_term": search or ""},
    )

    res.raise_for_status()
    return res
1117
+
1118
+
1119
def RE_recent_analysis(
    status: str = "All", scope: str = "ALL", nb_analysis: int = 50
) -> Response:
    """List recent analyses filtered by status and scope."""
    payload = {"status": status, "scope": scope, "n": nb_analysis}

    res: Response = reveng_req(requests.get, "v1/analyse/recent", json_data=payload)

    res.raise_for_status()
    return res
1130
+
1131
+
1132
def RE_search(fpath: str) -> Response:
    """Search analyses matching the SHA-256 of the file at *fpath*."""
    res: Response = reveng_req(
        requests.get,
        "v1/search",
        json_data={"sha_256_hash": re_binary_id(fpath)},
    )

    res.raise_for_status()
    return res
1141
+
1142
+
1143
+ # NOTE: this uses a newer API version
1144
def RE_similar_functions(
    function_id: int,
    limit: int = 20,
    distance: int | float = 0.09999999999999998,
    debug: bool = False,
):
    """
    Get functions similar to the given function
    :param function_id: Function ID
    :param limit: Maximum number of results
    :param distance: Maximum distance for the similarity search
    :param debug: Restrict matching to debug symbols if set
    """
    query = {
        "distance": distance,
        "limit": limit,
        "debug": debug,
    }

    res: Response = reveng_req(
        requests.get, f"v2/functions/{function_id}/similar-functions", params=query
    )

    res.raise_for_status()
    return res