reait 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reait/api.py CHANGED
@@ -1,28 +1,25 @@
1
- # -*- coding: utf-8 -*-
2
1
  from __future__ import print_function, annotations
3
2
 
3
+ from os import access, R_OK, environ
4
+ from os.path import basename, isfile, expanduser, getsize
5
+
4
6
  import json
5
- import tomli
6
7
  import logging
7
8
  import requests
8
-
9
- from hashlib import sha256
9
+ import tomli
10
10
  from datetime import datetime
11
-
12
- from sklearn.metrics.pairwise import cosine_similarity
13
- from os import access, R_OK
14
- from os.path import basename, isfile, expanduser, getsize
15
- from requests import request, Response, HTTPError
11
+ from hashlib import sha256
12
+ from lief import parse, Binary, ELF, PE, MachO
16
13
  from numpy import array, vstack, dot, arccos, pi
17
14
  from pandas import DataFrame
18
- from lief import parse, Binary, ELF, PE, MachO
15
+ from requests import request, Response, HTTPError
16
+ from sklearn.metrics.pairwise import cosine_similarity
19
17
 
20
- __version__ = "1.0.1"
18
+ __version__ = "1.1.0"
21
19
 
22
20
  re_conf = {
23
- "apikey": "l1br3",
24
- "host": "https://api.reveng.ai",
25
- "model": "binnet-0.3-x86",
21
+ "apikey": environ.get("REAI_API_KEY", ""),
22
+ "host": environ.get("REAI_API_HOST", "https://api.reveng.ai"),
26
23
  }
27
24
 
28
25
 
@@ -36,16 +33,28 @@ class ReaitError(HTTPError):
36
33
  response.reason = reason
37
34
  response.status_code = 404
38
35
  response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
39
- response.url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}" if end_point else None
36
+ response.url = (
37
+ f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
38
+ if end_point
39
+ else None
40
+ )
40
41
 
41
42
  super().__init__(reason, response=response)
42
43
 
43
44
 
44
- def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict = None,
45
- params: dict = None, json_data: dict = None, timeout: int = 60, files: dict = None) -> Response:
45
+ def reveng_req(
46
+ req: request,
47
+ end_point: str,
48
+ data: dict = None,
49
+ ex_headers: dict = None,
50
+ params: dict = None,
51
+ json_data: dict = None,
52
+ timeout: int = 60,
53
+ files: dict = None,
54
+ ) -> Response:
46
55
  """
47
56
  Constructs and sends a Request
48
- :param r: Method for the new Request
57
+ :param req: Method for the new Request
49
58
  :param end_point: Endpoint to add to the base URL
50
59
  :param ex_headers: Extended HTTP headers to add
51
60
  :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
@@ -60,22 +69,48 @@ def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict =
60
69
  if ex_headers:
61
70
  headers.update(ex_headers)
62
71
 
63
- logger.debug("Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
64
- r.__name__.upper(), url, headers, data, json_data, params, files)
65
-
66
- response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout, files=files)
67
-
68
- logger.debug("Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
69
- r.__name__.upper(), url, response.headers, response.status_code, response.text)
72
+ logger.debug(
73
+ "Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
74
+ req.__name__.upper(),
75
+ url,
76
+ headers,
77
+ data,
78
+ json_data,
79
+ params,
80
+ files,
81
+ )
82
+
83
+ response: Response = req(
84
+ url,
85
+ headers=headers,
86
+ json=json_data,
87
+ data=data,
88
+ params=params,
89
+ timeout=timeout,
90
+ files=files,
91
+ )
92
+
93
+ logger.debug(
94
+ "Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
95
+ req.__name__.upper(),
96
+ url,
97
+ response.headers,
98
+ response.status_code,
99
+ response.text,
100
+ )
70
101
 
71
102
  return response
72
103
 
73
104
 
74
105
  def re_hash_check(bin_id: str) -> bool:
75
- res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
106
+ res: Response = reveng_req(
107
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
108
+ )
76
109
 
77
110
  if res.ok:
78
- return any(binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"])
111
+ return any(
112
+ binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"]
113
+ )
79
114
  else:
80
115
  logger.warning("Bad Request: %s", res.text)
81
116
 
@@ -86,37 +121,63 @@ def re_hash_check(bin_id: str) -> bool:
86
121
  # Assumes a file has been passed, correct hash only
87
122
  # Returns the BID of the binary_id (hash)
88
123
  def re_bid_search(bin_id: str) -> int:
89
- res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
124
+ res: Response = reveng_req(
125
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
126
+ )
90
127
 
91
128
  bid = -1
92
129
 
93
130
  if res.ok:
94
131
  # Filter the result who matches the SHA-256
95
- binaries = list(filter(lambda binary: binary["sha_256_hash"] == bin_id, res.json()["query_results"]))
132
+ binaries = list(
133
+ filter(
134
+ lambda binary: binary["sha_256_hash"] == bin_id,
135
+ res.json()["query_results"],
136
+ )
137
+ )
96
138
 
97
139
  # Check only one record is returned
98
140
  if len(binaries) == 1:
99
141
  binary = binaries[0]
100
142
  bid = binary["binary_id"]
101
143
 
102
- logger.info("Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
103
- bid, binary["binary_name"], binary["creation"], binary["model_name"], binary["status"])
144
+ logger.info(
145
+ "Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
146
+ bid,
147
+ binary["binary_name"],
148
+ binary["creation"],
149
+ binary["model_name"],
150
+ binary["status"],
151
+ )
104
152
  elif len(binaries) > 1:
105
- binaries.sort(key=lambda binary: datetime.fromisoformat(binary["creation"]).timestamp(), reverse=True)
153
+ binaries.sort(
154
+ key=lambda binary: datetime.fromisoformat(
155
+ binary["creation"]
156
+ ).timestamp(),
157
+ reverse=True,
158
+ )
106
159
 
107
160
  logger.info("%d matches found for hash: %s", len(binaries), bin_id)
108
161
 
109
162
  options_dict = {}
110
163
 
111
164
  for idx, binary in enumerate(binaries):
112
- logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
113
- idx, binary["binary_id"], binary["binary_name"], binary["creation"],
114
- binary["model_name"], binary["status"])
165
+ logger.info(
166
+ "[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
167
+ idx,
168
+ binary["binary_id"],
169
+ binary["binary_name"],
170
+ binary["creation"],
171
+ binary["model_name"],
172
+ binary["status"],
173
+ )
115
174
 
116
175
  options_dict[idx] = binary["binary_id"]
117
176
 
118
177
  try:
119
- user_input = input("[+] Please enter the option you want to use for this operation:")
178
+ user_input = input(
179
+ "[+] Please enter the option you want to use for this operation:"
180
+ )
120
181
 
121
182
  option_number = int(user_input)
122
183
 
@@ -157,16 +218,32 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
157
218
  elif res.status_code == 404:
158
219
  logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
159
220
  else:
160
- logger.error("Error deleting binary %s under. Server returned %d.", bin_id, res.status_code)
221
+ logger.error(
222
+ "Error deleting binary %s under. Server returned %d.",
223
+ bin_id,
224
+ res.status_code,
225
+ )
161
226
 
162
227
  res.raise_for_status()
163
228
  return res
164
229
 
165
230
 
166
- def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
167
- platform_options: str = None, file_options: str = None, dynamic_execution: bool = False,
168
- command_line_args: str = None, binary_scope: str = None, tags: list = None, priority: int = 0,
169
- duplicate: bool = False, symbols: dict = None, debug_fpath: str = None) -> Response:
231
+ def RE_analyse(
232
+ fpath: str,
233
+ model_name: str = None,
234
+ isa_options: str = None,
235
+ platform_options: str = None,
236
+ file_options: str = None,
237
+ dynamic_execution: bool = False,
238
+ command_line_args: str = None,
239
+ binary_scope: str = None,
240
+ tags: list = None,
241
+ priority: int = 0,
242
+ duplicate: bool = False,
243
+ symbols: dict = None,
244
+ debug_fpath: str = None,
245
+ skip_scraping: bool = False,
246
+ ) -> Response:
170
247
  """
171
248
  Start analysis job for binary file
172
249
  :param fpath: File path for binary to analyse
@@ -182,6 +259,7 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
182
259
  :param duplicate: Duplicate an existing binary
183
260
  :param symbols: JSON object containing the base address and the list of functions
184
261
  :param debug_fpath: File path for debug file
262
+ :param skip_scraping: Disable/Enable auto-tagging of binary sample in relevant APIs
185
263
  """
186
264
  bin_id = re_binary_id(fpath)
187
265
  result = re_hash_check(bin_id)
@@ -189,13 +267,19 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
189
267
  end_point = "v1/analyse/"
190
268
 
191
269
  if result and duplicate is False:
192
- logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
193
- bin_id)
270
+ logger.error(
271
+ "Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
272
+ bin_id,
273
+ )
194
274
  raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
195
275
 
196
276
  filename = basename(fpath)
197
277
 
198
- params = {"file_name": filename, "size_in_bytes": getsize(fpath), "sha_256_hash": bin_id,}
278
+ params = {
279
+ "file_name": filename,
280
+ "size_in_bytes": getsize(fpath),
281
+ "sha_256_hash": bin_id,
282
+ }
199
283
 
200
284
  if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
201
285
  try:
@@ -205,18 +289,30 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
205
289
  params["debug_hash"] = debug["sha_256_hash"]
206
290
  except HTTPError:
207
291
  pass
208
-
209
- for p_name in ("model_name", "isa_options", "platform_options", "file_options",
210
- "dynamic_execution", "command_line_args", "binary_scope", "tags", "priority", "symbols",):
292
+
293
+ for p_name in (
294
+ "model_name",
295
+ "isa_options",
296
+ "platform_options",
297
+ "file_options",
298
+ "dynamic_execution",
299
+ "command_line_args",
300
+ "binary_scope",
301
+ "tags",
302
+ "priority",
303
+ "symbols",
304
+ "skip_scraping",
305
+ ):
211
306
  p_value = locals()[p_name]
212
307
 
213
308
  if p_value:
214
309
  params[p_name] = p_value
215
310
 
216
311
  res: Response = reveng_req(requests.post, end_point, json_data=params)
217
-
218
312
  if res.ok:
219
- logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
313
+ logger.info(
314
+ "Successfully submitted binary for analysis. %s - %s", fpath, bin_id
315
+ )
220
316
  elif res.status_code == 400:
221
317
  if "error" in res.json().keys():
222
318
  logger.warning("Error analysing %s - %s", fpath, res.json()["error"])
@@ -239,22 +335,32 @@ def RE_upload(fpath: str) -> Response:
239
335
  res = Response()
240
336
  res.status_code = 200
241
337
  res.url = f"{re_conf['host']}/v1/upload"
242
- res._content = ('{0}"success": true,'
243
- '"message": "File already uploaded!",'
244
- '"sha_256_hash": "{1}"{2}').format("{", bin_id, "}").encode()
338
+ res._content = (
339
+ (
340
+ '{0}"success": true,'
341
+ '"message": "File already uploaded!",'
342
+ '"sha_256_hash": "{1}"{2}'
343
+ )
344
+ .format("{", bin_id, "}")
345
+ .encode()
346
+ )
245
347
  else:
246
348
  with open(fpath, "rb") as fd:
247
349
  res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
248
350
 
249
351
  if res.ok:
250
- logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
352
+ logger.info(
353
+ "Successfully uploaded binary to your account. %s - %s", fpath, bin_id
354
+ )
251
355
  elif res.status_code == 400:
252
356
  if "error" in res.json().keys():
253
357
  logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
254
358
  elif res.status_code == 413:
255
359
  logger.warning("File too large. Please upload files under 10MB.")
256
360
  elif res.status_code == 500:
257
- logger.error("Internal Server Error. Please contact support. Skipping upload...")
361
+ logger.error(
362
+ "Internal Server Error. Please contact support. Skipping upload..."
363
+ )
258
364
 
259
365
  res.raise_for_status()
260
366
  return res
@@ -277,8 +383,10 @@ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
277
383
  res: Response = reveng_req(requests.get, end_point)
278
384
 
279
385
  if res.status_code == 400:
280
- logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
281
- bin_id)
386
+ logger.warning(
387
+ "Analysis for %s still in progress. Please check the logs (-l) and try again later.",
388
+ bin_id,
389
+ )
282
390
 
283
391
  res.raise_for_status()
284
392
  return res
@@ -376,7 +484,11 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
376
484
  df = DataFrame(data=embeddings)
377
485
  np_embedding = array(embedding).reshape(1, -1)
378
486
  source_embeddings = vstack(df["embedding"].values)
379
- closest = cosine_similarity(source_embeddings, np_embedding).squeeze().argsort()[::-1][:nns]
487
+ closest = (
488
+ cosine_similarity(source_embeddings, np_embedding)
489
+ .squeeze()
490
+ .argsort()[::-1][:nns]
491
+ )
380
492
  distances = cosine_similarity(source_embeddings[closest], np_embedding)
381
493
 
382
494
  # match closest embeddings with similarity
@@ -384,16 +496,25 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
384
496
 
385
497
  # create json similarity object
386
498
  similarities = list(zip(distances, closest_df.index.tolist()))
387
- json_sims = [{"similaritiy": float(d[0]),
388
- "vaddr": int(df.iloc[v]["vaddr"]),
389
- "name": str(df.iloc[v]["name"]),
390
- "size": int(df.iloc[v]["size"]),
391
- } for d, v in similarities]
499
+ json_sims = [
500
+ {
501
+ "similaritiy": float(d[0]),
502
+ "vaddr": int(df.iloc[v]["vaddr"]),
503
+ "name": str(df.iloc[v]["name"]),
504
+ "size": int(df.iloc[v]["size"]),
505
+ }
506
+ for d, v in similarities
507
+ ]
392
508
  return json_sims
393
509
 
394
510
 
395
- def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections: list[str] = None,
396
- distance: float = 0.1, debug_enabled: bool = False) -> Response:
511
+ def RE_nearest_symbols_batch(
512
+ function_ids: list[int],
513
+ nns: int = 5,
514
+ collections: list[str] = None,
515
+ distance: float = 0.1,
516
+ debug_enabled: bool = False,
517
+ ) -> Response:
397
518
  """
398
519
  Get nearest functions to a passed function ids
399
520
  :param function_ids: List of function ids
@@ -402,11 +523,12 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
402
523
  :param distance: How close we want the ANN search to filter for
403
524
  :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
404
525
  """
405
- params = {"function_id_list": function_ids,
406
- "result_per_function": nns,
407
- "debug_mode": debug_enabled,
408
- "distance": distance,
409
- }
526
+ params = {
527
+ "function_id_list": function_ids,
528
+ "result_per_function": nns,
529
+ "debug_mode": debug_enabled,
530
+ "distance": distance,
531
+ }
410
532
 
411
533
  if collections:
412
534
  # api param is collection, not collections
@@ -418,8 +540,13 @@ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections:
418
540
  return res
419
541
 
420
542
 
421
- def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
422
- distance: float = 0.1, debug_enabled: bool = False) -> Response:
543
+ def RE_nearest_functions(
544
+ fpath: str,
545
+ binary_id: int = 0,
546
+ nns: int = 5,
547
+ distance: float = 0.1,
548
+ debug_enabled: bool = False,
549
+ ) -> Response:
423
550
  """
424
551
  Get the nearest functions
425
552
  :param fpath: File path for binary to analyse
@@ -436,10 +563,11 @@ def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
436
563
  if bid == -1:
437
564
  raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
438
565
 
439
- params = {"result_per_function": nns,
440
- "debug_mode": debug_enabled,
441
- "distance": distance,
442
- }
566
+ params = {
567
+ "result_per_function": nns,
568
+ "debug_mode": debug_enabled,
569
+ "distance": distance,
570
+ }
443
571
 
444
572
  res: Response = reveng_req(requests.post, end_point, json_data=params)
445
573
 
@@ -484,14 +612,47 @@ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
484
612
  return res
485
613
 
486
614
 
615
+ def RE_binary_additonal_details(fpath: str, binary_id: int = None) -> Response:
616
+ bin_id = re_binary_id(fpath)
617
+ bid = re_bid_search(bin_id) if binary_id is None else binary_id
618
+ if bid == -1:
619
+ raise ReaitError(f"No matches found for hash: {bin_id}")
620
+
621
+ endpoint = f"v2/binaries/{bid}/additional-details"
622
+ res: Response = reveng_req(requests.get, endpoint)
623
+ res.raise_for_status()
624
+
625
+ logger.info(f"Additional Details Info({fpath}):\n")
626
+ logger.info(f"\n{json.dumps(res.json(), indent=4)}")
627
+ return res
628
+
629
+
630
+ def RE_binary_details(fpath: str, binary_id: int = None) -> Response:
631
+ bin_id = re_binary_id(fpath)
632
+ bid = re_bid_search(bin_id) if binary_id is None else binary_id
633
+ if bid == -1:
634
+ raise ReaitError(f"No matches found for hash: {bin_id}")
635
+
636
+ endpoint = f"v2/binaries/{bid}/details"
637
+ res: Response = reveng_req(requests.get, endpoint)
638
+ res.raise_for_status()
639
+
640
+ logger.info(f"Details Info({fpath}):\n")
641
+ logger.info(f"\n{json.dumps(res.json(), indent=4)}")
642
+ return res
643
+
644
+
487
645
  def RE_functions_rename(function_id: int, new_name: str) -> Response:
488
646
  """
489
647
  Send the new name of a function to C2
490
648
  :param function_id: ID of a function
491
649
  :param new_name: New function name
492
650
  """
493
- res: Response = reveng_req(requests.post, f"v1/functions/rename/{function_id}",
494
- json_data={"new_name": new_name})
651
+ res: Response = reveng_req(
652
+ requests.post,
653
+ f"v1/functions/rename/{function_id}",
654
+ json_data={"new_name": new_name},
655
+ )
495
656
 
496
657
  if res.ok:
497
658
  logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
@@ -507,17 +668,24 @@ def RE_functions_rename_batch(mapping: dict[int, str]) -> Response:
507
668
  Send a list of dictionaries, with a corresponding key as function ID and the desired function_name
508
669
  :param mapping: dictionary containing the function_id as key and function_name as value
509
670
  """
510
- params = {"new_name_mapping":
511
- [{"function_id": func_id,
512
- "function_name": func_name,
513
- } for func_id, func_name in mapping.items()]
514
- }
515
-
516
- res: Response = reveng_req(requests.post, "v1/functions/batch/rename", json_data=params)
671
+ params = {
672
+ "new_name_mapping": [
673
+ {
674
+ "function_id": func_id,
675
+ "function_name": func_name,
676
+ }
677
+ for func_id, func_name in mapping.items()
678
+ ]
679
+ }
680
+
681
+ res: Response = reveng_req(
682
+ requests.post, "v1/functions/batch/rename", json_data=params
683
+ )
517
684
 
518
685
  res.raise_for_status()
519
686
  return res
520
687
 
688
+
521
689
  def RE_settings() -> Response:
522
690
  """
523
691
  Get the configuration settings
@@ -553,6 +721,57 @@ def RE_authentication() -> Response:
553
721
  return res
554
722
 
555
723
 
724
+ def RE_functions_list(
725
+ analysis_id: int,
726
+ search_term: str = "",
727
+ min_v_address: int = 0,
728
+ max_v_address: int = 0,
729
+ ) -> Response:
730
+ """
731
+ Get the functions of a binary
732
+ :param binary_id: Binary ID
733
+ """
734
+ params = {}
735
+ if search_term:
736
+ params["search_term"] = search_term
737
+
738
+ if min_v_address != 0:
739
+ params["min_v_address"] = min_v_address
740
+
741
+ if max_v_address != 0:
742
+ params["max_v_address"] = max_v_address
743
+
744
+ res: Response = reveng_req(
745
+ requests.get, f"v2/analyses/{analysis_id}/info/functions/list", params=params
746
+ )
747
+
748
+ res.raise_for_status()
749
+
750
+ return res
751
+
752
+
753
+ def RE_function_callers_callees(function: int) -> Response:
754
+ """
755
+ Get the callers and callees of a functions
756
+ :param function: Function ID
757
+ """
758
+ res: Response = reveng_req(requests.get, f"v2/functions/{function}/callees_callers")
759
+
760
+ res.raise_for_status()
761
+ return res
762
+
763
+
764
+ def RE_analysis_info(analysis_id: int) -> Response:
765
+ """
766
+ Get the analysis information
767
+ :param analysis_id: Analysis ID
768
+ """
769
+ res: Response = reveng_req(requests.get, f"v2/analyses/{analysis_id}/info/basic")
770
+
771
+ res.raise_for_status()
772
+ return res
773
+
774
+
556
775
  def re_binary_id(fpath: str) -> str:
557
776
  """
558
777
  Take the SHA-256 hash of binary file
@@ -567,9 +786,7 @@ def re_binary_id(fpath: str) -> str:
567
786
 
568
787
  return hf.hexdigest()
569
788
  else:
570
- logger.error("File '%s' doesn't exist or isn't readable", fpath)
571
-
572
- return "Undefined"
789
+ return fpath
573
790
 
574
791
 
575
792
  def _binary_isa(binary: Binary, exec_type: str) -> str:
@@ -578,10 +795,9 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
578
795
  """
579
796
  if exec_type == "ELF":
580
797
  arch = binary.header.machine_type
581
-
582
- if arch == ELF.ARCH.i386:
798
+ if arch == ELF.ARCH.I386:
583
799
  return "x86"
584
- elif arch == ELF.ARCH.x86_64:
800
+ elif arch == ELF.ARCH.X86_64:
585
801
  return "x86_64"
586
802
  elif arch == ELF.ARCH.ARM:
587
803
  return "ARM32"
@@ -589,7 +805,6 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
589
805
  return "ARM64"
590
806
  elif exec_type == "PE":
591
807
  machine_type = binary.header.machine
592
-
593
808
  if machine_type == PE.Header.MACHINE_TYPES.I386:
594
809
  return "x86"
595
810
  elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
@@ -601,17 +816,23 @@ def _binary_isa(binary: Binary, exec_type: str) -> str:
601
816
  elif exec_type == "Mach-O":
602
817
  cpu_type = binary.header.cpu_type
603
818
 
604
- if cpu_type == MachO.CPU_TYPES.x86:
819
+ if cpu_type == MachO.Header.CPU_TYPE.X86:
605
820
  return "x86"
606
- elif cpu_type == MachO.CPU_TYPES.x86_64:
821
+ elif cpu_type == MachO.Header.CPU_TYPE.X86_64:
607
822
  return "x86_64"
608
- elif cpu_type == MachO.CPU_TYPES.ARM:
823
+ elif cpu_type == MachO.Header.CPU_TYPE.ARM:
609
824
  return "ARM32"
610
- elif cpu_type == MachO.CPU_TYPES.ARM64:
825
+ elif cpu_type == MachO.Header.CPU_TYPE.ARM64:
611
826
  return "ARM64"
612
827
 
613
- logger.error("Error, could not determine or unsupported ISA for binary format: %s.", exec_type)
614
- raise RuntimeError(f"Error, could not determine or unsupported ISA for binary format: {exec_type}.")
828
+ logger.error(
829
+ "Error, could not determine or unsupported "
830
+ f"ISA for binary format: {exec_type}."
831
+ )
832
+ raise RuntimeError(
833
+ "Error, could not determine or unsupported "
834
+ f"ISA for binary format: {exec_type}."
835
+ )
615
836
 
616
837
 
617
838
  def _binary_format(binary: Binary) -> str:
@@ -625,8 +846,12 @@ def _binary_format(binary: Binary) -> str:
625
846
  if binary.format == Binary.FORMATS.MACHO:
626
847
  return "Mach-O"
627
848
 
628
- logger.error("Error, could not determine or unsupported binary format: %s.", binary.format)
629
- raise RuntimeError(f"Error, could not determine or unsupported binary format: {binary.format}")
849
+ logger.error(
850
+ "Error, could not determine or unsupported" f" binary format: {binary.format}."
851
+ )
852
+ raise RuntimeError(
853
+ "Error, could not determine or " f"unsupported binary format: {binary.format}"
854
+ )
630
855
 
631
856
 
632
857
  def file_type(fpath: str) -> tuple[str, str]:
@@ -656,17 +881,281 @@ def parse_config() -> None:
656
881
  with open(fpath) as fd:
657
882
  config = tomli.loads(fd.read())
658
883
 
659
- for key in ("apikey", "host", "model",):
884
+ for key in (
885
+ "apikey",
886
+ "host",
887
+ "model",
888
+ ):
660
889
  if key in config:
661
890
  re_conf[key] = config[key]
662
891
  else:
663
892
  logger.info("File %s doesn't exist or isn't readable", fpath)
664
893
 
665
894
 
666
- def angular_distance(x, y) -> float:
895
+ def RE_analysis_id(fpath: str, binary_id: int = 0) -> Response:
896
+ """
897
+ Get the Analysis ID for the Binary ID
898
+ :param fpath: File path for binary to analyse
899
+ :param binary_id: ID of binary
900
+ """
901
+ bin_id = re_binary_id(fpath)
902
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
903
+
904
+ end_point = f"v2/analyses/lookup/{bid}"
905
+
906
+ if bid == -1:
907
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
908
+
909
+ res: Response = reveng_req(requests.get, end_point)
910
+
911
+ logger.info("Analysis ID for %s:\n%s", fpath, res.text)
912
+
913
+ res.raise_for_status()
914
+ return res
915
+
916
+
917
+ def RE_generate_data_types(analysis_id: int, function_ids: list[int]) -> Response:
918
+ """
919
+ Generate data types for the analysis
920
+ :param aid: Analysis ID
921
+ """
922
+ end_point = f"/v2/analyses/{analysis_id}/info/functions/data_types"
923
+
924
+ res: Response = reveng_req(
925
+ requests.post, end_point, json_data={"function_ids": function_ids}
926
+ )
927
+ res.raise_for_status()
928
+ return res
929
+
930
+
931
+ def RE_list_data_types(analysis_id: int, function_ids: list[int]) -> Response:
932
+ """
933
+ List data types for the analysis
934
+ :param aid: Analysis ID
935
+ :param function_ids: List of function IDs
936
+ """
937
+ end_point = f"/v2/analyses/{analysis_id}/info/functions/data_types"
938
+
939
+ res: Response = reveng_req(
940
+ requests.get, end_point, json_data={"function_ids": function_ids}
941
+ )
942
+ res.raise_for_status()
943
+ return res
944
+
945
+
946
+ def RE_begin_ai_decompilation(function_id: int) -> Response:
667
947
  """
668
- Compute angular distance between two embedding vectors
669
- Normalised euclidian distance
948
+ Begin AI decompilation for the function
949
+ :param function_id: Function ID
670
950
  """
671
- cos = dot(x, y) / ((dot(x, x) * dot(y, y))**0.5)
672
- return 1.0 - arccos(cos) / pi
951
+ end_point = f"/v2/functions/{function_id}/ai-decompilation"
952
+
953
+ res: Response = reveng_req(
954
+ requests.post,
955
+ end_point,
956
+ data=None,
957
+ )
958
+ res.raise_for_status()
959
+ return res
960
+
961
+
962
+ def RE_poll_ai_decompilation(function_id: int) -> Response:
963
+ """
964
+ Poll AI decompilation for the function
965
+ :param function_id: Function ID
966
+ """
967
+ end_point = f"/v2/functions/{function_id}/ai-decompilation"
968
+
969
+ res: Response = reveng_req(
970
+ requests.get,
971
+ end_point,
972
+ )
973
+ res.raise_for_status()
974
+ return res
975
+
976
+
977
+ def RE_analysis_lookup(binary_id: int) -> Response:
978
+ """
979
+ Get the Analysis ID from a Binary ID
980
+ :param binary_id: Binary ID
981
+ """
982
+ end_point = f"/v2/analyses/lookup/{binary_id}"
983
+ res: Response = reveng_req(requests.get, end_point)
984
+ res.raise_for_status()
985
+ return res
986
+
987
+
988
+ def RE_collections_search(
989
+ page: int = 1,
990
+ page_size: int = 10,
991
+ partial_collection_name: str = "",
992
+ partial_binary_name: str = "",
993
+ partial_binary_sha256: str = "",
994
+ tags: list[str] | str = "",
995
+ model_name: str = "",
996
+ ) -> Response:
997
+ """
998
+ """
999
+ end_point = "/v2/search/collections"
1000
+ res: Response = reveng_req(requests.get, end_point, params={
1001
+ "page": page,
1002
+ "page_size": page_size,
1003
+ "partial_collection_name": partial_collection_name,
1004
+ })
1005
+ res.raise_for_status()
1006
+ return res
1007
+
1008
+
1009
+ # Bin_id is referred to as hash in this program - to maintain usage BID = id
1010
+ # of a binary bin_id = hash
1011
+ # Assumes a file has been passed, correct hash only
1012
+ # Returns the BID of the binary_id (hash)
1013
+ def RE_latest_bid(bin_id: str) -> int:
1014
+ res: Response = reveng_req(
1015
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
1016
+ )
1017
+
1018
+ bid = -1
1019
+
1020
+ if res.ok:
1021
+ # Filter the result who matches the SHA-256
1022
+ binaries = list(
1023
+ filter(
1024
+ lambda binary: binary["sha_256_hash"] == bin_id,
1025
+ res.json()["query_results"],
1026
+ )
1027
+ )
1028
+
1029
+ # Check only one record is returned
1030
+ if len(binaries) == 1:
1031
+ binary = binaries[0]
1032
+ bid = binary["binary_id"]
1033
+
1034
+ logger.info(
1035
+ "Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
1036
+ bid,
1037
+ binary["binary_name"],
1038
+ binary["creation"],
1039
+ binary["model_name"],
1040
+ binary["status"],
1041
+ )
1042
+ elif len(binaries) > 1:
1043
+ binaries.sort(
1044
+ key=lambda binary: datetime.fromisoformat(
1045
+ binary["creation"]
1046
+ ).timestamp(),
1047
+ reverse=True,
1048
+ )
1049
+
1050
+ logger.info("%d matches found for hash: %s", len(binaries), bin_id)
1051
+
1052
+ options_dict = {}
1053
+
1054
+ for idx, binary in enumerate(binaries):
1055
+ logger.info(
1056
+ "[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
1057
+ idx,
1058
+ binary["binary_id"],
1059
+ binary["binary_name"],
1060
+ binary["creation"],
1061
+ binary["model_name"],
1062
+ binary["status"],
1063
+ )
1064
+
1065
+ options_dict[idx] = binary["binary_id"]
1066
+ try:
1067
+ bid = options_dict[0]
1068
+ except Exception:
1069
+ bid = options_dict[0]
1070
+ logger.warning("Select the most recent analysis - ID: %d", bid)
1071
+ else:
1072
+ logger.warning("No matches found for hash: %s", bin_id)
1073
+ else:
1074
+ logger.warning("Bad Request: %s", res.text)
1075
+
1076
+ res.raise_for_status()
1077
+ return bid
1078
+
1079
+
1080
+ # NOTE: newest API as per documentation still using /v1/ prefix
1081
+ def RE_models() -> Response:
1082
+ res: Response = reveng_req(requests.get, "v1/models")
1083
+
1084
+ res.raise_for_status()
1085
+ return res
1086
+
1087
+
1088
+ # NOTE: newest API as per documentation still using /v1/ prefix
1089
+ def RE_functions_dump(function_ids: list[int]) -> Response:
1090
+ res: Response = reveng_req(
1091
+ requests.post, "v1/functions/dump", json_data={"function_id_list": function_ids}
1092
+ )
1093
+
1094
+ res.raise_for_status()
1095
+ return res
1096
+
1097
+
1098
+ # NOTE: this API endpoint does not actually exist
1099
+ def RE_generate_summaries(function_id: int) -> Response:
1100
+ res: Response = reveng_req(
1101
+ requests.get, f"v1/functions/blocks_comments/{function_id}"
1102
+ )
1103
+
1104
+ res.raise_for_status()
1105
+ return res
1106
+
1107
+
1108
+ def RE_collection_search(search: str) -> Response:
1109
+ res: Response = reveng_req(
1110
+ requests.get,
1111
+ "v1/collections/quick/search",
1112
+ params={"search_term": search if search else ""},
1113
+ )
1114
+
1115
+ res.raise_for_status()
1116
+ return res
1117
+
1118
+
1119
+ def RE_recent_analysis(
1120
+ status: str = "All", scope: str = "ALL", nb_analysis: int = 50
1121
+ ) -> Response:
1122
+ res: Response = reveng_req(
1123
+ requests.get,
1124
+ "v1/analyse/recent",
1125
+ json_data={"status": status, "scope": scope, "n": nb_analysis},
1126
+ )
1127
+
1128
+ res.raise_for_status()
1129
+ return res
1130
+
1131
+
1132
+ def RE_search(fpath: str) -> Response:
1133
+ bin_id = re_binary_id(fpath)
1134
+
1135
+ res: Response = reveng_req(
1136
+ requests.get, "v1/search", json_data={"sha_256_hash": bin_id}
1137
+ )
1138
+
1139
+ res.raise_for_status()
1140
+ return res
1141
+
1142
+
1143
+ # NOTE: this uses a newer API version
1144
+ def RE_similar_functions(
1145
+ function_id: int,
1146
+ limit: int = 20,
1147
+ distance: int | float = 0.09999999999999998,
1148
+ debug: bool = False,
1149
+ ):
1150
+ params = {
1151
+ "distance": distance,
1152
+ "limit": limit,
1153
+ "debug": debug,
1154
+ }
1155
+
1156
+ res: Response = reveng_req(
1157
+ requests.get, f"v2/functions/{function_id}/similar-functions", params=params
1158
+ )
1159
+
1160
+ res.raise_for_status()
1161
+ return res