reait 0.0.20__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
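For orientation, the sketch below shows how a caller might migrate across this release, based only on the signature changes visible in the diff that follows. The import path, file path, and argument values are illustrative assumptions, not taken from the package's documentation.

# Illustrative only: function and parameter names come from the diff below;
# the file path, scope value and import style are assumptions.
from reait import api

# reait 0.0.20 (old API, shown commented out):
#   api.RE_analyse("/tmp/sample.bin", model_name="binnet-0.2-x86", scope="PRIVATE")
#   api.RE_nearest_symbols(embedding, model_name="binnet-0.2-x86", nns=5)

# reait 1.0.0: endpoints move under /v1, "scope" becomes "binary_scope",
# and nearest-neighbour search is driven by function IDs or an analysed binary.
api.RE_analyse("/tmp/sample.bin", model_name="binnet-0.3-x86", binary_scope="PRIVATE")
api.RE_nearest_functions("/tmp/sample.bin", nns=5, distance=0.1)
api.RE_nearest_symbols_batch(function_ids=[1, 2, 3], nns=5, distance=0.1)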
reait/api.py CHANGED
@@ -1,33 +1,48 @@
1
- #!/usr/bin/env python
2
1
  # -*- coding: utf-8 -*-
3
2
  from __future__ import print_function, annotations
4
3
 
4
+ import json
5
+ import tomli
6
+ import logging
7
+ import requests
8
+
5
9
  from hashlib import sha256
10
+ from datetime import datetime
6
11
 
7
12
  from sklearn.metrics.pairwise import cosine_similarity
8
- from os.path import basename, exists, expanduser
13
+ from os import access, R_OK
14
+ from os.path import basename, isfile, expanduser, getsize
9
15
  from requests import request, Response, HTTPError
10
- import requests
11
16
  from numpy import array, vstack, dot, arccos, pi
12
17
  from pandas import DataFrame
13
- import json
14
- import tomli
15
- import logging
16
- from lief import parse, ELF, PE, MachO
18
+ from lief import parse, Binary, ELF, PE, MachO
17
19
 
20
+ __version__ = "1.0.0"
18
21
 
19
22
  re_conf = {
20
23
  "apikey": "l1br3",
21
24
  "host": "https://api.reveng.ai",
22
- "model": "binnet-0.2-x86"
25
+ "model": "binnet-0.3-x86",
23
26
  }
24
27
 
25
28
 
26
29
  logger = logging.getLogger("REAIT")
27
30
 
28
31
 
29
- def reveng_req(r: request, end_point: str, data=None, ex_headers: dict = None, params=None,
30
- json_data: dict = None, timeout: int = 30) -> Response:
32
+ class ReaitError(HTTPError):
33
+ def __init__(self, reason: str, end_point: str = None):
34
+ response = Response()
35
+
36
+ response.reason = reason
37
+ response.status_code = 404
38
+ response._content = b'{"success": false, "error": "' + reason.encode() + b'"}'
39
+ response.url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}" if end_point else None
40
+
41
+ super().__init__(reason, response=response)
42
+
43
+
44
+ def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict = None,
45
+ params: dict = None, json_data: dict = None, timeout: int = 60, files: dict = None) -> Response:
31
46
  """
32
47
  Constructs and sends a Request
33
48
  :param r: Method for the new Request
@@ -37,98 +52,85 @@ def reveng_req(r: request, end_point: str, data=None, ex_headers: dict = None, p
37
52
  :param params: Dictionary, list of tuples or bytes to send in the query string for the query string
38
53
  :param json_data: A JSON serializable Python object to send in the body
39
54
  :param timeout: Number of seconds to stop waiting for a Response
55
+ :param files: Dictionary of files to send to the specified URL
40
56
  """
41
- url = f"{re_conf['host']}/{end_point}"
57
+ url = f"{re_conf['host']}/{end_point if end_point[0] != '/' else end_point[1:]}"
42
58
  headers = {"Authorization": re_conf["apikey"]}
43
59
 
44
60
  if ex_headers:
45
61
  headers.update(ex_headers)
46
62
 
47
- logger.debug("Making request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s",
48
- url, headers, data, json_data, params)
63
+ logger.debug("Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
64
+ r.__name__.upper(), url, headers, data, json_data, params, files)
49
65
 
50
- response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout)
66
+ response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout, files=files)
51
67
 
52
- logger.debug("Making response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
53
- url, response.headers, response.status_code, response.text)
68
+ logger.debug("Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
69
+ r.__name__.upper(), url, response.headers, response.status_code, response.text)
54
70
 
55
71
  return response
56
72
 
57
73
 
58
74
  def re_hash_check(bin_id: str) -> bool:
59
- status = False
60
- res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All&user_owned=true")
75
+ res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
61
76
 
62
- if res.status_code == 200:
63
- binaries_data = res.json()["binaries"]
64
- status = len(binaries_data) > 0
65
- elif res.status_code == 400:
66
- logger.warning("Bad Request: %s", res.text)
77
+ if res.ok:
78
+ return any(binary["sha_256_hash"] == bin_id for binary in res.json()["query_results"])
67
79
  else:
68
- logger.error("Internal Server Error.")
80
+ logger.warning("Bad Request: %s", res.text)
69
81
 
70
- res.raise_for_status()
71
- return status
82
+ return False
72
83
 
73
84
 
74
85
  # Bin_id is referred to as hash in this program - to maintain usage BID = id of a binary bin_id = hash
75
86
  # Assumes a file has been passed, correct hash only
76
87
  # Returns the BID of the binary_id (hash)
77
88
  def re_bid_search(bin_id: str) -> int:
78
- res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All")
89
+ res: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})
79
90
 
80
91
  bid = -1
81
92
 
82
- # Valid request
83
- if res.status_code == 200:
93
+ if res.ok:
94
+ # Filter the result who matches the SHA-256
95
+ binaries = list(filter(lambda binary: binary["sha_256_hash"] == bin_id, res.json()["query_results"]))
96
+
84
97
  # Check only one record is returned
85
- binaries_data = res.json()["binaries"]
98
+ if len(binaries) == 1:
99
+ binary = binaries[0]
100
+ bid = binary["binary_id"]
101
+
102
+ logger.info("Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
103
+ bid, binary["binary_name"], binary["creation"], binary["model_name"], binary["status"])
104
+ elif len(binaries) > 1:
105
+ binaries.sort(key=lambda binary: datetime.fromisoformat(binary["creation"]).timestamp(), reverse=True)
86
106
 
87
- if len(binaries_data) > 1:
88
- logger.info("%d matches found for hash: %s.", len(binaries_data), bin_id)
107
+ logger.info("%d matches found for hash: %s", len(binaries), bin_id)
89
108
 
90
- if len(binaries_data) > 1:
91
- options_dict = {}
109
+ options_dict = {}
92
110
 
93
- for idx, binary in enumerate(binaries_data):
94
- logger.info("[%d] - ID: {}, Name: %s, Creation: %s, Model: %s, Owner: %s, Status: %s",
95
- idx, binary["binary_id"], binary["binary_name"], binary["creation"],
96
- binary["model_name"], binary["owner"], binary["status"])
111
+ for idx, binary in enumerate(binaries):
112
+ logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
113
+ idx, binary["binary_id"], binary["binary_name"], binary["creation"],
114
+ binary["model_name"], binary["status"])
97
115
 
98
- options_dict[idx] = binary["binary_id"]
116
+ options_dict[idx] = binary["binary_id"]
99
117
 
118
+ try:
100
119
  user_input = input("[+] Please enter the option you want to use for this operation:")
101
120
 
102
- try:
103
- option_number = int(user_input)
121
+ option_number = int(user_input)
104
122
 
105
- bid = options_dict.get(option_number, -1)
123
+ bid = options_dict.get(option_number, -1)
106
124
 
107
- if bid == -1:
108
- logger.warning("Invalid option.")
109
- except Exception:
110
- bid = -1
125
+ if bid == -1:
111
126
  logger.warning("Invalid option.")
112
- # Only 1 match found
113
- elif len(binaries_data) == 1:
114
- binary = binaries_data[0]
115
- bid = binary["binary_id"]
116
- else:
117
- logger.warning("No matches found for hash: %s.", bin_id)
118
- elif len(binaries_data) == 1:
119
- binary = binaries_data[0]
120
- bid = binary["binary_id"]
121
-
122
- logger.info("Only one record exists, selecting - ID: %d, Name: %s, "
123
- "Creation: %s, Model: %s, Owner: %s, Status: %s",
124
- bid, binary["binary_name"], binary["creation"],
125
- binary["model_name"], binary["owner"], binary["status"])
127
+ except Exception:
128
+ bid = options_dict[0]
129
+ logger.warning("Select the most recent analysis - ID: %d", bid)
126
130
  else:
127
- logger.warning("No matches found for hash: %s.", bin_id)
128
- elif res.status_code == 400:
129
- logger.warning("Bad Request: %s", res.text)
131
+ logger.warning("No matches found for hash: %s", bin_id)
130
132
  else:
131
- logger.error("Internal Server Error.")
133
+ logger.warning("Bad Request: %s", res.text)
132
134
 
133
135
  res.raise_for_status()
134
136
  return bid
@@ -143,15 +145,17 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
143
145
  bin_id = re_binary_id(fpath)
144
146
  bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
145
147
 
148
+ end_point = f"v1/analyse/{bid}"
149
+
146
150
  if bid == -1:
147
- raise HTTPError(f"No matches found for hash: {bin_id}")
151
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
148
152
 
149
- res = reveng_req(requests.delete, f"analyse/{bid}")
153
+ res: Response = reveng_req(requests.delete, end_point)
150
154
 
151
- if res.status_code == 200:
152
- logger.info("Securely deleted %s analysis.", bin_id)
155
+ if res.ok:
156
+ logger.info("Securely deleted analysis ID %s - %s.", bid, bin_id)
153
157
  elif res.status_code == 404:
154
- logger.warning("Error analysis not found for %s.", bin_id)
158
+ logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
155
159
  else:
156
160
  logger.error("Error deleting binary %s under. Server returned %d.", bin_id, res.status_code)
157
161
 
@@ -159,10 +163,10 @@ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
159
163
  return res
160
164
 
161
165
 
162
- def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None, platform_options: str = None,
163
- file_options: str = None, dynamic_execution: bool = False, command_line_args: str = None,
164
- scope: str = None, tags: list = None, priority: int = 0,
165
- duplicate: bool = False, symbols: dict = None) -> Response:
166
+ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
167
+ platform_options: str = None, file_options: str = None, dynamic_execution: bool = False,
168
+ command_line_args: str = None, binary_scope: str = None, tags: list = None, priority: int = 0,
169
+ duplicate: bool = False, symbols: dict = None, debug_fpath: str = None) -> Response:
166
170
  """
167
171
  Start analysis job for binary file
168
172
  :param fpath: File path for binary to analyse
@@ -172,46 +176,56 @@ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None, plat
172
176
  :param platform_options: OS platform
173
177
  :param dynamic_execution: Enable dynamic execution in sandbox during analysis
174
178
  :param command_line_args: Command line arguments to pass when running binary sample in the sandbox
175
- :param scope: Analysis visibility
179
+ :param binary_scope: Analysis visibility
176
180
  :param tags: Assign tags to an analysis
177
181
  :param priority: Priority to processing queue
178
182
  :param duplicate: Duplicate an existing binary
179
- :param symbols: List of functions
183
+ :param symbols: JSON object containing the base address and the list of functions
184
+ :param debug_fpath: File path for debug file
180
185
  """
181
186
  bin_id = re_binary_id(fpath)
182
187
  result = re_hash_check(bin_id)
183
188
 
189
+ end_point = "v1/analyse/"
190
+
184
191
  if result and duplicate is False:
185
192
  logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
186
193
  bin_id)
187
- raise HTTPError(f"Duplicate analysis for hash: {bin_id}")
194
+ raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)
188
195
 
189
196
  filename = basename(fpath)
190
197
 
191
- params = {"file_name": filename, "sha_256_hash": bin_id}
198
+ params = {"file_name": filename, "size_in_bytes": getsize(fpath), "sha_256_hash": bin_id,}
199
+
200
+ if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
201
+ try:
202
+ debug = RE_upload(debug_fpath).json()
192
203
 
204
+ if debug["success"]:
205
+ params["debug_hash"] = debug["sha_256_hash"]
206
+ except HTTPError:
207
+ pass
208
+
193
209
  for p_name in ("model_name", "isa_options", "platform_options", "file_options",
194
- "dynamic_execution", "command_line_args", "scope", "tags", "priority", "symbols"):
210
+ "dynamic_execution", "command_line_args", "binary_scope", "tags", "priority", "symbols",):
195
211
  p_value = locals()[p_name]
196
212
 
197
213
  if p_value:
198
214
  params[p_name] = p_value
199
215
 
200
- res = reveng_req(requests.post, f"analyse", json_data=params)
216
+ res: Response = reveng_req(requests.post, end_point, json_data=params)
201
217
 
202
- if res.status_code == 200:
203
- logger.info("Successfully submitted binary for analysis. %s - %s", fpath, re_binary_id(fpath))
218
+ if res.ok:
219
+ logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
204
220
  elif res.status_code == 400:
205
- response = res.json()
206
-
207
- if "error" in response.keys():
208
- logger.warning("Error analysing %s - %s", fpath, response["error"])
221
+ if "error" in res.json().keys():
222
+ logger.warning("Error analysing %s - %s", fpath, res.json()["error"])
209
223
 
210
224
  res.raise_for_status()
211
225
  return res
212
226
 
213
227
 
214
- def RE_upload(fpath: str) -> Response | bool:
228
+ def RE_upload(fpath: str) -> Response:
215
229
  """
216
230
  Upload binary to Server
217
231
  :param fpath: File path for binary to analyse
@@ -220,22 +234,27 @@ def RE_upload(fpath: str) -> Response | bool:
220
234
  result = re_hash_check(bin_id)
221
235
 
222
236
  if result:
223
- logger.info("File %s - %s already exists. Skipping upload...", basename(fpath), re_binary_id(fpath))
224
- return True
225
-
226
- res = reveng_req(requests.post, f"upload", data=open(fpath, "rb").read())
227
-
228
- if res.status_code == 200:
229
- logger.info("Successfully uploaded binary to your account. %s - %s", fpath, re_binary_id(fpath))
230
- elif res.status_code == 400:
231
- response = res.json()
232
-
233
- if "error" in response.keys():
234
- logger.warning("Error uploading %s - %s", fpath, response["error"])
235
- elif res.status_code == 413:
236
- logger.warning("File too large. Please upload files under 100MB.")
237
- elif res.status_code == 500:
238
- logger.error("Internal Server Error. Please contact support. Skipping upload...")
237
+ logger.info("File %s - %s already uploaded. Skipping upload...", fpath, bin_id)
238
+
239
+ res = Response()
240
+ res.status_code = 200
241
+ res.url = f"{re_conf['host']}/v1/upload"
242
+ res._content = ('{0}"success": true,'
243
+ '"message": "File already uploaded!",'
244
+ '"sha_256_hash": "{1}"{2}').format("{", bin_id, "}").encode()
245
+ else:
246
+ with open(fpath, "rb") as fd:
247
+ res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})
248
+
249
+ if res.ok:
250
+ logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
251
+ elif res.status_code == 400:
252
+ if "error" in res.json().keys():
253
+ logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
254
+ elif res.status_code == 413:
255
+ logger.warning("File too large. Please upload files under 10MB.")
256
+ elif res.status_code == 500:
257
+ logger.error("Internal Server Error. Please contact support. Skipping upload...")
239
258
 
240
259
  res.raise_for_status()
241
260
  return res
@@ -250,65 +269,14 @@ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
250
269
  bin_id = re_binary_id(fpath)
251
270
  bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
252
271
 
253
- if bid == -1:
254
- raise HTTPError(f"No matches found for hash: {bin_id}")
255
-
256
- res = reveng_req(requests.get, f"embeddings/{bid}")
257
-
258
- if res.status_code == 400:
259
- logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
260
- bin_id)
261
-
262
- res.raise_for_status()
263
- return res
264
-
265
-
266
- def RE_signature(fpath: str, binary_id: int = 0) -> Response:
267
- """
268
- Fetch binary BinNet signature
269
- :param fpath: File path for binary to analyse
270
- :param binary_id: ID of binary
271
- """
272
- bin_id = re_binary_id(fpath)
273
- bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
272
+ end_point = f"v1/embeddings/binary/{bid}"
274
273
 
275
274
  if bid == -1:
276
- raise HTTPError(f"No matches found for hash: {bin_id}")
277
-
278
- res = reveng_req(requests.get, f"signature/{bid}")
279
-
280
- if res.status_code == 425:
281
- logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
282
- bin_id)
283
-
284
- res.raise_for_status()
285
- return res
286
-
287
-
288
- def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None,
289
- model: str = None) -> Response:
290
- """
291
- Fetch embedding for custom symbol range
292
- :param fpath: File path for binary to analyse
293
- :param start_vaddr: Start virtual address of the function to extract embeddings
294
- :param end_vaddr: End virtual address of the function to extract embeddings
295
- :param base_vaddr: Base address of the binary
296
- :param model: Binary model name
297
- """
298
- params = {}
275
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
299
276
 
300
- if end_vaddr:
301
- params["end_vaddr"] = end_vaddr
302
- if base_vaddr:
303
- params["base_vaddr"] = base_vaddr
304
- if model:
305
- params["models"] = model
277
+ res: Response = reveng_req(requests.get, end_point)
306
278
 
307
- bin_id = re_binary_id(fpath)
308
-
309
- res = reveng_req(requests.get, f"embedding/{bin_id}/{start_vaddr}", params=params)
310
-
311
- if res.status_code == 425:
279
+ if res.status_code == 400:
312
280
  logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
313
281
  bin_id)
314
282
 
@@ -326,13 +294,15 @@ def RE_logs(fpath: str, binary_id: int = 0, console: bool = True) -> Response:
326
294
  bin_id = re_binary_id(fpath)
327
295
  bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
328
296
 
297
+ end_point = f"v1/logs/{bid}"
298
+
329
299
  if bid == -1:
330
- raise HTTPError(f"No matches found for hash: {bin_id}")
300
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
331
301
 
332
- res = reveng_req(requests.get, f"logs/{bid}")
302
+ res: Response = reveng_req(requests.get, end_point)
333
303
 
334
- if res.status_code == 200 and console:
335
- logger.info("Logs found for %s:\n%s", bin_id, res.text)
304
+ if res.ok and console:
305
+ logger.info("Logs found for %s:\n%s", bin_id, res.json()["logs"])
336
306
  elif res.status_code == 404:
337
307
  logger.warning("Error, logs not found for %s.", bin_id)
338
308
 
@@ -349,12 +319,14 @@ def RE_cves(fpath: str, binary_id: int = 0) -> Response:
349
319
  bin_id = re_binary_id(fpath)
350
320
  bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
351
321
 
322
+ end_point = f"cves/{bid}"
323
+
352
324
  if bid == -1:
353
- raise HTTPError(f"No matches found for hash: {bin_id}")
325
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
354
326
 
355
- res = reveng_req(requests.get, f"cves/{bid}")
327
+ res: Response = reveng_req(requests.get, end_point)
356
328
 
357
- if res.status_code == 200:
329
+ if res.ok:
358
330
  cves = json.loads(res.text)
359
331
  logger.info("Checking for known CVEs embedded inside %s", fpath)
360
332
 
@@ -369,7 +341,7 @@ def RE_cves(fpath: str, binary_id: int = 0) -> Response:
369
341
  return res
370
342
 
371
343
 
372
- def RE_status(fpath: str, binary_id: int = 0) -> Response:
344
+ def RE_status(fpath: str, binary_id: int = 0, console: bool = False) -> Response:
373
345
  """
374
346
  Get the status of an ongoing binary analysis
375
347
  :param fpath: File path for binary to analyse
@@ -378,11 +350,15 @@ def RE_status(fpath: str, binary_id: int = 0) -> Response:
378
350
  bin_id = re_binary_id(fpath)
379
351
  bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
380
352
 
353
+ end_point = f"v1/analyse/status/{bid}"
354
+
381
355
  if bid == -1:
382
- raise HTTPError(f"No matches found for hash: {bin_id}")
356
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
383
357
 
384
- res = reveng_req(requests.get, f"analyse/status/{bid}")
358
+ res: Response = reveng_req(requests.get, end_point)
385
359
 
360
+ if res.ok and console:
361
+ logger.info("Binary analysis status: %s", res.json()["status"])
386
362
  if res.status_code == 400:
387
363
  logger.warning(" Error, status not found for %s.", bin_id)
388
364
 
@@ -408,62 +384,77 @@ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list
408
384
 
409
385
  # create json similarity object
410
386
  similarities = list(zip(distances, closest_df.index.tolist()))
411
- json_sims = [{"similaritiy": float(d[0]), "vaddr": int(df.iloc[v]["vaddr"]), "name": str(df.iloc[v]["name"]),
412
- "size": int(df.iloc[v]["size"])} for d, v in similarities]
387
+ json_sims = [{"similaritiy": float(d[0]),
388
+ "vaddr": int(df.iloc[v]["vaddr"]),
389
+ "name": str(df.iloc[v]["name"]),
390
+ "size": int(df.iloc[v]["size"])
391
+ } for d, v in similarities]
413
392
  return json_sims
414
393
 
415
394
 
416
- def RE_nearest_symbols(embedding: list, model_name: str, nns: int = 5,
417
- collections: list = None, ignore_hashes: list = None,
418
- distance: float = 0.0, debug_enabled: bool = False) -> Response:
395
+ def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections: list[str] = None,
396
+ distance: float = 0.1, debug_enabled: bool = False) -> Response:
419
397
  """
420
- Get function name suggestions for an embedding
421
- :param embedding: Embedding vector as python list
422
- :param model_name: Binary model name
398
+ Get nearest functions to a passed function ids
399
+ :param function_ids: List of function ids
423
400
  :param nns: Number of nearest neighbors
424
401
  :param collections: List of collections RevEng.AI collection names to search through
425
- :param ignore_hashes: List[str] SHA-256 hash of binary file to ignore symbols from (usually the current binary)
426
402
  :param distance: How close we want the ANN search to filter for
427
403
  :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
428
404
  """
429
- params = {"nns": nns, "model_name": model_name, "debug_enabled": debug_enabled}
405
+ params = {"function_id_list": function_ids,
406
+ "result_per_function": nns,
407
+ "debug_mode": debug_enabled,
408
+ "distance": distance,}
430
409
 
431
- if collections and len(collections) > 0:
410
+ if collections:
432
411
  # api param is collection, not collections
433
- params["collection"] = "|".join(collections)
434
-
435
- if ignore_hashes and len(ignore_hashes) > 0:
436
- params["ignore_hashes"] = ignore_hashes
412
+ params["collection"] = collections
437
413
 
438
- if distance > 0.0:
439
- params["distance"] = distance
440
-
441
- res = reveng_req(requests.post, "ann/symbol", data=json.dumps(embedding), params=params)
414
+ res: Response = reveng_req(requests.post, "v1/ann/symbol/batch", json_data=params)
442
415
 
443
416
  res.raise_for_status()
444
417
  return res
445
418
 
446
419
 
447
- def RE_nearest_binaries(embedding: list, model_name: str, nns: int = 5,
448
- collections: list = None, ignore_hashes: list = None) -> Response:
420
+ def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
421
+ distance: float = 0.1, debug_enabled: bool = False) -> Response:
449
422
  """
450
- Get executable suggestions for a binary embedding
451
- :param embedding: Embedding vector as python list
452
- :param model_name: Binary model name
423
+ Get the nearest functions
424
+ :param fpath: File path for binary to analyse
425
+ :param binary_id: ID of binary
453
426
  :param nns: Number of nearest neighbors
454
- :param collections: List of collections RevEng.AI collection names to search through
455
- :param ignore_hashes: List[str] SHA-256 hash of binary files to ignore symbols from (usually the current binary)
427
+ :param distance: How close we want the ANN search to filter for
428
+ :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
456
429
  """
457
- params = {"nns": nns, "model_name": model_name}
430
+ bin_id = re_binary_id(fpath)
431
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
458
432
 
459
- if collections and len(collections) > 0:
460
- # api param is collection, not collections
461
- params["collection"] = "|".join(collections)
433
+ end_point = f"v1/ann/symbol/{bid}"
434
+
435
+ if bid == -1:
436
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
437
+
438
+ params = {"result_per_function": nns,
439
+ "debug_mode": debug_enabled,
440
+ "distance": distance, }
441
+
442
+ res: Response = reveng_req(requests.post, end_point, json_data=params)
443
+
444
+ res.raise_for_status()
445
+ return res
446
+
447
+
448
+ def RE_analyze_functions(fpath: str, binary_id: int = 0) -> Response:
449
+ bin_id = re_binary_id(fpath)
450
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
451
+
452
+ end_point = f"v1/analyse/functions/{bid}"
462
453
 
463
- if ignore_hashes and len(ignore_hashes) > 0:
464
- params["ignore_hashes"] = ignore_hashes
454
+ if bid == -1:
455
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
465
456
 
466
- res = reveng_req(requests.post, "ann/binary", data=json.dumps(embedding), params=params)
457
+ res: Response = reveng_req(requests.get, end_point)
467
458
 
468
459
  res.raise_for_status()
469
460
  return res
@@ -478,10 +469,12 @@ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
478
469
  bin_id = re_binary_id(fpath)
479
470
  bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
480
471
 
472
+ end_point = f"sboms/{bid}"
473
+
481
474
  if bid == -1:
482
- raise HTTPError(f"No matches found for hash: {bin_id}")
475
+ raise ReaitError(f"No matches found for hash: {bin_id}", end_point)
483
476
 
484
- res = reveng_req(requests.get, f"sboms/{bid}")
477
+ res: Response = reveng_req(requests.get, end_point)
485
478
 
486
479
  logger.info("SBOM for %s:\n%s", fpath, res.text)
487
480
 
@@ -495,9 +488,10 @@ def RE_functions_rename(function_id: int, new_name: str) -> Response:
495
488
  :param function_id: ID of a function
496
489
  :param new_name: New function name
497
490
  """
498
- res = reveng_req(requests.post, f"functions/rename/{function_id}", json_data={"new_name": new_name})
491
+ res: Response = reveng_req(requests.post, f"v1/functions/rename/{function_id}",
492
+ json_data={"new_name": new_name})
499
493
 
500
- if res.status_code == 200:
494
+ if res.ok:
501
495
  logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
502
496
  else:
503
497
  logger.warning("Error, cannot rename FunctionId %d. %s", function_id, res.text)
@@ -506,66 +500,115 @@ def RE_functions_rename(function_id: int, new_name: str) -> Response:
506
500
  return res
507
501
 
508
502
 
503
+ def RE_settings() -> Response:
504
+ """
505
+ Get the configuration settings
506
+ """
507
+ res: Response = reveng_req(requests.get, "v1/config")
508
+
509
+ res.raise_for_status()
510
+ return res
511
+
512
+
513
+ def RE_health() -> bool:
514
+ """
515
+ Health check & verify access to the API
516
+ """
517
+ res: Response = reveng_req(requests.get, "v1")
518
+
519
+ success = res.json()["success"]
520
+
521
+ if success:
522
+ logger.info(res.json()["message"])
523
+ else:
524
+ logger.warning(res.json()["error"])
525
+ return success
526
+
527
+
528
+ def RE_authentication() -> Response:
529
+ """
530
+ Authentication Check
531
+ """
532
+ res: Response = reveng_req(requests.get, "v1/authenticate")
533
+
534
+ res.raise_for_status()
535
+ return res
536
+
537
+
509
538
  def re_binary_id(fpath: str) -> str:
510
539
  """
511
540
  Take the SHA-256 hash of binary file
512
541
  :param fpath: File path for binary to analyse
513
542
  """
514
- if not fpath or not exists(fpath):
515
- return "undefined"
543
+ if fpath and isfile(fpath) and access(fpath, R_OK):
544
+ hf = sha256()
516
545
 
517
- hf = sha256()
546
+ with open(fpath, "rb") as fd:
547
+ c = fd.read()
548
+ hf.update(c)
518
549
 
519
- with open(fpath, "rb") as f:
520
- c = f.read()
521
- hf.update(c)
550
+ return hf.hexdigest()
551
+ else:
552
+ logger.error("File '%s' doesn't exist or isn't readable", fpath)
522
553
 
523
- return hf.hexdigest()
554
+ return "undefined"
524
555
 
525
556
 
526
- def _binary_isa(lief_hdlr, exec_type: str) -> str:
557
+ def _binary_isa(binary: Binary, exec_type: str) -> str:
527
558
  """
528
559
  Get ISA format
529
560
  """
530
- if exec_type == "elf":
531
- machine_type = lief_hdlr.header.machine_type
561
+ if exec_type == "ELF":
562
+ arch = binary.header.machine_type
532
563
 
533
- if machine_type == ELF.ARCH.i386:
564
+ if arch == ELF.ARCH.i386:
534
565
  return "x86"
535
- elif machine_type == ELF.ARCH.x86_64:
566
+ elif arch == ELF.ARCH.x86_64:
536
567
  return "x86_64"
537
- elif exec_type == "pe":
538
- machine_type = lief_hdlr.header.machine
539
-
540
- if machine_type == PE.MACHINE_TYPES.I386:
568
+ elif arch == ELF.ARCH.ARM:
569
+ return "ARM32"
570
+ elif arch == ELF.ARCH.AARCH64:
571
+ return "ARM64"
572
+ elif exec_type == "PE":
573
+ machine_type = binary.header.machine
574
+
575
+ if machine_type == PE.Header.MACHINE_TYPES.I386:
541
576
  return "x86"
542
- elif machine_type == PE.MACHINE_TYPES.AMD64:
577
+ elif machine_type == PE.Header.MACHINE_TYPES.AMD64:
543
578
  return "x86_64"
544
- elif exec_type == "macho":
545
- machine_type = lief_hdlr.header.cpu_type
546
-
547
- if machine_type == MachO.CPU_TYPES.x86:
579
+ elif machine_type == PE.Header.MACHINE_TYPES.ARM:
580
+ return "ARM32"
581
+ elif machine_type == PE.Header.MACHINE_TYPES.ARM64:
582
+ return "ARM64"
583
+ elif exec_type == "Mach-O":
584
+ cpu_type = binary.header.cpu_type
585
+
586
+ if cpu_type == MachO.CPU_TYPES.x86:
548
587
  return "x86"
549
- elif machine_type == MachO.CPU_TYPES.x86_64:
588
+ elif cpu_type == MachO.CPU_TYPES.x86_64:
550
589
  return "x86_64"
590
+ elif cpu_type == MachO.CPU_TYPES.ARM:
591
+ return "ARM32"
592
+ elif cpu_type == MachO.CPU_TYPES.ARM64:
593
+ return "ARM64"
551
594
 
552
- logger.error("Error, failed to determine or unsupported ISA for exec_type: %s.", exec_type)
553
- raise RuntimeError(f"Error, failed to determine or unsupported ISA for exec_type:{exec_type}.")
595
+ logger.error("Error, could not determine or unsupported ISA for binary format: %s.", exec_type)
596
+ raise RuntimeError(f"Error, could not determine or unsupported ISA for binary format: {exec_type}.")
554
597
 
555
598
 
556
- def _binary_format(lief_hdlr) -> str:
599
+ def _binary_format(binary: Binary) -> str:
557
600
  """
558
601
  Get executable file format
559
602
  """
560
- if lief_hdlr.format == lief_hdlr.format.PE:
561
- return "pe"
562
- if lief_hdlr.format == lief_hdlr.format.ELF:
563
- return "elf"
564
- if lief_hdlr.format == lief_hdlr.format.MACHO:
565
- return "macho"
603
+ if binary.format == Binary.FORMATS.PE:
604
+ return "PE"
605
+ if binary.format == Binary.FORMATS.ELF:
606
+ return "ELF"
607
+ if binary.format == Binary.FORMATS.MACHO:
608
+ return "Mach-O"
566
609
 
567
- logger.error("Error, could not determine binary format: %s.", lief_hdlr.format)
568
- raise RuntimeError("Error, could not determine binary format.")
610
+ logger.error("Error, could not determine or unsupported binary format: %s.", binary.format)
611
+ raise RuntimeError(f"Error, could not determine or unsupported binary format: {binary.format}")
569
612
 
570
613
 
571
614
  def file_type(fpath: str) -> tuple[str, str]:
@@ -575,24 +618,31 @@ def file_type(fpath: str) -> tuple[str, str]:
575
618
  """
576
619
  binary = parse(fpath)
577
620
 
578
- # handle PE and ELF files
579
- file_format = _binary_format(binary)
580
- isa = _binary_isa(binary, file_format)
621
+ if not binary:
622
+ file_format = isa_format = "Unknown format"
623
+ else:
624
+ # handle PE and ELF files
625
+ file_format = _binary_format(binary)
626
+ isa_format = _binary_isa(binary, file_format)
581
627
 
582
- return file_format, isa
628
+ return file_format, isa_format
583
629
 
584
630
 
585
631
  def parse_config() -> None:
586
632
  """
587
633
  Parse ~/.reait.toml config file
588
634
  """
589
- if exists(expanduser("~/.reait.toml")):
590
- with open(expanduser("~/.reait.toml"), "r") as file:
591
- config = tomli.loads(file.read())
635
+ fpath = expanduser("~/.reait.toml")
636
+
637
+ if isfile(fpath) and access(fpath, R_OK):
638
+ with open(fpath) as fd:
639
+ config = tomli.loads(fd.read())
592
640
 
593
- for key in ("apikey", "host", "model"):
641
+ for key in ("apikey", "host", "model",):
594
642
  if key in config:
595
643
  re_conf[key] = config[key]
644
+ else:
645
+ logger.info("File %s doesn't exist or isn't readable", fpath)
596
646
 
597
647
 
598
648
  def angular_distance(x, y) -> float:
@@ -600,5 +650,5 @@ def angular_distance(x, y) -> float:
600
650
  Compute angular distance between two embedding vectors
601
651
  Normalised euclidian distance
602
652
  """
603
- cos = dot(x, y) / ((dot(x, x) * dot(y, y)) ** 0.5)
653
+ cos = dot(x, y) / ((dot(x, x) * dot(y, y))**0.5)
604
654
  return 1.0 - arccos(cos) / pi
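
The final hunk only reformats the exponent spacing in angular_distance; the maths is unchanged. As a quick standalone check of that formula (self-contained, not part of the package):

from numpy import array, dot, arccos, pi

def angular_distance(x, y) -> float:
    # Same formula as reait/api.py in 1.0.0: cosine similarity, then the angle normalised by pi.
    cos = dot(x, y) / ((dot(x, x) * dot(y, y)) ** 0.5)
    return 1.0 - arccos(cos) / pi

a = array([1.0, 0.0])
print(angular_distance(a, array([1.0, 0.0])))   # identical vectors  -> 1.0
print(angular_distance(a, array([0.0, 1.0])))   # orthogonal vectors -> 0.5
print(angular_distance(a, array([-1.0, 0.0])))  # opposite vectors   -> 0.0

Note that, despite the name, larger values mean more similar vectors (1.0 for identical, 0.0 for opposite), so the function effectively returns a normalised angular similarity.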