reait 0.0.19__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reait/api.py CHANGED
@@ -1,349 +1,654 @@
1
- #!/usr/bin/env python
2
- from __future__ import print_function
1
+ # -*- coding: utf-8 -*-
2
+ from __future__ import print_function, annotations
3
+
4
+ import json
5
+ import tomli
6
+ import logging
7
+ import requests
8
+
3
9
  from hashlib import sha256
4
- from rich import print_json, print as rich_print
10
+ from datetime import datetime
11
+
5
12
  from sklearn.metrics.pairwise import cosine_similarity
6
- import os
7
- import re
8
- import argparse
9
- import requests
10
- from numpy import array, vstack, mean, average, dot, arccos, pi
13
+ from os import access, R_OK
14
+ from os.path import basename, isfile, expanduser, getsize
15
+ from requests import request, Response, HTTPError
16
+ from numpy import array, vstack, dot, arccos, pi
11
17
  from pandas import DataFrame
12
- import json
13
- import tomli
14
- from os.path import isfile
15
- from sys import exit
16
- from IPython import embed
17
- import lief
18
+ from lief import parse, Binary, ELF, PE, MachO
18
19
 
19
- __version__ = "0.0.19"
20
+ __version__ = "1.0.0"
20
21
 
21
22
  re_conf = {
22
- 'apikey' : 'l1br3',
23
- 'host' : 'https://api.reveng.ai',
24
- 'model': 'binnet-0.1'
23
+ "apikey": "l1br3",
24
+ "host": "https://api.reveng.ai",
25
+ "model": "binnet-0.3-x86",
25
26
  }
26
27
 
27
- def reveng_req(r: requests.request, end_point: str, data=None, ex_headers: dict = None, params=None):
28
- url = f"{re_conf['host']}/{end_point}"
29
- headers = { "Authorization": f"{re_conf['apikey']}" }
30
- if ex_headers:
31
- headers.update(ex_headers)
32
- return r(url, headers=headers, data=data, params=params)
33
28
 
29
+ logger = logging.getLogger("REAIT")
30
+
31
+
32
class ReaitError(HTTPError):
    """
    Client-side error shaped like a failed HTTP request

    The error carries a synthetic ``Response`` (status code 404, JSON body with
    ``success`` and ``error`` fields) so callers can treat it like an ``HTTPError``
    raised by ``Response.raise_for_status()``.
    """

    def __init__(self, reason: str, end_point: str = None):
        """
        :param reason: Human readable description of the failure
        :param end_point: Endpoint the failed operation was targeting; a single leading
                          "/" is dropped before it is appended to the configured host
        """
        response = Response()

        response.reason = reason
        response.status_code = 404
        # Serialise with json.dumps so quotes, backslashes or newlines inside
        # `reason` cannot produce an invalid JSON body.
        response._content = json.dumps({"success": False, "error": reason}, ensure_ascii=False).encode()

        if end_point:
            path = end_point[1:] if end_point.startswith("/") else end_point
            response.url = f"{re_conf['host']}/{path}"
        else:
            response.url = None

        super().__init__(reason, response=response)
34
42
 
35
- def RE_delete(fpath: str, model_name: str):
43
+
44
def reveng_req(r: request, end_point: str, data: dict = None, ex_headers: dict = None,
               params: dict = None, json_data: dict = None, timeout: int = 60, files: dict = None) -> Response:
    """
    Constructs and sends a Request
    :param r: Method for the new Request
    :param end_point: Endpoint to add to the base URL
    :param ex_headers: Extended HTTP headers to add
    :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
    :param params: Dictionary, list of tuples or bytes to send in the query string
    :param json_data: A JSON serializable Python object to send in the body
    :param timeout: Number of seconds to stop waiting for a Response
    :param files: Dictionary of files to send to the specified URL
    :return: The Response returned by `r`; no status check is performed here
    """
    # startswith() (unlike indexing end_point[0]) does not raise on an empty endpoint
    path = end_point[1:] if end_point.startswith("/") else end_point
    url = f"{re_conf['host']}/{path}"
    headers = {"Authorization": re_conf["apikey"]}

    if ex_headers:
        headers.update(ex_headers)

    # Log a copy of the headers with the credential masked so the API key
    # never ends up in debug logs.
    loggable_headers = dict(headers, Authorization="<redacted>")

    logger.debug("Making %s request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s\n - files: %s",
                 r.__name__.upper(), url, loggable_headers, data, json_data, params, files)

    response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout, files=files)

    logger.debug("Making %s response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
                 r.__name__.upper(), url, response.headers, response.status_code, response.text)

    return response
72
+
73
+
74
def re_hash_check(bin_id: str) -> bool:
    """
    Check whether the search endpoint returns a record with the given hash
    :param bin_id: SHA-256 hash of the binary
    :return: True when at least one search result carries exactly this hash,
             False otherwise (including when the request is rejected)
    """
    response: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})

    if not response.ok:
        logger.warning("Bad Request: %s", response.text)
        return False

    for record in response.json()["query_results"]:
        if record["sha_256_hash"] == bin_id:
            return True

    return False
50
83
 
51
- def RE_analyse(fpath: str, model: str = None, isa_options: str = None, platform_options: str = None, file_options: str = None, dynamic_execution: bool = False, command_line_args: str = None, scope: str = None, tags: str = None):
52
- """
53
- Start analysis job for binary file
54
- """
55
- filename = os.path.basename(fpath)
56
- params={ 'file_name': filename }
57
- for p_name in ('model', 'isa_options', 'platform_options', 'file_options', 'dynamic_execution', 'command_line_args', 'scope', 'tags'):
58
- p_value = locals()[p_name]
59
- if p_value:
60
- params[p_name] = p_value
61
84
 
62
- res = reveng_req(requests.post, f"analyse", data=open(fpath, 'rb').read(), params=params)
63
- if res.status_code == 200:
64
- print("[+] Successfully submitted binary for analysis.")
65
- print(f"[+] {fpath} - {binary_id(fpath)}")
66
- return res
85
+ # Naming used in this module: "bin_id" is the SHA-256 hash of a binary file, while "BID" is the numeric binary ID returned by the search endpoint
86
+ # Assumes a file has been passed, correct hash only
87
+ # Returns the BID of the binary_id (hash)
88
def re_bid_search(bin_id: str) -> int:
    """
    Resolve the binary ID for a SHA-256 hash through the search endpoint
    :param bin_id: SHA-256 hash of the binary
    :return: The matching binary ID, or -1 when nothing matches or an invalid option is chosen

    When several records share the hash they are listed newest first and the user
    is prompted for an option; if reading or parsing the answer fails, the newest
    record is selected.
    """
    response: Response = reveng_req(requests.get, "v1/search", json_data={"sha_256_hash": bin_id})

    selected = -1

    if not response.ok:
        logger.warning("Bad Request: %s", response.text)
    else:
        # Keep only the records whose hash is exactly the requested one
        matches = [record for record in response.json()["query_results"]
                   if record["sha_256_hash"] == bin_id]

        if len(matches) == 1:
            only = matches[0]
            selected = only["binary_id"]

            logger.info("Only one record exists, selecting - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
                        selected, only["binary_name"], only["creation"], only["model_name"], only["status"])
        elif len(matches) > 1:
            # Newest analysis first, so option 0 is always the most recent one
            matches.sort(key=lambda record: datetime.fromisoformat(record["creation"]).timestamp(), reverse=True)

            logger.info("%d matches found for hash: %s", len(matches), bin_id)

            choices = {}

            for position, record in enumerate(matches):
                logger.info("[%d] - ID: %d, Name: %s, Creation: %s, Model: %s, Status: %s",
                            position, record["binary_id"], record["binary_name"], record["creation"],
                            record["model_name"], record["status"])

                choices[position] = record["binary_id"]

            try:
                answer = input("[+] Please enter the option you want to use for this operation:")

                selected = choices.get(int(answer), -1)

                if selected == -1:
                    logger.warning("Invalid option.")
            except Exception:
                # Best-effort fallback (e.g. no usable answer): take the newest record
                selected = choices[0]
                logger.warning("Select the most recent analysis - ID: %d", selected)
        else:
            logger.warning("No matches found for hash: %s", bin_id)

    response.raise_for_status()
    return selected
75
137
 
76
138
 
77
- def RE_upload(fpath: str):
139
def RE_delete(fpath: str, binary_id: int = 0) -> Response:
    """
    Delete analysis results for Binary ID in command
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; when 0 the ID is looked up from the SHA-256 hash of fpath
    :return: Response of the delete request
    :raises ReaitError: No binary ID could be resolved for the hash of fpath
    :raises HTTPError: The server answered with an error status
    """
    bin_id = re_binary_id(fpath)
    bid = re_bid_search(bin_id) if binary_id == 0 else binary_id

    end_point = f"v1/analyse/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {bin_id}", end_point)

    res: Response = reveng_req(requests.delete, end_point)

    if res.ok:
        logger.info("Securely deleted analysis ID %s - %s.", bid, bin_id)
    elif res.status_code == 404:
        logger.warning("Error analysis not found for ID %s - %s.", bid, bin_id)
    else:
        # The previous message ended in a dangling "under." left over from the
        # removed model-name argument.
        logger.error("Error deleting binary %s. Server returned %d.", bin_id, res.status_code)

    res.raise_for_status()
    return res
164
+
165
+
166
def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None,
               platform_options: str = None, file_options: str = None, dynamic_execution: bool = False,
               command_line_args: str = None, binary_scope: str = None, tags: list = None, priority: int = 0,
               duplicate: bool = False, symbols: dict = None, debug_fpath: str = None) -> Response:
    """
    Start analysis job for binary file
    :param fpath: File path for binary to analyse
    :param model_name: Binary model name
    :param isa_options: Executable ISA
    :param file_options: File options
    :param platform_options: OS platform
    :param dynamic_execution: Enable dynamic execution in sandbox during analysis
    :param command_line_args: Command line arguments to pass when running binary sample in the sandbox
    :param binary_scope: Analysis visibility
    :param tags: Assign tags to an analysis
    :param priority: Priority to processing queue
    :param duplicate: Duplicate an existing binary
    :param symbols: JSON object containing the base address and the list of functions
    :param debug_fpath: File path for debug file
    :return: Response of the analyse request
    :raises ReaitError: The hash is already known and `duplicate` is False
    :raises HTTPError: The server answered with an error status
    """
    bin_id = re_binary_id(fpath)
    result = re_hash_check(bin_id)

    end_point = "v1/analyse/"

    if result and duplicate is False:
        logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
                     bin_id)
        raise ReaitError(f"Duplicate analysis for hash: {bin_id}", end_point)

    params = {"file_name": basename(fpath), "size_in_bytes": getsize(fpath), "sha_256_hash": bin_id}

    if debug_fpath and isfile(debug_fpath) and access(debug_fpath, R_OK):
        try:
            debug = RE_upload(debug_fpath).json()
        except HTTPError as err:
            # The debug file is optional: continue without it, but leave a trace
            # instead of silently discarding the failure.
            logger.warning("Could not upload debug file %s, continuing without it: %s", debug_fpath, err)
        else:
            if debug["success"]:
                params["debug_hash"] = debug["sha_256_hash"]

    # Explicit mapping instead of locals() introspection; as before, only
    # truthy values are forwarded to the API.
    optional = {
        "model_name": model_name,
        "isa_options": isa_options,
        "platform_options": platform_options,
        "file_options": file_options,
        "dynamic_execution": dynamic_execution,
        "command_line_args": command_line_args,
        "binary_scope": binary_scope,
        "tags": tags,
        "priority": priority,
        "symbols": symbols,
    }
    params.update({name: value for name, value in optional.items() if value})

    res: Response = reveng_req(requests.post, end_point, json_data=params)

    if res.ok:
        logger.info("Successfully submitted binary for analysis. %s - %s", fpath, bin_id)
    elif res.status_code == 400:
        payload = res.json()

        if "error" in payload:
            logger.warning("Error analysing %s - %s", fpath, payload["error"])

    res.raise_for_status()
    return res
106
226
 
107
227
 
108
- def RE_signature(fpath: str, model_name: str):
228
def RE_upload(fpath: str) -> Response:
    """
    Upload binary to Server
    :param fpath: File path for binary to analyse
    :return: Response of the upload request, or a locally built 200 Response
             when the hash of the file is already known
    """
    bin_id = re_binary_id(fpath)

    if re_hash_check(bin_id):
        logger.info("File %s - %s already uploaded. Skipping upload...", fpath, bin_id)

        # Nothing is sent: fabricate the success answer the caller expects
        res = Response()
        res.status_code = 200
        res.url = f"{re_conf['host']}/v1/upload"
        body = f'{{"success": true,"message": "File already uploaded!","sha_256_hash": "{bin_id}"}}'
        res._content = body.encode()
    else:
        with open(fpath, "rb") as fd:
            res: Response = reveng_req(requests.post, "v1/upload", files={"file": fd})

        status = res.status_code

        if res.ok:
            logger.info("Successfully uploaded binary to your account. %s - %s", fpath, bin_id)
        elif status == 400:
            if "error" in res.json().keys():
                logger.warning("Error uploading %s - %s", fpath, res.json()["error"])
        elif status == 413:
            logger.warning("File too large. Please upload files under 10MB.")
        elif status == 500:
            logger.error("Internal Server Error. Please contact support. Skipping upload...")

    res.raise_for_status()
    return res
119
261
 
120
262
 
121
- def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None, model: str = None):
263
def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
    """
    Fetch symbol embeddings
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; when 0 the ID is looked up from the SHA-256 hash of fpath
    :return: Response of the embeddings request
    """
    file_hash = re_binary_id(fpath)
    bid = binary_id if binary_id != 0 else re_bid_search(file_hash)

    end_point = f"v1/embeddings/binary/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}", end_point)

    response: Response = reveng_req(requests.get, end_point)

    if response.status_code == 400:
        logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
                       file_hash)

    response.raise_for_status()
    return response
141
285
 
142
286
 
143
- def RE_logs(fpath: str, model_name: str):
287
def RE_logs(fpath: str, binary_id: int = 0, console: bool = True) -> Response:
    """
    Get the logs for an analysis associated to Binary ID in command
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; when 0 the ID is looked up from the SHA-256 hash of fpath
    :param console: Show response in console
    :return: Response of the logs request
    """
    file_hash = re_binary_id(fpath)
    bid = binary_id if binary_id != 0 else re_bid_search(file_hash)

    end_point = f"v1/logs/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}", end_point)

    response: Response = reveng_req(requests.get, end_point)

    show_logs = response.ok and console

    if show_logs:
        logger.info("Logs found for %s:\n%s", file_hash, response.json()["logs"])
    elif response.status_code == 404:
        logger.warning("Error, logs not found for %s.", file_hash)

    response.raise_for_status()
    return response
158
311
 
159
312
 
160
- def RE_cves(fpath: str, model_name: str):
313
def RE_cves(fpath: str, binary_id: int = 0) -> Response:
    """
    Check for known CVEs in Binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; when 0 the ID is looked up from the SHA-256 hash of fpath
    :return: Response of the CVE request
    """
    file_hash = re_binary_id(fpath)
    bid = binary_id if binary_id != 0 else re_bid_search(file_hash)

    end_point = f"cves/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}", end_point)

    response: Response = reveng_req(requests.get, end_point)

    if response.ok:
        found = json.loads(response.text)
        logger.info("Checking for known CVEs embedded inside %s", fpath)

        if len(found) != 0:
            logger.warning("Warning CVEs found!\n%s", response.text)
        else:
            logger.info("0 CVEs found.")
    elif response.status_code == 404:
        logger.warning("Error, binary analysis not found for %s.", file_hash)

    response.raise_for_status()
    return response
181
342
 
182
- def RE_status(fpath: str, model_name: str):
343
+
344
def RE_status(fpath: str, binary_id: int = 0, console: bool = False) -> Response:
    """
    Get the status of an ongoing binary analysis
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; when 0 the ID is looked up from the SHA-256 hash of fpath
    :param console: Show response in console
    :return: Response of the status request
    """
    file_hash = re_binary_id(fpath)
    bid = binary_id if binary_id != 0 else re_bid_search(file_hash)

    end_point = f"v1/analyse/status/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}", end_point)

    response: Response = reveng_req(requests.get, end_point)

    if console and response.ok:
        logger.info("Binary analysis status: %s", response.json()["status"])

    if response.status_code == 400:
        logger.warning(" Error, status not found for %s.", file_hash)

    response.raise_for_status()
    return response
196
367
 
197
368
 
198
- def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5):
369
def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list:
    """
    Compute the cosine similarity between a source embedding and the embeddings of a binary
    :param embedding: Embedding vector as python list
    :param embeddings: Symbol embeddings; each entry provides "embedding", "vaddr", "name" and "size"
    :param nns: Number of nearest neighbors
    :return: List of dicts with the keys "similaritiy", "vaddr", "name" and "size",
             most similar first
    """
    table = DataFrame(data=embeddings)
    query = array(embedding).reshape(1, -1)
    candidates = vstack(table["embedding"].values)

    # Indices of the `nns` candidates with the highest similarity, best first
    ranking = cosine_similarity(candidates, query).squeeze().argsort()[::-1][:nns]
    scores = cosine_similarity(candidates[ranking], query)

    # Row labels of the selected candidates, aligned with `scores`
    labels = table.iloc[ranking].index.tolist()

    results = []

    for score, label in zip(scores, labels):
        results.append({"similaritiy": float(score[0]),
                        "vaddr": int(table.iloc[label]["vaddr"]),
                        "name": str(table.iloc[label]["name"]),
                        "size": int(table.iloc[label]["size"])})

    return results
213
393
 
214
394
 
215
- def RE_nearest_symbols(embedding: list, model_name, nns: int = 5, collections : list = None):
395
def RE_nearest_symbols_batch(function_ids: list[int], nns: int = 5, collections: list[str] = None,
                             distance: float = 0.1, debug_enabled: bool = False) -> Response:
    """
    Get nearest functions to a passed function ids
    :param function_ids: List of function ids
    :param nns: Number of nearest neighbors
    :param collections: List of collections RevEng.AI collection names to search through
    :param distance: How close we want the ANN search to filter for
    :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
    :return: Response of the batch ANN request
    """
    payload = {}
    payload["function_id_list"] = function_ids
    payload["result_per_function"] = nns
    payload["debug_mode"] = debug_enabled
    payload["distance"] = distance

    if collections:
        # api param is collection, not collections
        payload["collection"] = collections

    response: Response = reveng_req(requests.post, "v1/ann/symbol/batch", json_data=payload)

    response.raise_for_status()
    return response
231
418
 
232
419
 
233
- def RE_nearest_binaries(embedding: list, model_name, nns: int = 5, collections : list = None):
420
def RE_nearest_functions(fpath: str, binary_id: int = 0, nns: int = 5,
                         distance: float = 0.1, debug_enabled: bool = False) -> Response:
    """
    Get the nearest functions
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; when 0 the ID is looked up from the SHA-256 hash of fpath
    :param nns: Number of nearest neighbors
    :param distance: How close we want the ANN search to filter for
    :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
    :return: Response of the ANN request
    """
    file_hash = re_binary_id(fpath)
    bid = binary_id if binary_id != 0 else re_bid_search(file_hash)

    end_point = f"v1/ann/symbol/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}", end_point)

    payload = {}
    payload["result_per_function"] = nns
    payload["debug_mode"] = debug_enabled
    payload["distance"] = distance

    response: Response = reveng_req(requests.post, end_point, json_data=payload)

    response.raise_for_status()
    return response
446
+
447
+
448
def RE_analyze_functions(fpath: str, binary_id: int = 0) -> Response:
    """
    Query the analyse/functions endpoint for a binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; when 0 the ID is looked up from the SHA-256 hash of fpath
    :return: Response of the request
    """
    file_hash = re_binary_id(fpath)
    bid = binary_id if binary_id != 0 else re_bid_search(file_hash)

    end_point = f"v1/analyse/functions/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}", end_point)

    response: Response = reveng_req(requests.get, end_point)

    response.raise_for_status()
    return response
461
+
462
+
463
def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
    """
    Get Software Bill Of Materials for binary
    :param fpath: File path for binary to analyse
    :param binary_id: ID of binary; when 0 the ID is looked up from the SHA-256 hash of fpath
    :return: Response of the SBOM request
    """
    file_hash = re_binary_id(fpath)
    bid = binary_id if binary_id != 0 else re_bid_search(file_hash)

    end_point = f"sboms/{bid}"

    if bid == -1:
        raise ReaitError(f"No matches found for hash: {file_hash}", end_point)

    response: Response = reveng_req(requests.get, end_point)

    # The response text is logged before the status check, whatever the status is
    logger.info("SBOM for %s:\n%s", fpath, response.text)

    response.raise_for_status()
    return response
483
+
484
+
485
def RE_functions_rename(function_id: int, new_name: str) -> Response:
    """
    Send the new name of a function to the rename endpoint
    :param function_id: ID of a function
    :param new_name: New function name
    :return: Response of the rename request
    """
    end_point = f"v1/functions/rename/{function_id}"
    response: Response = reveng_req(requests.post, end_point, json_data={"new_name": new_name})

    if not response.ok:
        logger.warning("Error, cannot rename FunctionId %d. %s", function_id, response.text)
    else:
        logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)

    response.raise_for_status()
    return response
501
+
502
+
503
def RE_settings() -> Response:
    """
    Get the configuration settings
    :return: Response of the config request
    """
    response: Response = reveng_req(requests.get, "v1/config")
    response.raise_for_status()

    return response
511
+
512
+
513
def RE_health() -> bool:
    """
    Health check & verify access to the API
    :return: Value of the "success" field of the JSON answer
    """
    response: Response = reveng_req(requests.get, "v1")

    # The body is parsed once; the fields are read in the same order as before
    payload = response.json()
    healthy = payload["success"]

    if not healthy:
        logger.warning(payload["error"])
    else:
        logger.info(payload["message"])

    return healthy
249
526
 
250
527
 
251
- def RE_SBOM(fpath: str, model_name: str):
528
def RE_authentication() -> Response:
    """
    Authentication Check
    :return: Response of the authenticate request
    """
    response: Response = reveng_req(requests.get, "v1/authenticate")
    response.raise_for_status()

    return response
263
536
 
264
537
 
265
- def binary_id(path: str):
266
- """Take the SHA-256 hash of binary file"""
267
- hf = sha256()
268
- with open(path, "rb") as f:
269
- c = f.read()
270
- hf.update(c)
271
- return hf.hexdigest()
538
def re_binary_id(fpath: str) -> str:
    """
    Take the SHA-256 hash of binary file
    :param fpath: File path for binary to analyse
    :return: Hex digest of the file content, or the literal string "undefined" when
             fpath is empty, is not a regular file or is not readable
    """
    if not (fpath and isfile(fpath) and access(fpath, R_OK)):
        logger.error("File '%s' doesn't exist or isn't readable", fpath)
        return "undefined"

    hf = sha256()

    with open(fpath, "rb") as fd:
        # Feed the hash in fixed-size chunks so a large binary is never held
        # in memory as a whole; the digest is the same as hashing it at once.
        for chunk in iter(lambda: fd.read(1 << 20), b""):
            hf.update(chunk)

    return hf.hexdigest()
555
+
556
+
557
def _binary_isa(binary: Binary, exec_type: str) -> str:
    """
    Get ISA format
    :param binary: Parsed binary
    :param exec_type: Executable format name: "ELF", "PE" or "Mach-O"
    :return: One of "x86", "x86_64", "ARM32" or "ARM64"
    :raises RuntimeError: The format or the architecture is not one of the handled ones
    """
    if exec_type == "ELF":
        machine = binary.header.machine_type

        if machine == ELF.ARCH.i386:
            return "x86"
        if machine == ELF.ARCH.x86_64:
            return "x86_64"
        if machine == ELF.ARCH.ARM:
            return "ARM32"
        if machine == ELF.ARCH.AARCH64:
            return "ARM64"
    elif exec_type == "PE":
        machine = binary.header.machine

        if machine == PE.Header.MACHINE_TYPES.I386:
            return "x86"
        if machine == PE.Header.MACHINE_TYPES.AMD64:
            return "x86_64"
        if machine == PE.Header.MACHINE_TYPES.ARM:
            return "ARM32"
        if machine == PE.Header.MACHINE_TYPES.ARM64:
            return "ARM64"
    elif exec_type == "Mach-O":
        cpu = binary.header.cpu_type

        if cpu == MachO.CPU_TYPES.x86:
            return "x86"
        if cpu == MachO.CPU_TYPES.x86_64:
            return "x86_64"
        if cpu == MachO.CPU_TYPES.ARM:
            return "ARM32"
        if cpu == MachO.CPU_TYPES.ARM64:
            return "ARM64"

    # Reached for an unknown format as well as for an unhandled architecture
    logger.error("Error, could not determine or unsupported ISA for binary format: %s.", exec_type)
    raise RuntimeError(f"Error, could not determine or unsupported ISA for binary format: {exec_type}.")
300
597
 
301
598
 
302
- def _binary_format(lief_hdlr):
599
def _binary_format(binary: Binary) -> str:
    """
    Get executable file format
    :param binary: Parsed binary
    :return: "PE", "ELF" or "Mach-O"
    :raises RuntimeError: The binary has any other format
    """
    detected = binary.format

    if detected == Binary.FORMATS.PE:
        return "PE"
    elif detected == Binary.FORMATS.ELF:
        return "ELF"
    elif detected == Binary.FORMATS.MACHO:
        return "Mach-O"

    logger.error("Error, could not determine or unsupported binary format: %s.", binary.format)
    raise RuntimeError(f"Error, could not determine or unsupported binary format: {binary.format}")
315
612
 
316
613
 
317
- def file_type(fpath: str):
614
def file_type(fpath: str) -> tuple[str, str]:
    """
    Determine file format and ISA for binary
    :param fpath: File path for binary to analyse
    :return: Tuple (file format, ISA); both are "Unknown format" when the file cannot be parsed
    """
    parsed = parse(fpath)

    if not parsed:
        return "Unknown format", "Unknown format"

    # The ISA lookup depends on the detected format, so resolve the format first
    detected_format = _binary_format(parsed)
    detected_isa = _binary_isa(parsed, detected_format)

    return detected_format, detected_isa
328
629
 
329
- def parse_config():
630
+
631
def parse_config() -> None:
    """
    Parse ~/.reait.toml config file

    The keys "apikey", "host" and "model" found in the file replace the
    corresponding entries of `re_conf`; other keys are ignored.
    """
    fpath = expanduser("~/.reait.toml")

    if not (isfile(fpath) and access(fpath, R_OK)):
        logger.info("File %s doesn't exist or isn't readable", fpath)
        return

    with open(fpath) as fd:
        settings = tomli.loads(fd.read())

    re_conf.update({key: settings[key] for key in ("apikey", "host", "model") if key in settings})
341
646
 
342
647
 
343
- def angular_distance(x, y):
648
def angular_distance(x, y) -> float:
    """
    Compute angular distance between two embedding vectors

    The angle between the vectors is normalised by pi and subtracted from 1, so
    the result is 1.0 for vectors pointing the same way, 0.5 for orthogonal
    vectors and 0.0 for opposite vectors.
    :param x: First embedding vector
    :param y: Second embedding vector
    """
    cos = dot(x, y) / ((dot(x, x) * dot(y, y))**0.5)

    # Floating point rounding can push the cosine marginally outside [-1, 1],
    # which would make arccos return NaN. Clamp it back into range; with this
    # argument order a NaN cosine (zero-length vector) still propagates as NaN.
    cos = min(max(cos, -1.0), 1.0)

    return 1.0 - arccos(cos) / pi