reait 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reait/api.py CHANGED
@@ -1,307 +1,561 @@
1
1
  #!/usr/bin/env python
2
- from __future__ import print_function
2
+ # -*- coding: utf-8 -*-
3
+ from __future__ import print_function, annotations
4
+
3
5
  from hashlib import sha256
4
- from rich import print_json, print as rich_print
6
+
5
7
  from sklearn.metrics.pairwise import cosine_similarity
6
- import os
7
- import re
8
- import argparse
8
+ from os.path import basename, exists, expanduser
9
+ from requests import request, Response, HTTPError
9
10
  import requests
10
- from numpy import array, vstack, mean, average, dot, arccos, pi
11
+ from numpy import array, vstack, dot, arccos, pi
11
12
  from pandas import DataFrame
12
13
  import json
13
14
  import tomli
14
- from os.path import isfile
15
- from sys import exit
16
- from IPython import embed
17
- import lief
15
+ import logging
16
+ from lief import parse, ELF, PE, MachO
18
17
 
19
- __version__ = "0.0.19"
20
18
 
21
19
  re_conf = {
22
- 'apikey' : 'l1br3',
23
- 'host' : 'https://api.reveng.ai',
24
- 'model': 'binnet-0.1'
20
+ "apikey": "l1br3",
21
+ "host": "https://api.reveng.ai",
22
+ "model": "binnet-0.2-x86"
25
23
  }
26
24
 
27
- def reveng_req(r: requests.request, end_point: str, data=None, ex_headers: dict = None, params=None):
25
+
26
+ logger = logging.getLogger("REAIT")
27
+
28
+
29
+ def reveng_req(r: request, end_point: str, data=None, ex_headers: dict = None, params=None,
30
+ json_data: dict = None, timeout: int = 30) -> Response:
31
+ """
32
+ Constructs and sends a Request
33
+ :param r: Method for the new Request
34
+ :param end_point: Endpoint to add to the base URL
35
+ :param ex_headers: Extended HTTP headers to add
36
+ :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body
37
+ :param params: Dictionary, list of tuples or bytes to send in the query string
38
+ :param json_data: A JSON serializable Python object to send in the body
39
+ :param timeout: Number of seconds to stop waiting for a Response
40
+ """
28
41
  url = f"{re_conf['host']}/{end_point}"
29
- headers = { "Authorization": f"{re_conf['apikey']}" }
42
+ headers = {"Authorization": re_conf["apikey"]}
43
+
30
44
  if ex_headers:
31
45
  headers.update(ex_headers)
32
- return r(url, headers=headers, data=data, params=params)
33
46
 
47
+ logger.debug("Making request %s:\n - headers: %s\n - data: %s\n - json_data: %s\n - params: %s",
48
+ url, headers, data, json_data, params)
49
+
50
+ response: Response = r(url, headers=headers, json=json_data, data=data, params=params, timeout=timeout)
51
+
52
+ logger.debug("Making response %s:\n - headers: %s\n - status_code: %d\n - content: %s",
53
+ url, response.headers, response.status_code, response.text)
54
+
55
+ return response
56
+
57
+
58
+ def re_hash_check(bin_id: str) -> bool:
59
+ status = False
60
+ res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All&user_owned=true")
34
61
 
35
- def RE_delete(fpath: str, model_name: str):
36
- """
37
- Delete analysis results for Binary ID in command
38
- """
39
- bin_id = binary_id(fpath)
40
- params = { 'model_name': model_name }
41
- res = reveng_req(requests.delete, f"/analyse/{bin_id}", params=params)
42
62
  if res.status_code == 200:
43
- print(f"[+] Success. Securely deleted {fpath} analysis")
44
- elif res.status_code == 404:
45
- print(f"[!] Error, analysis not found for {bin_id} under {model_name}.")
63
+ binaries_data = res.json()["binaries"]
64
+ status = len(binaries_data) > 0
65
+ elif res.status_code == 400:
66
+ logger.warning("Bad Request: %s", res.text)
46
67
  else:
47
- print(f"[!] Error deleteing binary {bin_id} under {model_name}. Server returned {res.status_code}.")
48
- return
68
+ logger.error("Internal Server Error.")
69
+
70
+ res.raise_for_status()
71
+ return status
72
+
49
73
 
74
+ # Naming used in this program: bin_id is the hash of a binary, whereas BID (bid) is the ID of a binary
75
+ # Assumes a file has been passed, correct hash only
76
+ # Returns the BID of the binary_id (hash)
77
+ def re_bid_search(bin_id: str) -> int:
78
+ res = reveng_req(requests.get, f"search?search=sha_256_hash:{bin_id}&state=All")
79
+
80
+ bid = -1
81
+
82
+ # Valid request
83
+ if res.status_code == 200:
84
+ # Check only one record is returned
85
+ binaries_data = res.json()["binaries"]
86
+
87
+ if len(binaries_data) > 1:
88
+ logger.info("%d matches found for hash: %s.", len(binaries_data), bin_id)
89
+
90
+ if len(binaries_data) > 1:
91
+ options_dict = {}
92
+
93
+ for idx, binary in enumerate(binaries_data):
94
+ logger.info("[%d] - ID: {}, Name: %s, Creation: %s, Model: %s, Owner: %s, Status: %s",
95
+ idx, binary["binary_id"], binary["binary_name"], binary["creation"],
96
+ binary["model_name"], binary["owner"], binary["status"])
97
+
98
+ options_dict[idx] = binary["binary_id"]
99
+
100
+ user_input = input("[+] Please enter the option you want to use for this operation:")
101
+
102
+ try:
103
+ option_number = int(user_input)
104
+
105
+ bid = options_dict.get(option_number, -1)
106
+
107
+ if bid == -1:
108
+ logger.warning("Invalid option.")
109
+ except Exception:
110
+ bid = -1
111
+ logger.warning("Invalid option.")
112
+ # Only 1 match found
113
+ elif len(binaries_data) == 1:
114
+ binary = binaries_data[0]
115
+ bid = binary["binary_id"]
116
+ else:
117
+ logger.warning("No matches found for hash: %s.", bin_id)
118
+ elif len(binaries_data) == 1:
119
+ binary = binaries_data[0]
120
+ bid = binary["binary_id"]
121
+
122
+ logger.info("Only one record exists, selecting - ID: %d, Name: %s, "
123
+ "Creation: %s, Model: %s, Owner: %s, Status: %s",
124
+ bid, binary["binary_name"], binary["creation"],
125
+ binary["model_name"], binary["owner"], binary["status"])
126
+ else:
127
+ logger.warning("No matches found for hash: %s.", bin_id)
128
+ elif res.status_code == 400:
129
+ logger.warning("Bad Request: %s", res.text)
130
+ else:
131
+ logger.error("Internal Server Error.")
50
132
 
51
- def RE_analyse(fpath: str, model: str = None, isa_options: str = None, platform_options: str = None, file_options: str = None, dynamic_execution: bool = False, command_line_args: str = None, scope: str = None, tags: str = None):
133
+ res.raise_for_status()
134
+ return bid
135
+
136
+
137
+ def RE_delete(fpath: str, binary_id: int = 0) -> Response:
52
138
  """
53
- Start analysis job for binary file
139
+ Delete analysis results for Binary ID in command
140
+ :param fpath: File path for binary to analyse
141
+ :param binary_id: ID of binary
54
142
  """
55
- filename = os.path.basename(fpath)
56
- params={ 'file_name': filename }
57
- for p_name in ('model', 'isa_options', 'platform_options', 'file_options', 'dynamic_execution', 'command_line_args', 'scope', 'tags'):
143
+ bin_id = re_binary_id(fpath)
144
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
145
+
146
+ if bid == -1:
147
+ raise HTTPError(f"No matches found for hash: {bin_id}")
148
+
149
+ res = reveng_req(requests.delete, f"analyse/{bid}")
150
+
151
+ if res.status_code == 200:
152
+ logger.info("Securely deleted %s analysis.", bin_id)
153
+ elif res.status_code == 404:
154
+ logger.warning("Error analysis not found for %s.", bin_id)
155
+ else:
156
+ logger.error("Error deleting binary %s under. Server returned %d.", bin_id, res.status_code)
157
+
158
+ res.raise_for_status()
159
+ return res
160
+
161
+
162
+ def RE_analyse(fpath: str, model_name: str = None, isa_options: str = None, platform_options: str = None,
163
+ file_options: str = None, dynamic_execution: bool = False, command_line_args: str = None,
164
+ scope: str = None, tags: list = None, priority: int = 0,
165
+ duplicate: bool = False, symbols: dict = None) -> Response:
166
+ """
167
+ Start analysis job for binary file
168
+ :param fpath: File path for binary to analyse
169
+ :param model_name: Binary model name
170
+ :param isa_options: Executable ISA
171
+ :param file_options: File options
172
+ :param platform_options: OS platform
173
+ :param dynamic_execution: Enable dynamic execution in sandbox during analysis
174
+ :param command_line_args: Command line arguments to pass when running binary sample in the sandbox
175
+ :param scope: Analysis visibility
176
+ :param tags: Assign tags to an analysis
177
+ :param priority: Priority to processing queue
178
+ :param duplicate: Duplicate an existing binary
179
+ :param symbols: List of functions
180
+ """
181
+ bin_id = re_binary_id(fpath)
182
+ result = re_hash_check(bin_id)
183
+
184
+ if result and duplicate is False:
185
+ logger.error("Error, duplicate analysis for %s. To upload again, use the --duplicate flag.",
186
+ bin_id)
187
+ raise HTTPError(f"Duplicate analysis for hash: {bin_id}")
188
+
189
+ filename = basename(fpath)
190
+
191
+ params = {"file_name": filename, "sha_256_hash": bin_id}
192
+
193
+ for p_name in ("model_name", "isa_options", "platform_options", "file_options",
194
+ "dynamic_execution", "command_line_args", "scope", "tags", "priority", "symbols"):
58
195
  p_value = locals()[p_name]
196
+
59
197
  if p_value:
60
198
  params[p_name] = p_value
61
199
 
62
- res = reveng_req(requests.post, f"analyse", data=open(fpath, 'rb').read(), params=params)
200
+ res = reveng_req(requests.post, f"analyse", json_data=params)
201
+
63
202
  if res.status_code == 200:
64
- print("[+] Successfully submitted binary for analysis.")
65
- print(f"[+] {fpath} - {binary_id(fpath)}")
66
- return res
203
+ logger.info("Successfully submitted binary for analysis. %s - %s", fpath, re_binary_id(fpath))
204
+ elif res.status_code == 400:
205
+ response = res.json()
67
206
 
68
- if res.status_code == 400:
69
- response = json.loads(res.text)
70
- if 'error' in response.keys():
71
- print(f"[-] Error analysing {fpath} - {response['error']}. Please check the results log file for {binary_id(fpath)}")
72
- return res
207
+ if "error" in response.keys():
208
+ logger.warning("Error analysing %s - %s", fpath, response["error"])
73
209
 
74
210
  res.raise_for_status()
211
+ return res
75
212
 
76
213
 
77
- def RE_upload(fpath: str):
214
+ def RE_upload(fpath: str) -> Response | bool:
78
215
  """
79
- Upload binary to Server
216
+ Upload binary to Server
217
+ :param fpath: File path for binary to analyse
80
218
  """
81
- res = reveng_req(requests.post, f"upload", data=open(fpath, 'rb').read())
219
+ bin_id = re_binary_id(fpath)
220
+ result = re_hash_check(bin_id)
221
+
222
+ if result:
223
+ logger.info("File %s - %s already exists. Skipping upload...", basename(fpath), re_binary_id(fpath))
224
+ return True
225
+
226
+ res = reveng_req(requests.post, f"upload", data=open(fpath, "rb").read())
227
+
82
228
  if res.status_code == 200:
83
- print("[+] Successfully uploaded binary to your account.")
84
- print(f"[+] {fpath} - {binary_id(fpath)}")
85
- return res
229
+ logger.info("Successfully uploaded binary to your account. %s - %s", fpath, re_binary_id(fpath))
230
+ elif res.status_code == 400:
231
+ response = res.json()
86
232
 
87
- if res.status_code == 400:
88
- if 'already exists' in json.loads(res.text)['reason']:
89
- print(f"[-] {fpath} already exists. Please check the results log file for {binary_id(fpath)}")
90
- return True
233
+ if "error" in response.keys():
234
+ logger.warning("Error uploading %s - %s", fpath, response["error"])
235
+ elif res.status_code == 413:
236
+ logger.warning("File too large. Please upload files under 100MB.")
237
+ elif res.status_code == 500:
238
+ logger.error("Internal Server Error. Please contact support. Skipping upload...")
91
239
 
92
240
  res.raise_for_status()
241
+ return res
93
242
 
94
243
 
95
- def RE_embeddings(fpath: str, model_name: str):
244
+ def RE_embeddings(fpath: str, binary_id: int = 0) -> Response:
96
245
  """
97
- Fetch symbol embeddings
246
+ Fetch symbol embeddings
247
+ :param fpath: File path for binary to analyse
248
+ :param binary_id: ID of binary
98
249
  """
99
- params = { 'model_name': model_name }
100
- res = reveng_req(requests.get, f"embeddings/{binary_id(fpath)}", params=params)
250
+ bin_id = re_binary_id(fpath)
251
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
252
+
253
+ if bid == -1:
254
+ raise HTTPError(f"No matches found for hash: {bin_id}")
255
+
256
+ res = reveng_req(requests.get, f"embeddings/{bid}")
257
+
101
258
  if res.status_code == 400:
102
- print(f"[-] Analysis for {binary_id(fpath)} still in progress. Please check the logs (-l) and try again later.")
259
+ logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
260
+ bin_id)
103
261
 
104
262
  res.raise_for_status()
105
- return res.json()
263
+ return res
106
264
 
107
265
 
108
- def RE_signature(fpath: str, model_name: str):
266
+ def RE_signature(fpath: str, binary_id: int = 0) -> Response:
109
267
  """
110
- Fetch binary BinNet signature
268
+ Fetch binary BinNet signature
269
+ :param fpath: File path for binary to analyse
270
+ :param binary_id: ID of binary
111
271
  """
112
- params = { 'model_name': model_name }
113
- res = reveng_req(requests.get, f"signature/{binary_id(fpath)}", params=params)
272
+ bin_id = re_binary_id(fpath)
273
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
274
+
275
+ if bid == -1:
276
+ raise HTTPError(f"No matches found for hash: {bin_id}")
277
+
278
+ res = reveng_req(requests.get, f"signature/{bid}")
279
+
114
280
  if res.status_code == 425:
115
- print(f"[-] Analysis for {binary_id(fpath)} still in progress. Please check the logs (-l) and try again later.")
281
+ logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
282
+ bin_id)
116
283
 
117
284
  res.raise_for_status()
118
- return res.json()
285
+ return res
119
286
 
120
287
 
121
- def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None, model: str = None):
288
+ def RE_embedding(fpath: str, start_vaddr: int, end_vaddr: int = None, base_vaddr: int = None,
289
+ model: str = None) -> Response:
122
290
  """
123
- Fetch embedding for custom symbol range
291
+ Fetch embedding for custom symbol range
292
+ :param fpath: File path for binary to analyse
293
+ :param start_vaddr: Start virtual address of the function to extract embeddings
294
+ :param end_vaddr: End virtual address of the function to extract embeddings
295
+ :param base_vaddr: Base address of the binary
296
+ :param model: Binary model name
124
297
  """
125
298
  params = {}
126
299
 
127
300
  if end_vaddr:
128
- params['end_vaddr']: end_vaddr
301
+ params["end_vaddr"] = end_vaddr
129
302
  if base_vaddr:
130
- params['base_vaddr']: base_vaddr
303
+ params["base_vaddr"] = base_vaddr
131
304
  if model:
132
- params['model']: model
305
+ params["models"] = model
306
+
307
+ bin_id = re_binary_id(fpath)
308
+
309
+ res = reveng_req(requests.get, f"embedding/{bin_id}/{start_vaddr}", params=params)
133
310
 
134
- res = reveng_req(requests.get, f"embedding/{binary_id(fpath)}/{start_vaddr}", params=params)
135
311
  if res.status_code == 425:
136
- print(f"[-] Analysis for {binary_id(fpath)} still in progress. Please check the logs (-l) and try again later.")
137
- return
312
+ logger.warning("Analysis for %s still in progress. Please check the logs (-l) and try again later.",
313
+ bin_id)
138
314
 
139
315
  res.raise_for_status()
140
- return res.json()
316
+ return res
141
317
 
142
318
 
143
- def RE_logs(fpath: str, model_name: str):
319
+ def RE_logs(fpath: str, binary_id: int = 0, console: bool = True) -> Response:
144
320
  """
145
- Delete analysis results for Binary ID in command
321
+ Get the logs for an analysis associated to Binary ID in command
322
+ :param fpath: File path for binary to analyse
323
+ :param binary_id: ID of binary
324
+ :param console: Show response in console
146
325
  """
147
- bin_id = binary_id(fpath)
148
- params = { 'model_name': model_name }
149
- res = reveng_req(requests.get, f"/logs/{bin_id}", params=params)
150
- if res.status_code == 200:
151
- print(res.text)
152
- return
326
+ bin_id = re_binary_id(fpath)
327
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
328
+
329
+ if bid == -1:
330
+ raise HTTPError(f"No matches found for hash: {bin_id}")
331
+
332
+ res = reveng_req(requests.get, f"logs/{bid}")
333
+
334
+ if res.status_code == 200 and console:
335
+ logger.info("Logs found for %s:\n%s", bin_id, res.text)
153
336
  elif res.status_code == 404:
154
- print(f"[!] Error, binary analysis for {bin_id} under {model_name} not found.")
155
- return
337
+ logger.warning("Error, logs not found for %s.", bin_id)
156
338
 
157
339
  res.raise_for_status()
340
+ return res
158
341
 
159
342
 
160
- def RE_cves(fpath: str, model_name: str):
343
+ def RE_cves(fpath: str, binary_id: int = 0) -> Response:
161
344
  """
162
- Check for known CVEs in Binary
345
+ Check for known CVEs in Binary
346
+ :param fpath: File path for binary to analyse
347
+ :param binary_id: ID of binary
163
348
  """
164
- bin_id = binary_id(fpath)
165
- params = { 'model_name': model_name }
166
- res = reveng_req(requests.get, f"/cves/{bin_id}", params)
349
+ bin_id = re_binary_id(fpath)
350
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
351
+
352
+ if bid == -1:
353
+ raise HTTPError(f"No matches found for hash: {bin_id}")
354
+
355
+ res = reveng_req(requests.get, f"cves/{bid}")
356
+
167
357
  if res.status_code == 200:
168
358
  cves = json.loads(res.text)
169
- rich_print(f"[bold blue]Checking for known CVEs embedded inside [/bold blue] [bold bright_green]{fpath}[/bold bright_green]:")
359
+ logger.info("Checking for known CVEs embedded inside %s", fpath)
360
+
170
361
  if len(cves) == 0:
171
- rich_print(f"[bold bright_green]0 CVEs found.[/bold bright_green]")
362
+ logger.info("0 CVEs found.")
172
363
  else:
173
- rich_print(f"[bold red]Warning CVEs found![/bold red]")
174
- print_json(data=cves)
175
- return
364
+ logger.warning("Warning CVEs found!\n%s", res.text)
176
365
  elif res.status_code == 404:
177
- print(f"[!] Error, binary analysis for {bin_id} not found.")
178
- return
366
+ logger.warning("Error, binary analysis not found for %s.", bin_id)
179
367
 
180
368
  res.raise_for_status()
369
+ return res
370
+
181
371
 
182
- def RE_status(fpath: str, model_name: str):
372
+ def RE_status(fpath: str, binary_id: int = 0) -> Response:
183
373
  """
184
- Check for known CVEs in Binary
374
+ Get the status of an ongoing binary analysis
375
+ :param fpath: File path for binary to analyse
376
+ :param binary_id: ID of binary
185
377
  """
186
- bin_id = binary_id(fpath)
187
- params = { 'model_name': model_name }
188
- res = reveng_req(requests.get, f"/analyse/status/{bin_id}", params)
189
- if res.status_code == 200:
190
- return res.json()
191
- elif res.status_code == 400:
192
- print(f"[!] Error, status not found for {bin_id}:{model_name} not found.")
193
- return res.json()
378
+ bin_id = re_binary_id(fpath)
379
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
380
+
381
+ if bid == -1:
382
+ raise HTTPError(f"No matches found for hash: {bin_id}")
383
+
384
+ res = reveng_req(requests.get, f"analyse/status/{bid}")
385
+
386
+ if res.status_code == 400:
387
+ logger.warning(" Error, status not found for %s.", bin_id)
194
388
 
195
389
  res.raise_for_status()
390
+ return res
196
391
 
197
392
 
198
- def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5):
393
+ def RE_compute_distance(embedding: list, embeddings: list, nns: int = 5) -> list:
199
394
  """
200
- Compute the cosine distance between source embedding and embeddinsg from binary
395
+ Compute the cosine distance between source embedding and embedding from binary
396
+ :param embedding: Embedding vector as python list
397
+ :param embeddings: Symbol embeddings
398
+ :param nns: Number of nearest neighbors
201
399
  """
202
400
  df = DataFrame(data=embeddings)
203
401
  np_embedding = array(embedding).reshape(1, -1)
204
- source_embeddings = vstack(df['embedding'].values)
402
+ source_embeddings = vstack(df["embedding"].values)
205
403
  closest = cosine_similarity(source_embeddings, np_embedding).squeeze().argsort()[::-1][:nns]
206
404
  distances = cosine_similarity(source_embeddings[closest], np_embedding)
405
+
207
406
  # match closest embeddings with similarity
208
407
  closest_df = df.iloc[closest]
408
+
209
409
  # create json similarity object
210
410
  similarities = list(zip(distances, closest_df.index.tolist()))
211
- json_sims = [{'similaritiy': float(d[0]), 'vaddr': int(df.iloc[v]['vaddr']), 'name': str(df.iloc[v]['name']), 'size': int(df.iloc[v]['size'])} for d, v in similarities]
411
+ json_sims = [{"similaritiy": float(d[0]), "vaddr": int(df.iloc[v]["vaddr"]), "name": str(df.iloc[v]["name"]),
412
+ "size": int(df.iloc[v]["size"])} for d, v in similarities]
212
413
  return json_sims
213
414
 
214
415
 
215
- def RE_nearest_symbols(embedding: list, model_name, nns: int = 5, collections : list = None):
416
+ def RE_nearest_symbols(embedding: list, model_name: str, nns: int = 5,
417
+ collections: list = None, ignore_hashes: list = None,
418
+ distance: float = 0.0, debug_enabled: bool = False) -> Response:
216
419
  """
217
- Get function name suggestions for an embedding
218
- :param embedding: embedding vector as python list
219
- :param nns: Number of nearest neighbors
220
- :param collections: str RegEx to search through RevEng.AI collections
420
+ Get function name suggestions for an embedding
421
+ :param embedding: Embedding vector as python list
422
+ :param model_name: Binary model name
423
+ :param nns: Number of nearest neighbors
424
+ :param collections: List of collections RevEng.AI collection names to search through
425
+ :param ignore_hashes: List[str] SHA-256 hash of binary file to ignore symbols from (usually the current binary)
426
+ :param distance: How close we want the ANN search to filter for
427
+ :param debug_enabled: ANN Symbol Search, only perform ANN on debug symbols if set
221
428
  """
222
- params={'nns': nns, 'model_name': model_name }
429
+ params = {"nns": nns, "model_name": model_name, "debug_enabled": debug_enabled}
430
+
431
+ if collections and len(collections) > 0:
432
+ # api param is collection, not collections
433
+ params["collection"] = "|".join(collections)
434
+
435
+ if ignore_hashes and len(ignore_hashes) > 0:
436
+ params["ignore_hashes"] = ignore_hashes
223
437
 
224
- if collections:
225
- params['collections'] = collections
438
+ if distance > 0.0:
439
+ params["distance"] = distance
226
440
 
227
441
  res = reveng_req(requests.post, "ann/symbol", data=json.dumps(embedding), params=params)
442
+
228
443
  res.raise_for_status()
229
- f_suggestions = res.json()
230
- print_json(data=f_suggestions)
444
+ return res
231
445
 
232
446
 
233
- def RE_nearest_binaries(embedding: list, model_name, nns: int = 5, collections : list = None):
447
+ def RE_nearest_binaries(embedding: list, model_name: str, nns: int = 5,
448
+ collections: list = None, ignore_hashes: list = None) -> Response:
234
449
  """
235
- Get executable suggestions for a binary embedding
236
- :param embedding: embedding vector as python list
237
- :param nns: Number of nearest neighbors
238
- :param collections: str RegEx to search through RevEng.AI collections
450
+ Get executable suggestions for a binary embedding
451
+ :param embedding: Embedding vector as python list
452
+ :param model_name: Binary model name
453
+ :param nns: Number of nearest neighbors
454
+ :param collections: List of collections RevEng.AI collection names to search through
455
+ :param ignore_hashes: List[str] SHA-256 hash of binary files to ignore symbols from (usually the current binary)
239
456
  """
240
- params={'nns': nns, 'model_name': model_name }
457
+ params = {"nns": nns, "model_name": model_name}
458
+
459
+ if collections and len(collections) > 0:
460
+ # api param is collection, not collections
461
+ params["collection"] = "|".join(collections)
241
462
 
242
- if collections:
243
- params['collections'] = collections
463
+ if ignore_hashes and len(ignore_hashes) > 0:
464
+ params["ignore_hashes"] = ignore_hashes
244
465
 
245
466
  res = reveng_req(requests.post, "ann/binary", data=json.dumps(embedding), params=params)
467
+
246
468
  res.raise_for_status()
247
- f_suggestions = res.json()
248
- print_json(data=f_suggestions)
469
+ return res
249
470
 
250
471
 
251
- def RE_SBOM(fpath: str, model_name: str):
472
+ def RE_SBOM(fpath: str, binary_id: int = 0) -> Response:
252
473
  """
253
- Get Software Bill Of Materials for binary
254
- :param fpath: File path for binaty to analyse
255
- :param model_name: str model name of RevEng.AI AI model
474
+ Get Software Bill Of Materials for binary
475
+ :param fpath: File path for binary to analyse
476
+ :param binary_id: ID of binary
256
477
  """
257
- params={'model_name': model_name }
478
+ bin_id = re_binary_id(fpath)
479
+ bid = re_bid_search(bin_id) if binary_id == 0 else binary_id
480
+
481
+ if bid == -1:
482
+ raise HTTPError(f"No matches found for hash: {bin_id}")
483
+
484
+ res = reveng_req(requests.get, f"sboms/{bid}")
485
+
486
+ logger.info("SBOM for %s:\n%s", fpath, res.text)
258
487
 
259
- res = reveng_req(requests.get, f"sboms/{binary_id(fpath)}", params=params)
260
488
  res.raise_for_status()
261
- sbom = res.json()
262
- print_json(data=sbom)
489
+ return res
263
490
 
264
491
 
265
- def binary_id(path: str):
266
- """Take the SHA-256 hash of binary file"""
492
+ def RE_functions_rename(function_id: int, new_name: str) -> Response:
493
+ """
494
+ Send the new name of a function to C2
495
+ :param function_id: ID of a function
496
+ :param new_name: New function name
497
+ """
498
+ res = reveng_req(requests.post, f"functions/rename/{function_id}", json_data={"new_name": new_name})
499
+
500
+ if res.status_code == 200:
501
+ logger.info("FunctionId %d has been renamed with '%s'.", function_id, new_name)
502
+ else:
503
+ logger.warning("Error, cannot rename FunctionId %d. %s", function_id, res.text)
504
+
505
+ res.raise_for_status()
506
+ return res
507
+
508
+
509
+ def re_binary_id(fpath: str) -> str:
510
+ """
511
+ Take the SHA-256 hash of binary file
512
+ :param fpath: File path for binary to analyse
513
+ """
514
+ if not fpath or not exists(fpath):
515
+ return "undefined"
516
+
267
517
  hf = sha256()
268
- with open(path, "rb") as f:
518
+
519
+ with open(fpath, "rb") as f:
269
520
  c = f.read()
270
521
  hf.update(c)
522
+
271
523
  return hf.hexdigest()
272
524
 
273
525
 
274
- def _binary_isa(lief_hdlr, exec_type):
526
+ def _binary_isa(lief_hdlr, exec_type: str) -> str:
275
527
  """
276
- Get executable file format
528
+ Get ISA format
277
529
  """
278
530
  if exec_type == "elf":
279
531
  machine_type = lief_hdlr.header.machine_type
280
- if machine_type == lief.ELF.ARCH.i386:
532
+
533
+ if machine_type == ELF.ARCH.i386:
281
534
  return "x86"
282
- elif machine_type == lief.ELF.ARCH.x86_64:
535
+ elif machine_type == ELF.ARCH.x86_64:
283
536
  return "x86_64"
284
-
285
537
  elif exec_type == "pe":
286
538
  machine_type = lief_hdlr.header.machine
287
- if machine_type == lief.PE.MACHINE_TYPES.I386:
539
+
540
+ if machine_type == PE.MACHINE_TYPES.I386:
288
541
  return "x86"
289
- elif machine_type == lief.PE.MACHINE_TYPES.AMD64:
542
+ elif machine_type == PE.MACHINE_TYPES.AMD64:
290
543
  return "x86_64"
291
-
292
544
  elif exec_type == "macho":
293
545
  machine_type = lief_hdlr.header.cpu_type
294
- if machine_type == lief.MachO.CPU_TYPES.x86:
546
+
547
+ if machine_type == MachO.CPU_TYPES.x86:
295
548
  return "x86"
296
- elif machine_type == lief.MachO.CPU_TYPES.x86_64:
549
+ elif machine_type == MachO.CPU_TYPES.x86_64:
297
550
  return "x86_64"
298
-
299
- raise RuntimeError(f"Error, failed to determine or unsupported ISA for exec_type:{exec_type}")
300
551
 
552
+ logger.error("Error, failed to determine or unsupported ISA for exec_type: %s.", exec_type)
553
+ raise RuntimeError(f"Error, failed to determine or unsupported ISA for exec_type:{exec_type}.")
301
554
 
302
- def _binary_format(lief_hdlr):
555
+
556
+ def _binary_format(lief_hdlr) -> str:
303
557
  """
304
- Get executable file format
558
+ Get executable file format
305
559
  """
306
560
  if lief_hdlr.format == lief_hdlr.format.PE:
307
561
  return "pe"
@@ -309,41 +563,42 @@ def _binary_format(lief_hdlr):
309
563
  return "elf"
310
564
  if lief_hdlr.format == lief_hdlr.format.MACHO:
311
565
  return "macho"
312
-
313
- raise RuntimeError("Error, could not determine binary format")
314
566
 
567
+ logger.error("Error, could not determine binary format: %s.", lief_hdlr.format)
568
+ raise RuntimeError("Error, could not determine binary format.")
315
569
 
316
570
 
317
- def file_type(fpath: str):
571
+ def file_type(fpath: str) -> tuple[str, str]:
318
572
  """
319
- Determine ISA for binary
573
+ Determine ISA for binary
574
+ :param fpath: File path for binary to analyse
320
575
  """
321
- binary = lief.parse(fpath)
576
+ binary = parse(fpath)
322
577
 
323
578
  # handle PE and ELF files
324
579
  file_format = _binary_format(binary)
325
- isa = _binary_isa(binary, file_format)
580
+ isa = _binary_isa(binary, file_format)
581
+
326
582
  return file_format, isa
327
583
 
328
584
 
329
- def parse_config():
585
+ def parse_config() -> None:
586
+ """
587
+ Parse ~/.reait.toml config file
330
588
  """
331
- Parse ~/.reait.toml config file
332
- """
333
- if not os.path.exists(os.path.expanduser("~/.reait.toml")):
334
- return
589
+ if exists(expanduser("~/.reait.toml")):
590
+ with open(expanduser("~/.reait.toml"), "r") as file:
591
+ config = tomli.loads(file.read())
335
592
 
336
- with open(os.path.expanduser("~/.reait.toml"), "r") as file:
337
- config = tomli.loads(file.read())
338
- for key in ('apikey', 'host'):
339
- if key in config:
340
- re_conf[key] = config[key]
593
+ for key in ("apikey", "host", "model"):
594
+ if key in config:
595
+ re_conf[key] = config[key]
341
596
 
342
597
 
343
- def angular_distance(x, y):
598
+ def angular_distance(x, y) -> float:
344
599
  """
345
600
  Compute angular distance between two embedding vectors
346
- Normalised euclidean distance
601
+ Normalised euclidian distance
347
602
  """
348
603
  cos = dot(x, y) / ((dot(x, x) * dot(y, y)) ** 0.5)
349
- return 1.0 - arccos(cos)/pi
604
+ return 1.0 - arccos(cos) / pi